/* http-fetch.c as of commit "http-fetch: fix alternates handling." (883653b) */
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
  10/* Definitions for DAV requests */
  11#define DAV_PROPFIND "PROPFIND"
  12#define DAV_PROPFIND_RESP ".multistatus.response"
  13#define DAV_PROPFIND_NAME ".multistatus.response.href"
  14#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
  15#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
  17/* Definitions for processing XML DAV responses */
  18#ifndef XML_STATUS_OK
  19enum XML_Status {
  20  XML_STATUS_OK = 1,
  21  XML_STATUS_ERROR = 0
  22};
  23#define XML_STATUS_OK    1
  24#define XML_STATUS_ERROR 0
  25#endif
  26
  27/* Flags that control remote_ls processing */
  28#define PROCESS_FILES (1u << 0)
  29#define PROCESS_DIRS  (1u << 1)
  30#define RECURSIVE     (1u << 2)
  31
  32/* Flags that remote_ls passes to callback functions */
  33#define IS_DIR (1u << 0)
  34#endif
  35
  36#define PREV_BUF_SIZE 4096
  37#define RANGE_HEADER_SIZE 30
  38
  39static int commits_on_stdin = 0;
  40
  41static int got_alternates = -1;
  42static int corrupt_object_found = 0;
  43
  44static struct curl_slist *no_pragma_header;
  45
  46struct alt_base
  47{
  48        const char *base;
  49        int path_len;
  50        int got_indices;
  51        struct packed_git *packs;
  52        struct alt_base *next;
  53};
  54
  55static struct alt_base *alt = NULL;
  56
  57enum object_request_state {
  58        WAITING,
  59        ABORTED,
  60        ACTIVE,
  61        COMPLETE,
  62};
  63
  64struct object_request
  65{
  66        unsigned char sha1[20];
  67        struct alt_base *repo;
  68        char *url;
  69        char filename[PATH_MAX];
  70        char tmpfile[PATH_MAX];
  71        int local;
  72        enum object_request_state state;
  73        CURLcode curl_result;
  74        char errorstr[CURL_ERROR_SIZE];
  75        long http_code;
  76        unsigned char real_sha1[20];
  77        SHA_CTX c;
  78        z_stream stream;
  79        int zret;
  80        int rename;
  81        struct active_request_slot *slot;
  82        struct object_request *next;
  83};
  84
  85struct alternates_request {
  86        const char *base;
  87        char *url;
  88        struct buffer *buffer;
  89        struct active_request_slot *slot;
  90        int http_specific;
  91};
  92
  93#ifndef NO_EXPAT
  94struct xml_ctx
  95{
  96        char *name;
  97        int len;
  98        char *cdata;
  99        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
 100        void *userData;
 101};
 102
 103struct remote_ls_ctx
 104{
 105        struct alt_base *repo;
 106        char *path;
 107        void (*userFunc)(struct remote_ls_ctx *ls);
 108        void *userData;
 109        int flags;
 110        char *dentry_name;
 111        int dentry_flags;
 112        int rc;
 113        struct remote_ls_ctx *parent;
 114};
 115#endif
 116
 117static struct object_request *object_queue_head = NULL;
 118
 119static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 120                               void *data)
 121{
 122        unsigned char expn[4096];
 123        size_t size = eltsize * nmemb;
 124        int posn = 0;
 125        struct object_request *obj_req = (struct object_request *)data;
 126        do {
 127                ssize_t retval = write(obj_req->local,
 128                                       (char *) ptr + posn, size - posn);
 129                if (retval < 0)
 130                        return posn;
 131                posn += retval;
 132        } while (posn < size);
 133
 134        obj_req->stream.avail_in = size;
 135        obj_req->stream.next_in = ptr;
 136        do {
 137                obj_req->stream.next_out = expn;
 138                obj_req->stream.avail_out = sizeof(expn);
 139                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 140                SHA1_Update(&obj_req->c, expn,
 141                            sizeof(expn) - obj_req->stream.avail_out);
 142        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 143        data_received++;
 144        return size;
 145}
 146
 147static void fetch_alternates(const char *base);
 148
 149static void process_object_response(void *callback_data);
 150
 151static void start_object_request(struct object_request *obj_req)
 152{
 153        char *hex = sha1_to_hex(obj_req->sha1);
 154        char prevfile[PATH_MAX];
 155        char *url;
 156        char *posn;
 157        int prevlocal;
 158        unsigned char prev_buf[PREV_BUF_SIZE];
 159        ssize_t prev_read = 0;
 160        long prev_posn = 0;
 161        char range[RANGE_HEADER_SIZE];
 162        struct curl_slist *range_header = NULL;
 163        struct active_request_slot *slot;
 164
 165        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 166        unlink(prevfile);
 167        rename(obj_req->tmpfile, prevfile);
 168        unlink(obj_req->tmpfile);
 169
 170        if (obj_req->local != -1)
 171                error("fd leakage in start: %d", obj_req->local);
 172        obj_req->local = open(obj_req->tmpfile,
 173                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 174        /* This could have failed due to the "lazy directory creation";
 175         * try to mkdir the last path component.
 176         */
 177        if (obj_req->local < 0 && errno == ENOENT) {
 178                char *dir = strrchr(obj_req->tmpfile, '/');
 179                if (dir) {
 180                        *dir = 0;
 181                        mkdir(obj_req->tmpfile, 0777);
 182                        *dir = '/';
 183                }
 184                obj_req->local = open(obj_req->tmpfile,
 185                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 186        }
 187
 188        if (obj_req->local < 0) {
 189                obj_req->state = ABORTED;
 190                error("Couldn't create temporary file %s for %s: %s",
 191                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 192                return;
 193        }
 194
 195        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 196
 197        inflateInit(&obj_req->stream);
 198
 199        SHA1_Init(&obj_req->c);
 200
 201        url = xmalloc(strlen(obj_req->repo->base) + 50);
 202        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 203        strcpy(url, obj_req->repo->base);
 204        posn = url + strlen(obj_req->repo->base);
 205        strcpy(posn, "objects/");
 206        posn += 8;
 207        memcpy(posn, hex, 2);
 208        posn += 2;
 209        *(posn++) = '/';
 210        strcpy(posn, hex + 2);
 211        strcpy(obj_req->url, url);
 212
 213        /* If a previous temp file is present, process what was already
 214           fetched. */
 215        prevlocal = open(prevfile, O_RDONLY);
 216        if (prevlocal != -1) {
 217                do {
 218                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 219                        if (prev_read>0) {
 220                                if (fwrite_sha1_file(prev_buf,
 221                                                     1,
 222                                                     prev_read,
 223                                                     obj_req) == prev_read) {
 224                                        prev_posn += prev_read;
 225                                } else {
 226                                        prev_read = -1;
 227                                }
 228                        }
 229                } while (prev_read > 0);
 230                close(prevlocal);
 231        }
 232        unlink(prevfile);
 233
 234        /* Reset inflate/SHA1 if there was an error reading the previous temp
 235           file; also rewind to the beginning of the local file. */
 236        if (prev_read == -1) {
 237                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 238                inflateInit(&obj_req->stream);
 239                SHA1_Init(&obj_req->c);
 240                if (prev_posn>0) {
 241                        prev_posn = 0;
 242                        lseek(obj_req->local, SEEK_SET, 0);
 243                        ftruncate(obj_req->local, 0);
 244                }
 245        }
 246
 247        slot = get_active_slot();
 248        slot->callback_func = process_object_response;
 249        slot->callback_data = obj_req;
 250        obj_req->slot = slot;
 251
 252        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 253        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 254        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 255        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 256        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 257
 258        /* If we have successfully processed data from a previous fetch
 259           attempt, only fetch the data we don't already have. */
 260        if (prev_posn>0) {
 261                if (get_verbosely)
 262                        fprintf(stderr,
 263                                "Resuming fetch of object %s at byte %ld\n",
 264                                hex, prev_posn);
 265                sprintf(range, "Range: bytes=%ld-", prev_posn);
 266                range_header = curl_slist_append(range_header, range);
 267                curl_easy_setopt(slot->curl,
 268                                 CURLOPT_HTTPHEADER, range_header);
 269        }
 270
 271        /* Try to get the request started, abort the request on error */
 272        obj_req->state = ACTIVE;
 273        if (!start_active_slot(slot)) {
 274                obj_req->state = ABORTED;
 275                obj_req->slot = NULL;
 276                close(obj_req->local); obj_req->local = -1;
 277                free(obj_req->url);
 278                return;
 279        }
 280}
 281
 282static void finish_object_request(struct object_request *obj_req)
 283{
 284        struct stat st;
 285
 286        fchmod(obj_req->local, 0444);
 287        close(obj_req->local); obj_req->local = -1;
 288
 289        if (obj_req->http_code == 416) {
 290                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 291        } else if (obj_req->curl_result != CURLE_OK) {
 292                if (stat(obj_req->tmpfile, &st) == 0)
 293                        if (st.st_size == 0)
 294                                unlink(obj_req->tmpfile);
 295                return;
 296        }
 297
 298        inflateEnd(&obj_req->stream);
 299        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 300        if (obj_req->zret != Z_STREAM_END) {
 301                unlink(obj_req->tmpfile);
 302                return;
 303        }
 304        if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
 305                unlink(obj_req->tmpfile);
 306                return;
 307        }
 308        obj_req->rename =
 309                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 310
 311        if (obj_req->rename == 0)
 312                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 313}
 314
 315static void process_object_response(void *callback_data)
 316{
 317        struct object_request *obj_req =
 318                (struct object_request *)callback_data;
 319
 320        obj_req->curl_result = obj_req->slot->curl_result;
 321        obj_req->http_code = obj_req->slot->http_code;
 322        obj_req->slot = NULL;
 323        obj_req->state = COMPLETE;
 324
 325        /* Use alternates if necessary */
 326        if (obj_req->http_code == 404 ||
 327            obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 328                fetch_alternates(alt->base);
 329                if (obj_req->repo->next != NULL) {
 330                        obj_req->repo =
 331                                obj_req->repo->next;
 332                        close(obj_req->local);
 333                        obj_req->local = -1;
 334                        start_object_request(obj_req);
 335                        return;
 336                }
 337        }
 338
 339        finish_object_request(obj_req);
 340}
 341
 342static void release_object_request(struct object_request *obj_req)
 343{
 344        struct object_request *entry = object_queue_head;
 345
 346        if (obj_req->local != -1)
 347                error("fd leakage in release: %d", obj_req->local);
 348        if (obj_req == object_queue_head) {
 349                object_queue_head = obj_req->next;
 350        } else {
 351                while (entry->next != NULL && entry->next != obj_req)
 352                        entry = entry->next;
 353                if (entry->next == obj_req)
 354                        entry->next = entry->next->next;
 355        }
 356
 357        free(obj_req->url);
 358        free(obj_req);
 359}
 360
 361#ifdef USE_CURL_MULTI
 362void fill_active_slots(void)
 363{
 364        struct object_request *obj_req = object_queue_head;
 365        struct active_request_slot *slot = active_queue_head;
 366        int num_transfers;
 367
 368        while (active_requests < max_requests && obj_req != NULL) {
 369                if (obj_req->state == WAITING) {
 370                        if (has_sha1_file(obj_req->sha1))
 371                                obj_req->state = COMPLETE;
 372                        else
 373                                start_object_request(obj_req);
 374                        curl_multi_perform(curlm, &num_transfers);
 375                }
 376                obj_req = obj_req->next;
 377        }
 378
 379        while (slot != NULL) {
 380                if (!slot->in_use && slot->curl != NULL) {
 381                        curl_easy_cleanup(slot->curl);
 382                        slot->curl = NULL;
 383                }
 384                slot = slot->next;
 385        }
 386}
 387#endif
 388
 389void prefetch(unsigned char *sha1)
 390{
 391        struct object_request *newreq;
 392        struct object_request *tail;
 393        char *filename = sha1_file_name(sha1);
 394
 395        newreq = xmalloc(sizeof(*newreq));
 396        memcpy(newreq->sha1, sha1, 20);
 397        newreq->repo = alt;
 398        newreq->url = NULL;
 399        newreq->local = -1;
 400        newreq->state = WAITING;
 401        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 402        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 403                 "%s.temp", filename);
 404        newreq->slot = NULL;
 405        newreq->next = NULL;
 406
 407        if (object_queue_head == NULL) {
 408                object_queue_head = newreq;
 409        } else {
 410                tail = object_queue_head;
 411                while (tail->next != NULL) {
 412                        tail = tail->next;
 413                }
 414                tail->next = newreq;
 415        }
 416
 417#ifdef USE_CURL_MULTI
 418        fill_active_slots();
 419        step_active_slots();
 420#endif
 421}
 422
 423static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 424{
 425        char *hex = sha1_to_hex(sha1);
 426        char *filename;
 427        char *url;
 428        char tmpfile[PATH_MAX];
 429        long prev_posn = 0;
 430        char range[RANGE_HEADER_SIZE];
 431        struct curl_slist *range_header = NULL;
 432
 433        FILE *indexfile;
 434        struct active_request_slot *slot;
 435        struct slot_results results;
 436
 437        if (has_pack_index(sha1))
 438                return 0;
 439
 440        if (get_verbosely)
 441                fprintf(stderr, "Getting index for pack %s\n", hex);
 442
 443        url = xmalloc(strlen(repo->base) + 64);
 444        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 445
 446        filename = sha1_pack_index_name(sha1);
 447        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 448        indexfile = fopen(tmpfile, "a");
 449        if (!indexfile)
 450                return error("Unable to open local file %s for pack index",
 451                             filename);
 452
 453        slot = get_active_slot();
 454        slot->results = &results;
 455        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 456        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 457        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 458        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 459        slot->local = indexfile;
 460
 461        /* If there is data present from a previous transfer attempt,
 462           resume where it left off */
 463        prev_posn = ftell(indexfile);
 464        if (prev_posn>0) {
 465                if (get_verbosely)
 466                        fprintf(stderr,
 467                                "Resuming fetch of index for pack %s at byte %ld\n",
 468                                hex, prev_posn);
 469                sprintf(range, "Range: bytes=%ld-", prev_posn);
 470                range_header = curl_slist_append(range_header, range);
 471                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 472        }
 473
 474        if (start_active_slot(slot)) {
 475                run_active_slot(slot);
 476                if (results.curl_result != CURLE_OK) {
 477                        fclose(indexfile);
 478                        return error("Unable to get pack index %s\n%s", url,
 479                                     curl_errorstr);
 480                }
 481        } else {
 482                fclose(indexfile);
 483                return error("Unable to start request");
 484        }
 485
 486        fclose(indexfile);
 487
 488        return move_temp_to_file(tmpfile, filename);
 489}
 490
 491static int setup_index(struct alt_base *repo, unsigned char *sha1)
 492{
 493        struct packed_git *new_pack;
 494        if (has_pack_file(sha1))
 495                return 0; /* don't list this as something we can get */
 496
 497        if (fetch_index(repo, sha1))
 498                return -1;
 499
 500        new_pack = parse_pack_index(sha1);
 501        new_pack->next = repo->packs;
 502        repo->packs = new_pack;
 503        return 0;
 504}
 505
 506static void process_alternates_response(void *callback_data)
 507{
 508        struct alternates_request *alt_req =
 509                (struct alternates_request *)callback_data;
 510        struct active_request_slot *slot = alt_req->slot;
 511        struct alt_base *tail = alt;
 512        const char *base = alt_req->base;
 513        static const char null_byte = '\0';
 514        char *data;
 515        int i = 0;
 516
 517        if (alt_req->http_specific) {
 518                if (slot->curl_result != CURLE_OK ||
 519                    !alt_req->buffer->posn) {
 520
 521                        /* Try reusing the slot to get non-http alternates */
 522                        alt_req->http_specific = 0;
 523                        sprintf(alt_req->url, "%s/objects/info/alternates",
 524                                base);
 525                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 526                                         alt_req->url);
 527                        active_requests++;
 528                        slot->in_use = 1;
 529                        if (slot->finished != NULL)
 530                                (*slot->finished) = 0;
 531                        if (!start_active_slot(slot)) {
 532                                got_alternates = -1;
 533                                slot->in_use = 0;
 534                                if (slot->finished != NULL)
 535                                        (*slot->finished) = 1;
 536                        }
 537                        return;
 538                }
 539        } else if (slot->curl_result != CURLE_OK) {
 540                if (slot->http_code != 404 &&
 541                    slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
 542                        got_alternates = -1;
 543                        return;
 544                }
 545        }
 546
 547        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 548        alt_req->buffer->posn--;
 549        data = alt_req->buffer->buffer;
 550
 551        while (i < alt_req->buffer->posn) {
 552                int posn = i;
 553                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 554                        posn++;
 555                if (data[posn] == '\n') {
 556                        int okay = 0;
 557                        int serverlen = 0;
 558                        struct alt_base *newalt;
 559                        char *target = NULL;
 560                        char *path;
 561                        if (data[i] == '/') {
 562                                /* This counts
 563                                 * http://git.host/pub/scm/linux.git/
 564                                 * -----------here^
 565                                 * so memcpy(dst, base, serverlen) will
 566                                 * copy up to "...git.host".
 567                                 */
 568                                const char *colon_ss = strstr(base,"://");
 569                                if (colon_ss) {
 570                                        serverlen = (strchr(colon_ss + 3, '/')
 571                                                     - base);
 572                                        okay = 1;
 573                                }
 574                        } else if (!memcmp(data + i, "../", 3)) {
 575                                /* Relative URL; chop the corresponding
 576                                 * number of subpath from base (and ../
 577                                 * from data), and concatenate the result.
 578                                 *
 579                                 * The code first drops ../ from data, and
 580                                 * then drops one ../ from data and one path
 581                                 * from base.  IOW, one extra ../ is dropped
 582                                 * from data than path is dropped from base.
 583                                 *
 584                                 * This is not wrong.  The alternate in
 585                                 *     http://git.host/pub/scm/linux.git/
 586                                 * to borrow from
 587                                 *     http://git.host/pub/scm/linus.git/
 588                                 * is ../../linus.git/objects/.  You need
 589                                 * two ../../ to borrow from your direct
 590                                 * neighbour.
 591                                 */
 592                                i += 3;
 593                                serverlen = strlen(base);
 594                                while (i + 2 < posn &&
 595                                       !memcmp(data + i, "../", 3)) {
 596                                        do {
 597                                                serverlen--;
 598                                        } while (serverlen &&
 599                                                 base[serverlen - 1] != '/');
 600                                        i += 3;
 601                                }
 602                                /* If the server got removed, give up. */
 603                                okay = strchr(base, ':') - base + 3 <
 604                                        serverlen;
 605                        } else if (alt_req->http_specific) {
 606                                char *colon = strchr(data + i, ':');
 607                                char *slash = strchr(data + i, '/');
 608                                if (colon && slash && colon < data + posn &&
 609                                    slash < data + posn && colon < slash) {
 610                                        okay = 1;
 611                                }
 612                        }
 613                        /* skip "objects\n" at end */
 614                        if (okay) {
 615                                target = xmalloc(serverlen + posn - i - 6);
 616                                memcpy(target, base, serverlen);
 617                                memcpy(target + serverlen, data + i,
 618                                       posn - i - 7);
 619                                target[serverlen + posn - i - 7] = 0;
 620                                if (get_verbosely)
 621                                        fprintf(stderr,
 622                                                "Also look at %s\n", target);
 623                                newalt = xmalloc(sizeof(*newalt));
 624                                newalt->next = NULL;
 625                                newalt->base = target;
 626                                newalt->got_indices = 0;
 627                                newalt->packs = NULL;
 628                                path = strstr(target, "//");
 629                                if (path) {
 630                                        path = strchr(path+2, '/');
 631                                        if (path)
 632                                                newalt->path_len = strlen(path);
 633                                }
 634
 635                                while (tail->next != NULL)
 636                                        tail = tail->next;
 637                                tail->next = newalt;
 638                        }
 639                }
 640                i = posn + 1;
 641        }
 642
 643        got_alternates = 1;
 644}
 645
 646static void fetch_alternates(const char *base)
 647{
 648        struct buffer buffer;
 649        char *url;
 650        char *data;
 651        struct active_request_slot *slot;
 652        struct alternates_request alt_req;
 653
 654        /* If another request has already started fetching alternates,
 655           wait for them to arrive and return to processing this request's
 656           curl message */
 657#ifdef USE_CURL_MULTI
 658        while (got_alternates == 0) {
 659                step_active_slots();
 660        }
 661#endif
 662
 663        /* Nothing to do if they've already been fetched */
 664        if (got_alternates == 1)
 665                return;
 666
 667        /* Start the fetch */
 668        got_alternates = 0;
 669
 670        data = xmalloc(4096);
 671        buffer.size = 4096;
 672        buffer.posn = 0;
 673        buffer.buffer = data;
 674
 675        if (get_verbosely)
 676                fprintf(stderr, "Getting alternates list for %s\n", base);
 677
 678        url = xmalloc(strlen(base) + 31);
 679        sprintf(url, "%s/objects/info/http-alternates", base);
 680
 681        /* Use a callback to process the result, since another request
 682           may fail and need to have alternates loaded before continuing */
 683        slot = get_active_slot();
 684        slot->callback_func = process_alternates_response;
 685        slot->callback_data = &alt_req;
 686
 687        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 688        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 689        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 690
 691        alt_req.base = base;
 692        alt_req.url = url;
 693        alt_req.buffer = &buffer;
 694        alt_req.http_specific = 1;
 695        alt_req.slot = slot;
 696
 697        if (start_active_slot(slot))
 698                run_active_slot(slot);
 699        else
 700                got_alternates = -1;
 701
 702        free(data);
 703        free(url);
 704}
 705
 706#ifndef NO_EXPAT
 707static void
 708xml_start_tag(void *userData, const char *name, const char **atts)
 709{
 710        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 711        const char *c = strchr(name, ':');
 712        int new_len;
 713
 714        if (c == NULL)
 715                c = name;
 716        else
 717                c++;
 718
 719        new_len = strlen(ctx->name) + strlen(c) + 2;
 720
 721        if (new_len > ctx->len) {
 722                ctx->name = xrealloc(ctx->name, new_len);
 723                ctx->len = new_len;
 724        }
 725        strcat(ctx->name, ".");
 726        strcat(ctx->name, c);
 727
 728        if (ctx->cdata) {
 729                free(ctx->cdata);
 730                ctx->cdata = NULL;
 731        }
 732
 733        ctx->userFunc(ctx, 0);
 734}
 735
 736static void
 737xml_end_tag(void *userData, const char *name)
 738{
 739        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 740        const char *c = strchr(name, ':');
 741        char *ep;
 742
 743        ctx->userFunc(ctx, 1);
 744
 745        if (c == NULL)
 746                c = name;
 747        else
 748                c++;
 749
 750        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 751        *ep = 0;
 752}
 753
 754static void
 755xml_cdata(void *userData, const XML_Char *s, int len)
 756{
 757        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 758        if (ctx->cdata)
 759                free(ctx->cdata);
 760        ctx->cdata = xmalloc(len + 1);
 761        strlcpy(ctx->cdata, s, len + 1);
 762}
 763
 764static int remote_ls(struct alt_base *repo, const char *path, int flags,
 765                     void (*userFunc)(struct remote_ls_ctx *ls),
 766                     void *userData);
 767
 768static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 769{
 770        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 771
 772        if (tag_closed) {
 773                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 774                        if (ls->dentry_flags & IS_DIR) {
 775                                if (ls->flags & PROCESS_DIRS) {
 776                                        ls->userFunc(ls);
 777                                }
 778                                if (strcmp(ls->dentry_name, ls->path) &&
 779                                    ls->flags & RECURSIVE) {
 780                                        ls->rc = remote_ls(ls->repo,
 781                                                           ls->dentry_name,
 782                                                           ls->flags,
 783                                                           ls->userFunc,
 784                                                           ls->userData);
 785                                }
 786                        } else if (ls->flags & PROCESS_FILES) {
 787                                ls->userFunc(ls);
 788                        }
 789                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 790                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 791                                                  ls->repo->path_len + 1);
 792                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 793                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 794                        ls->dentry_flags |= IS_DIR;
 795                }
 796        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 797                if (ls->dentry_name) {
 798                        free(ls->dentry_name);
 799                }
 800                ls->dentry_name = NULL;
 801                ls->dentry_flags = 0;
 802        }
 803}
 804
 805static int remote_ls(struct alt_base *repo, const char *path, int flags,
 806                     void (*userFunc)(struct remote_ls_ctx *ls),
 807                     void *userData)
 808{
 809        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 810        struct active_request_slot *slot;
 811        struct slot_results results;
 812        struct buffer in_buffer;
 813        struct buffer out_buffer;
 814        char *in_data;
 815        char *out_data;
 816        XML_Parser parser = XML_ParserCreate(NULL);
 817        enum XML_Status result;
 818        struct curl_slist *dav_headers = NULL;
 819        struct xml_ctx ctx;
 820        struct remote_ls_ctx ls;
 821
 822        ls.flags = flags;
 823        ls.repo = repo;
 824        ls.path = strdup(path);
 825        ls.dentry_name = NULL;
 826        ls.dentry_flags = 0;
 827        ls.userData = userData;
 828        ls.userFunc = userFunc;
 829        ls.rc = 0;
 830
 831        sprintf(url, "%s%s", repo->base, path);
 832
 833        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 834        out_data = xmalloc(out_buffer.size + 1);
 835        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 836        out_buffer.posn = 0;
 837        out_buffer.buffer = out_data;
 838
 839        in_buffer.size = 4096;
 840        in_data = xmalloc(in_buffer.size);
 841        in_buffer.posn = 0;
 842        in_buffer.buffer = in_data;
 843
 844        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 845        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 846
 847        slot = get_active_slot();
 848        slot->results = &results;
 849        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 850        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 851        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 852        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 853        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 854        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 855        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 856        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 857        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 858
 859        if (start_active_slot(slot)) {
 860                run_active_slot(slot);
 861                if (results.curl_result == CURLE_OK) {
 862                        ctx.name = xcalloc(10, 1);
 863                        ctx.len = 0;
 864                        ctx.cdata = NULL;
 865                        ctx.userFunc = handle_remote_ls_ctx;
 866                        ctx.userData = &ls;
 867                        XML_SetUserData(parser, &ctx);
 868                        XML_SetElementHandler(parser, xml_start_tag,
 869                                              xml_end_tag);
 870                        XML_SetCharacterDataHandler(parser, xml_cdata);
 871                        result = XML_Parse(parser, in_buffer.buffer,
 872                                           in_buffer.posn, 1);
 873                        free(ctx.name);
 874
 875                        if (result != XML_STATUS_OK) {
 876                                ls.rc = error("XML error: %s",
 877                                              XML_ErrorString(
 878                                                      XML_GetErrorCode(parser)));
 879                        }
 880                } else {
 881                        ls.rc = -1;
 882                }
 883        } else {
 884                ls.rc = error("Unable to start PROPFIND request");
 885        }
 886
 887        free(ls.path);
 888        free(url);
 889        free(out_data);
 890        free(in_buffer.buffer);
 891        curl_slist_free_all(dav_headers);
 892
 893        return ls.rc;
 894}
 895
 896static void process_ls_pack(struct remote_ls_ctx *ls)
 897{
 898        unsigned char sha1[20];
 899
 900        if (strlen(ls->dentry_name) == 63 &&
 901            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 902            has_extension(ls->dentry_name, ".pack")) {
 903                get_sha1_hex(ls->dentry_name + 18, sha1);
 904                setup_index(ls->repo, sha1);
 905        }
 906}
 907#endif
 908
 909static int fetch_indices(struct alt_base *repo)
 910{
 911        unsigned char sha1[20];
 912        char *url;
 913        struct buffer buffer;
 914        char *data;
 915        int i = 0;
 916
 917        struct active_request_slot *slot;
 918        struct slot_results results;
 919
 920        if (repo->got_indices)
 921                return 0;
 922
 923        data = xmalloc(4096);
 924        buffer.size = 4096;
 925        buffer.posn = 0;
 926        buffer.buffer = data;
 927
 928        if (get_verbosely)
 929                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 930
 931#ifndef NO_EXPAT
 932        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 933                      process_ls_pack, NULL) == 0)
 934                return 0;
 935#endif
 936
 937        url = xmalloc(strlen(repo->base) + 21);
 938        sprintf(url, "%s/objects/info/packs", repo->base);
 939
 940        slot = get_active_slot();
 941        slot->results = &results;
 942        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 943        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 944        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 945        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 946        if (start_active_slot(slot)) {
 947                run_active_slot(slot);
 948                if (results.curl_result != CURLE_OK) {
 949                        if (results.http_code == 404 ||
 950                            results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 951                                repo->got_indices = 1;
 952                                free(buffer.buffer);
 953                                return 0;
 954                        } else {
 955                                repo->got_indices = 0;
 956                                free(buffer.buffer);
 957                                return error("%s", curl_errorstr);
 958                        }
 959                }
 960        } else {
 961                repo->got_indices = 0;
 962                free(buffer.buffer);
 963                return error("Unable to start request");
 964        }
 965
 966        data = buffer.buffer;
 967        while (i < buffer.posn) {
 968                switch (data[i]) {
 969                case 'P':
 970                        i++;
 971                        if (i + 52 <= buffer.posn &&
 972                            !strncmp(data + i, " pack-", 6) &&
 973                            !strncmp(data + i + 46, ".pack\n", 6)) {
 974                                get_sha1_hex(data + i + 6, sha1);
 975                                setup_index(repo, sha1);
 976                                i += 51;
 977                                break;
 978                        }
 979                default:
 980                        while (i < buffer.posn && data[i] != '\n')
 981                                i++;
 982                }
 983                i++;
 984        }
 985
 986        free(buffer.buffer);
 987        repo->got_indices = 1;
 988        return 0;
 989}
 990
 991static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 992{
 993        char *url;
 994        struct packed_git *target;
 995        struct packed_git **lst;
 996        FILE *packfile;
 997        char *filename;
 998        char tmpfile[PATH_MAX];
 999        int ret;
1000        long prev_posn = 0;
1001        char range[RANGE_HEADER_SIZE];
1002        struct curl_slist *range_header = NULL;
1003
1004        struct active_request_slot *slot;
1005        struct slot_results results;
1006
1007        if (fetch_indices(repo))
1008                return -1;
1009        target = find_sha1_pack(sha1, repo->packs);
1010        if (!target)
1011                return -1;
1012
1013        if (get_verbosely) {
1014                fprintf(stderr, "Getting pack %s\n",
1015                        sha1_to_hex(target->sha1));
1016                fprintf(stderr, " which contains %s\n",
1017                        sha1_to_hex(sha1));
1018        }
1019
1020        url = xmalloc(strlen(repo->base) + 65);
1021        sprintf(url, "%s/objects/pack/pack-%s.pack",
1022                repo->base, sha1_to_hex(target->sha1));
1023
1024        filename = sha1_pack_name(target->sha1);
1025        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1026        packfile = fopen(tmpfile, "a");
1027        if (!packfile)
1028                return error("Unable to open local file %s for pack",
1029                             filename);
1030
1031        slot = get_active_slot();
1032        slot->results = &results;
1033        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1034        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1035        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1036        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1037        slot->local = packfile;
1038
1039        /* If there is data present from a previous transfer attempt,
1040           resume where it left off */
1041        prev_posn = ftell(packfile);
1042        if (prev_posn>0) {
1043                if (get_verbosely)
1044                        fprintf(stderr,
1045                                "Resuming fetch of pack %s at byte %ld\n",
1046                                sha1_to_hex(target->sha1), prev_posn);
1047                sprintf(range, "Range: bytes=%ld-", prev_posn);
1048                range_header = curl_slist_append(range_header, range);
1049                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1050        }
1051
1052        if (start_active_slot(slot)) {
1053                run_active_slot(slot);
1054                if (results.curl_result != CURLE_OK) {
1055                        fclose(packfile);
1056                        return error("Unable to get pack file %s\n%s", url,
1057                                     curl_errorstr);
1058                }
1059        } else {
1060                fclose(packfile);
1061                return error("Unable to start request");
1062        }
1063
1064        fclose(packfile);
1065
1066        ret = move_temp_to_file(tmpfile, filename);
1067        if (ret)
1068                return ret;
1069
1070        lst = &repo->packs;
1071        while (*lst != target)
1072                lst = &((*lst)->next);
1073        *lst = (*lst)->next;
1074
1075        if (verify_pack(target, 0))
1076                return -1;
1077        install_packed_git(target);
1078
1079        return 0;
1080}
1081
1082static void abort_object_request(struct object_request *obj_req)
1083{
1084        if (obj_req->local >= 0) {
1085                close(obj_req->local);
1086                obj_req->local = -1;
1087        }
1088        unlink(obj_req->tmpfile);
1089        if (obj_req->slot) {
1090                release_active_slot(obj_req->slot);
1091                obj_req->slot = NULL;
1092        }
1093        release_object_request(obj_req);
1094}
1095
1096static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1097{
1098        char *hex = sha1_to_hex(sha1);
1099        int ret = 0;
1100        struct object_request *obj_req = object_queue_head;
1101
1102        while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1103                obj_req = obj_req->next;
1104        if (obj_req == NULL)
1105                return error("Couldn't find request for %s in the queue", hex);
1106
1107        if (has_sha1_file(obj_req->sha1)) {
1108                abort_object_request(obj_req);
1109                return 0;
1110        }
1111
1112#ifdef USE_CURL_MULTI
1113        while (obj_req->state == WAITING) {
1114                step_active_slots();
1115        }
1116#else
1117        start_object_request(obj_req);
1118#endif
1119
1120        while (obj_req->state == ACTIVE) {
1121                run_active_slot(obj_req->slot);
1122        }
1123        if (obj_req->local != -1) {
1124                close(obj_req->local); obj_req->local = -1;
1125        }
1126
1127        if (obj_req->state == ABORTED) {
1128                ret = error("Request for %s aborted", hex);
1129        } else if (obj_req->curl_result != CURLE_OK &&
1130                   obj_req->http_code != 416) {
1131                if (obj_req->http_code == 404 ||
1132                    obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1133                        ret = -1; /* Be silent, it is probably in a pack. */
1134                else
1135                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1136                                    obj_req->errorstr, obj_req->curl_result,
1137                                    obj_req->http_code, hex);
1138        } else if (obj_req->zret != Z_STREAM_END) {
1139                corrupt_object_found++;
1140                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1141        } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1142                ret = error("File %s has bad hash", hex);
1143        } else if (obj_req->rename < 0) {
1144                ret = error("unable to write sha1 filename %s",
1145                            obj_req->filename);
1146        }
1147
1148        release_object_request(obj_req);
1149        return ret;
1150}
1151
1152int fetch(unsigned char *sha1)
1153{
1154        struct alt_base *altbase = alt;
1155
1156        if (!fetch_object(altbase, sha1))
1157                return 0;
1158        while (altbase) {
1159                if (!fetch_pack(altbase, sha1))
1160                        return 0;
1161                fetch_alternates(alt->base);
1162                altbase = altbase->next;
1163        }
1164        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1165                     alt->base);
1166}
1167
/*
 * Tell whether "ch" must be percent-escaped in a URL: returns 0 for ASCII
 * letters, digits, '/', '-' and '.', and 1 for every other value.
 */
static inline int needs_quote(int ch)
{
        if (ch >= 'a' && ch <= 'z')
                return 0;
        if (ch >= 'A' && ch <= 'Z')
                return 0;
        if (ch >= '0' && ch <= '9')
                return 0;

        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                return 1;
        }
}
1179
/*
 * Map a nibble value to its hex digit character: values below 10 map onto
 * '0' + v, the others onto 'A' + v - 10 (upper case).
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1185
/*
 * Build the URL "<base>refs/<ref>" in a newly allocated string, replacing
 * every character of "ref" for which needs_quote() is true by '%' followed
 * by its two upper-case hex digits.  The caller owns the returned buffer.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        int baselen = strlen(base);
        int len = baselen + 6; /* "refs/" plus the trailing NUL */
        const char *src;
        char *out, *url;
        int ch;

        /* Sizing pass: one byte per plain character, three per quoted one. */
        for (src = ref; *src; src++)
                len += needs_quote(*src) ? 3 : 1;

        url = xmalloc(len);
        memcpy(url, base, baselen);
        memcpy(url + baselen, "refs/", 5);
        out = url + baselen + 5;

        /* Copy pass: emit each character, escaped where required. */
        src = ref;
        while ((ch = *src++) != 0) {
                if (!needs_quote(ch)) {
                        *out++ = ch;
                        continue;
                }
                *out++ = '%';
                *out++ = hex((ch >> 4) & 0xF);
                *out++ = hex(ch & 0xF);
        }
        *out = 0;

        return url;
}
1213
1214int fetch_ref(char *ref, unsigned char *sha1)
1215{
1216        char *url;
1217        char hex[42];
1218        struct buffer buffer;
1219        const char *base = alt->base;
1220        struct active_request_slot *slot;
1221        struct slot_results results;
1222        buffer.size = 41;
1223        buffer.posn = 0;
1224        buffer.buffer = hex;
1225        hex[41] = '\0';
1226
1227        url = quote_ref_url(base, ref);
1228        slot = get_active_slot();
1229        slot->results = &results;
1230        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1231        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1232        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1233        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1234        if (start_active_slot(slot)) {
1235                run_active_slot(slot);
1236                if (results.curl_result != CURLE_OK)
1237                        return error("Couldn't get %s for %s\n%s",
1238                                     url, ref, curl_errorstr);
1239        } else {
1240                return error("Unable to start request");
1241        }
1242
1243        hex[40] = '\0';
1244        get_sha1_hex(hex, sha1);
1245        return 0;
1246}
1247
1248int main(int argc, const char **argv)
1249{
1250        int commits;
1251        const char **write_ref = NULL;
1252        char **commit_id;
1253        const char *url;
1254        char *path;
1255        int arg = 1;
1256        int rc = 0;
1257
1258        setup_ident();
1259        setup_git_directory();
1260        git_config(git_default_config);
1261
1262        while (arg < argc && argv[arg][0] == '-') {
1263                if (argv[arg][1] == 't') {
1264                        get_tree = 1;
1265                } else if (argv[arg][1] == 'c') {
1266                        get_history = 1;
1267                } else if (argv[arg][1] == 'a') {
1268                        get_all = 1;
1269                        get_tree = 1;
1270                        get_history = 1;
1271                } else if (argv[arg][1] == 'v') {
1272                        get_verbosely = 1;
1273                } else if (argv[arg][1] == 'w') {
1274                        write_ref = &argv[arg + 1];
1275                        arg++;
1276                } else if (!strcmp(argv[arg], "--recover")) {
1277                        get_recover = 1;
1278                } else if (!strcmp(argv[arg], "--stdin")) {
1279                        commits_on_stdin = 1;
1280                }
1281                arg++;
1282        }
1283        if (argc < arg + 2 - commits_on_stdin) {
1284                usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1285                return 1;
1286        }
1287        if (commits_on_stdin) {
1288                commits = pull_targets_stdin(&commit_id, &write_ref);
1289        } else {
1290                commit_id = (char **) &argv[arg++];
1291                commits = 1;
1292        }
1293        url = argv[arg];
1294
1295        http_init();
1296
1297        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1298
1299        alt = xmalloc(sizeof(*alt));
1300        alt->base = url;
1301        alt->got_indices = 0;
1302        alt->packs = NULL;
1303        alt->next = NULL;
1304        path = strstr(url, "//");
1305        if (path) {
1306                path = strchr(path+2, '/');
1307                if (path)
1308                        alt->path_len = strlen(path);
1309        }
1310
1311        if (pull(commits, commit_id, write_ref, url))
1312                rc = 1;
1313
1314        http_cleanup();
1315
1316        curl_slist_free_all(no_pragma_header);
1317
1318        if (commits_on_stdin)
1319                pull_targets_free(commits, commit_id, write_ref);
1320
1321        if (corrupt_object_found) {
1322                fprintf(stderr,
1323"Some loose object were found to be corrupt, but they might be just\n"
1324"a false '404 Not Found' error message sent with incorrect HTTP\n"
1325"status code.  Suggest running git fsck-objects.\n");
1326        }
1327        return rc;
1328}