http-fetch.c on commit Merge branch 'jk/diff' (b467fb0)
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
  10/* Definitions for DAV requests */
  11#define DAV_PROPFIND "PROPFIND"
  12#define DAV_PROPFIND_RESP ".multistatus.response"
  13#define DAV_PROPFIND_NAME ".multistatus.response.href"
  14#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
  15#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
  17/* Definitions for processing XML DAV responses */
  18#ifndef XML_STATUS_OK
  19enum XML_Status {
  20  XML_STATUS_OK = 1,
  21  XML_STATUS_ERROR = 0
  22};
  23#define XML_STATUS_OK    1
  24#define XML_STATUS_ERROR 0
  25#endif
  26
  27/* Flags that control remote_ls processing */
  28#define PROCESS_FILES (1u << 0)
  29#define PROCESS_DIRS  (1u << 1)
  30#define RECURSIVE     (1u << 2)
  31
  32/* Flags that remote_ls passes to callback functions */
  33#define IS_DIR (1u << 0)
  34#endif
  35
  36#define PREV_BUF_SIZE 4096
  37#define RANGE_HEADER_SIZE 30
  38
  39static int commits_on_stdin;
  40
  41static int got_alternates = -1;
  42static int corrupt_object_found;
  43
  44static struct curl_slist *no_pragma_header;
  45
  46struct alt_base
  47{
  48        const char *base;
  49        int path_len;
  50        int got_indices;
  51        struct packed_git *packs;
  52        struct alt_base *next;
  53};
  54
  55static struct alt_base *alt;
  56
  57enum object_request_state {
  58        WAITING,
  59        ABORTED,
  60        ACTIVE,
  61        COMPLETE,
  62};
  63
  64struct object_request
  65{
  66        unsigned char sha1[20];
  67        struct alt_base *repo;
  68        char *url;
  69        char filename[PATH_MAX];
  70        char tmpfile[PATH_MAX];
  71        int local;
  72        enum object_request_state state;
  73        CURLcode curl_result;
  74        char errorstr[CURL_ERROR_SIZE];
  75        long http_code;
  76        unsigned char real_sha1[20];
  77        SHA_CTX c;
  78        z_stream stream;
  79        int zret;
  80        int rename;
  81        struct active_request_slot *slot;
  82        struct object_request *next;
  83};
  84
  85struct alternates_request {
  86        const char *base;
  87        char *url;
  88        struct buffer *buffer;
  89        struct active_request_slot *slot;
  90        int http_specific;
  91};
  92
  93#ifndef NO_EXPAT
  94struct xml_ctx
  95{
  96        char *name;
  97        int len;
  98        char *cdata;
  99        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
 100        void *userData;
 101};
 102
 103struct remote_ls_ctx
 104{
 105        struct alt_base *repo;
 106        char *path;
 107        void (*userFunc)(struct remote_ls_ctx *ls);
 108        void *userData;
 109        int flags;
 110        char *dentry_name;
 111        int dentry_flags;
 112        int rc;
 113        struct remote_ls_ctx *parent;
 114};
 115#endif
 116
 117static struct object_request *object_queue_head;
 118
 119static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 120                               void *data)
 121{
 122        unsigned char expn[4096];
 123        size_t size = eltsize * nmemb;
 124        int posn = 0;
 125        struct object_request *obj_req = (struct object_request *)data;
 126        do {
 127                ssize_t retval = write(obj_req->local,
 128                                       (char *) ptr + posn, size - posn);
 129                if (retval < 0)
 130                        return posn;
 131                posn += retval;
 132        } while (posn < size);
 133
 134        obj_req->stream.avail_in = size;
 135        obj_req->stream.next_in = ptr;
 136        do {
 137                obj_req->stream.next_out = expn;
 138                obj_req->stream.avail_out = sizeof(expn);
 139                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 140                SHA1_Update(&obj_req->c, expn,
 141                            sizeof(expn) - obj_req->stream.avail_out);
 142        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 143        data_received++;
 144        return size;
 145}
 146
 147static void fetch_alternates(const char *base);
 148
 149static void process_object_response(void *callback_data);
 150
 151static void start_object_request(struct object_request *obj_req)
 152{
 153        char *hex = sha1_to_hex(obj_req->sha1);
 154        char prevfile[PATH_MAX];
 155        char *url;
 156        char *posn;
 157        int prevlocal;
 158        unsigned char prev_buf[PREV_BUF_SIZE];
 159        ssize_t prev_read = 0;
 160        long prev_posn = 0;
 161        char range[RANGE_HEADER_SIZE];
 162        struct curl_slist *range_header = NULL;
 163        struct active_request_slot *slot;
 164
 165        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 166        unlink(prevfile);
 167        rename(obj_req->tmpfile, prevfile);
 168        unlink(obj_req->tmpfile);
 169
 170        if (obj_req->local != -1)
 171                error("fd leakage in start: %d", obj_req->local);
 172        obj_req->local = open(obj_req->tmpfile,
 173                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 174        /* This could have failed due to the "lazy directory creation";
 175         * try to mkdir the last path component.
 176         */
 177        if (obj_req->local < 0 && errno == ENOENT) {
 178                char *dir = strrchr(obj_req->tmpfile, '/');
 179                if (dir) {
 180                        *dir = 0;
 181                        mkdir(obj_req->tmpfile, 0777);
 182                        *dir = '/';
 183                }
 184                obj_req->local = open(obj_req->tmpfile,
 185                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 186        }
 187
 188        if (obj_req->local < 0) {
 189                obj_req->state = ABORTED;
 190                error("Couldn't create temporary file %s for %s: %s",
 191                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 192                return;
 193        }
 194
 195        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 196
 197        inflateInit(&obj_req->stream);
 198
 199        SHA1_Init(&obj_req->c);
 200
 201        url = xmalloc(strlen(obj_req->repo->base) + 50);
 202        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 203        strcpy(url, obj_req->repo->base);
 204        posn = url + strlen(obj_req->repo->base);
 205        strcpy(posn, "objects/");
 206        posn += 8;
 207        memcpy(posn, hex, 2);
 208        posn += 2;
 209        *(posn++) = '/';
 210        strcpy(posn, hex + 2);
 211        strcpy(obj_req->url, url);
 212
 213        /* If a previous temp file is present, process what was already
 214           fetched. */
 215        prevlocal = open(prevfile, O_RDONLY);
 216        if (prevlocal != -1) {
 217                do {
 218                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 219                        if (prev_read>0) {
 220                                if (fwrite_sha1_file(prev_buf,
 221                                                     1,
 222                                                     prev_read,
 223                                                     obj_req) == prev_read) {
 224                                        prev_posn += prev_read;
 225                                } else {
 226                                        prev_read = -1;
 227                                }
 228                        }
 229                } while (prev_read > 0);
 230                close(prevlocal);
 231        }
 232        unlink(prevfile);
 233
 234        /* Reset inflate/SHA1 if there was an error reading the previous temp
 235           file; also rewind to the beginning of the local file. */
 236        if (prev_read == -1) {
 237                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 238                inflateInit(&obj_req->stream);
 239                SHA1_Init(&obj_req->c);
 240                if (prev_posn>0) {
 241                        prev_posn = 0;
 242                        lseek(obj_req->local, SEEK_SET, 0);
 243                        ftruncate(obj_req->local, 0);
 244                }
 245        }
 246
 247        slot = get_active_slot();
 248        slot->callback_func = process_object_response;
 249        slot->callback_data = obj_req;
 250        obj_req->slot = slot;
 251
 252        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 253        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 254        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 255        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 256        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 257
 258        /* If we have successfully processed data from a previous fetch
 259           attempt, only fetch the data we don't already have. */
 260        if (prev_posn>0) {
 261                if (get_verbosely)
 262                        fprintf(stderr,
 263                                "Resuming fetch of object %s at byte %ld\n",
 264                                hex, prev_posn);
 265                sprintf(range, "Range: bytes=%ld-", prev_posn);
 266                range_header = curl_slist_append(range_header, range);
 267                curl_easy_setopt(slot->curl,
 268                                 CURLOPT_HTTPHEADER, range_header);
 269        }
 270
 271        /* Try to get the request started, abort the request on error */
 272        obj_req->state = ACTIVE;
 273        if (!start_active_slot(slot)) {
 274                obj_req->state = ABORTED;
 275                obj_req->slot = NULL;
 276                close(obj_req->local); obj_req->local = -1;
 277                free(obj_req->url);
 278                return;
 279        }
 280}
 281
 282static void finish_object_request(struct object_request *obj_req)
 283{
 284        struct stat st;
 285
 286        fchmod(obj_req->local, 0444);
 287        close(obj_req->local); obj_req->local = -1;
 288
 289        if (obj_req->http_code == 416) {
 290                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 291        } else if (obj_req->curl_result != CURLE_OK) {
 292                if (stat(obj_req->tmpfile, &st) == 0)
 293                        if (st.st_size == 0)
 294                                unlink(obj_req->tmpfile);
 295                return;
 296        }
 297
 298        inflateEnd(&obj_req->stream);
 299        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 300        if (obj_req->zret != Z_STREAM_END) {
 301                unlink(obj_req->tmpfile);
 302                return;
 303        }
 304        if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
 305                unlink(obj_req->tmpfile);
 306                return;
 307        }
 308        obj_req->rename =
 309                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 310
 311        if (obj_req->rename == 0)
 312                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 313}
 314
 315static void process_object_response(void *callback_data)
 316{
 317        struct object_request *obj_req =
 318                (struct object_request *)callback_data;
 319
 320        obj_req->curl_result = obj_req->slot->curl_result;
 321        obj_req->http_code = obj_req->slot->http_code;
 322        obj_req->slot = NULL;
 323        obj_req->state = COMPLETE;
 324
 325        /* Use alternates if necessary */
 326        if (obj_req->http_code == 404 ||
 327            obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 328                fetch_alternates(alt->base);
 329                if (obj_req->repo->next != NULL) {
 330                        obj_req->repo =
 331                                obj_req->repo->next;
 332                        close(obj_req->local);
 333                        obj_req->local = -1;
 334                        start_object_request(obj_req);
 335                        return;
 336                }
 337        }
 338
 339        finish_object_request(obj_req);
 340}
 341
 342static void release_object_request(struct object_request *obj_req)
 343{
 344        struct object_request *entry = object_queue_head;
 345
 346        if (obj_req->local != -1)
 347                error("fd leakage in release: %d", obj_req->local);
 348        if (obj_req == object_queue_head) {
 349                object_queue_head = obj_req->next;
 350        } else {
 351                while (entry->next != NULL && entry->next != obj_req)
 352                        entry = entry->next;
 353                if (entry->next == obj_req)
 354                        entry->next = entry->next->next;
 355        }
 356
 357        free(obj_req->url);
 358        free(obj_req);
 359}
 360
 361#ifdef USE_CURL_MULTI
 362void fill_active_slots(void)
 363{
 364        struct object_request *obj_req = object_queue_head;
 365        struct active_request_slot *slot = active_queue_head;
 366        int num_transfers;
 367
 368        while (active_requests < max_requests && obj_req != NULL) {
 369                if (obj_req->state == WAITING) {
 370                        if (has_sha1_file(obj_req->sha1))
 371                                obj_req->state = COMPLETE;
 372                        else
 373                                start_object_request(obj_req);
 374                        curl_multi_perform(curlm, &num_transfers);
 375                }
 376                obj_req = obj_req->next;
 377        }
 378
 379        while (slot != NULL) {
 380                if (!slot->in_use && slot->curl != NULL) {
 381                        curl_easy_cleanup(slot->curl);
 382                        slot->curl = NULL;
 383                }
 384                slot = slot->next;
 385        }
 386}
 387#endif
 388
 389void prefetch(unsigned char *sha1)
 390{
 391        struct object_request *newreq;
 392        struct object_request *tail;
 393        char *filename = sha1_file_name(sha1);
 394
 395        newreq = xmalloc(sizeof(*newreq));
 396        hashcpy(newreq->sha1, sha1);
 397        newreq->repo = alt;
 398        newreq->url = NULL;
 399        newreq->local = -1;
 400        newreq->state = WAITING;
 401        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 402        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 403                 "%s.temp", filename);
 404        newreq->slot = NULL;
 405        newreq->next = NULL;
 406
 407        if (object_queue_head == NULL) {
 408                object_queue_head = newreq;
 409        } else {
 410                tail = object_queue_head;
 411                while (tail->next != NULL) {
 412                        tail = tail->next;
 413                }
 414                tail->next = newreq;
 415        }
 416
 417#ifdef USE_CURL_MULTI
 418        fill_active_slots();
 419        step_active_slots();
 420#endif
 421}
 422
 423static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 424{
 425        char *hex = sha1_to_hex(sha1);
 426        char *filename;
 427        char *url;
 428        char tmpfile[PATH_MAX];
 429        long prev_posn = 0;
 430        char range[RANGE_HEADER_SIZE];
 431        struct curl_slist *range_header = NULL;
 432
 433        FILE *indexfile;
 434        struct active_request_slot *slot;
 435        struct slot_results results;
 436
 437        if (has_pack_index(sha1))
 438                return 0;
 439
 440        if (get_verbosely)
 441                fprintf(stderr, "Getting index for pack %s\n", hex);
 442
 443        url = xmalloc(strlen(repo->base) + 64);
 444        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 445
 446        filename = sha1_pack_index_name(sha1);
 447        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 448        indexfile = fopen(tmpfile, "a");
 449        if (!indexfile)
 450                return error("Unable to open local file %s for pack index",
 451                             filename);
 452
 453        slot = get_active_slot();
 454        slot->results = &results;
 455        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 456        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 457        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 458        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 459        slot->local = indexfile;
 460
 461        /* If there is data present from a previous transfer attempt,
 462           resume where it left off */
 463        prev_posn = ftell(indexfile);
 464        if (prev_posn>0) {
 465                if (get_verbosely)
 466                        fprintf(stderr,
 467                                "Resuming fetch of index for pack %s at byte %ld\n",
 468                                hex, prev_posn);
 469                sprintf(range, "Range: bytes=%ld-", prev_posn);
 470                range_header = curl_slist_append(range_header, range);
 471                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 472        }
 473
 474        if (start_active_slot(slot)) {
 475                run_active_slot(slot);
 476                if (results.curl_result != CURLE_OK) {
 477                        fclose(indexfile);
 478                        return error("Unable to get pack index %s\n%s", url,
 479                                     curl_errorstr);
 480                }
 481        } else {
 482                fclose(indexfile);
 483                return error("Unable to start request");
 484        }
 485
 486        fclose(indexfile);
 487
 488        return move_temp_to_file(tmpfile, filename);
 489}
 490
 491static int setup_index(struct alt_base *repo, unsigned char *sha1)
 492{
 493        struct packed_git *new_pack;
 494        if (has_pack_file(sha1))
 495                return 0; /* don't list this as something we can get */
 496
 497        if (fetch_index(repo, sha1))
 498                return -1;
 499
 500        new_pack = parse_pack_index(sha1);
 501        new_pack->next = repo->packs;
 502        repo->packs = new_pack;
 503        return 0;
 504}
 505
 506static void process_alternates_response(void *callback_data)
 507{
 508        struct alternates_request *alt_req =
 509                (struct alternates_request *)callback_data;
 510        struct active_request_slot *slot = alt_req->slot;
 511        struct alt_base *tail = alt;
 512        const char *base = alt_req->base;
 513        static const char null_byte = '\0';
 514        char *data;
 515        int i = 0;
 516
 517        if (alt_req->http_specific) {
 518                if (slot->curl_result != CURLE_OK ||
 519                    !alt_req->buffer->posn) {
 520
 521                        /* Try reusing the slot to get non-http alternates */
 522                        alt_req->http_specific = 0;
 523                        sprintf(alt_req->url, "%s/objects/info/alternates",
 524                                base);
 525                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 526                                         alt_req->url);
 527                        active_requests++;
 528                        slot->in_use = 1;
 529                        if (slot->finished != NULL)
 530                                (*slot->finished) = 0;
 531                        if (!start_active_slot(slot)) {
 532                                got_alternates = -1;
 533                                slot->in_use = 0;
 534                                if (slot->finished != NULL)
 535                                        (*slot->finished) = 1;
 536                        }
 537                        return;
 538                }
 539        } else if (slot->curl_result != CURLE_OK) {
 540                if (slot->http_code != 404 &&
 541                    slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
 542                        got_alternates = -1;
 543                        return;
 544                }
 545        }
 546
 547        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 548        alt_req->buffer->posn--;
 549        data = alt_req->buffer->buffer;
 550
 551        while (i < alt_req->buffer->posn) {
 552                int posn = i;
 553                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 554                        posn++;
 555                if (data[posn] == '\n') {
 556                        int okay = 0;
 557                        int serverlen = 0;
 558                        struct alt_base *newalt;
 559                        char *target = NULL;
 560                        char *path;
 561                        if (data[i] == '/') {
 562                                /* This counts
 563                                 * http://git.host/pub/scm/linux.git/
 564                                 * -----------here^
 565                                 * so memcpy(dst, base, serverlen) will
 566                                 * copy up to "...git.host".
 567                                 */
 568                                const char *colon_ss = strstr(base,"://");
 569                                if (colon_ss) {
 570                                        serverlen = (strchr(colon_ss + 3, '/')
 571                                                     - base);
 572                                        okay = 1;
 573                                }
 574                        } else if (!memcmp(data + i, "../", 3)) {
 575                                /* Relative URL; chop the corresponding
 576                                 * number of subpath from base (and ../
 577                                 * from data), and concatenate the result.
 578                                 *
 579                                 * The code first drops ../ from data, and
 580                                 * then drops one ../ from data and one path
 581                                 * from base.  IOW, one extra ../ is dropped
 582                                 * from data than path is dropped from base.
 583                                 *
 584                                 * This is not wrong.  The alternate in
 585                                 *     http://git.host/pub/scm/linux.git/
 586                                 * to borrow from
 587                                 *     http://git.host/pub/scm/linus.git/
 588                                 * is ../../linus.git/objects/.  You need
 589                                 * two ../../ to borrow from your direct
 590                                 * neighbour.
 591                                 */
 592                                i += 3;
 593                                serverlen = strlen(base);
 594                                while (i + 2 < posn &&
 595                                       !memcmp(data + i, "../", 3)) {
 596                                        do {
 597                                                serverlen--;
 598                                        } while (serverlen &&
 599                                                 base[serverlen - 1] != '/');
 600                                        i += 3;
 601                                }
 602                                /* If the server got removed, give up. */
 603                                okay = strchr(base, ':') - base + 3 <
 604                                        serverlen;
 605                        } else if (alt_req->http_specific) {
 606                                char *colon = strchr(data + i, ':');
 607                                char *slash = strchr(data + i, '/');
 608                                if (colon && slash && colon < data + posn &&
 609                                    slash < data + posn && colon < slash) {
 610                                        okay = 1;
 611                                }
 612                        }
 613                        /* skip "objects\n" at end */
 614                        if (okay) {
 615                                target = xmalloc(serverlen + posn - i - 6);
 616                                memcpy(target, base, serverlen);
 617                                memcpy(target + serverlen, data + i,
 618                                       posn - i - 7);
 619                                target[serverlen + posn - i - 7] = 0;
 620                                if (get_verbosely)
 621                                        fprintf(stderr,
 622                                                "Also look at %s\n", target);
 623                                newalt = xmalloc(sizeof(*newalt));
 624                                newalt->next = NULL;
 625                                newalt->base = target;
 626                                newalt->got_indices = 0;
 627                                newalt->packs = NULL;
 628                                path = strstr(target, "//");
 629                                if (path) {
 630                                        path = strchr(path+2, '/');
 631                                        if (path)
 632                                                newalt->path_len = strlen(path);
 633                                }
 634
 635                                while (tail->next != NULL)
 636                                        tail = tail->next;
 637                                tail->next = newalt;
 638                        }
 639                }
 640                i = posn + 1;
 641        }
 642
 643        got_alternates = 1;
 644}
 645
 646static void fetch_alternates(const char *base)
 647{
 648        struct buffer buffer;
 649        char *url;
 650        char *data;
 651        struct active_request_slot *slot;
 652        struct alternates_request alt_req;
 653
 654        /* If another request has already started fetching alternates,
 655           wait for them to arrive and return to processing this request's
 656           curl message */
 657#ifdef USE_CURL_MULTI
 658        while (got_alternates == 0) {
 659                step_active_slots();
 660        }
 661#endif
 662
 663        /* Nothing to do if they've already been fetched */
 664        if (got_alternates == 1)
 665                return;
 666
 667        /* Start the fetch */
 668        got_alternates = 0;
 669
 670        data = xmalloc(4096);
 671        buffer.size = 4096;
 672        buffer.posn = 0;
 673        buffer.buffer = data;
 674
 675        if (get_verbosely)
 676                fprintf(stderr, "Getting alternates list for %s\n", base);
 677
 678        url = xmalloc(strlen(base) + 31);
 679        sprintf(url, "%s/objects/info/http-alternates", base);
 680
 681        /* Use a callback to process the result, since another request
 682           may fail and need to have alternates loaded before continuing */
 683        slot = get_active_slot();
 684        slot->callback_func = process_alternates_response;
 685        slot->callback_data = &alt_req;
 686
 687        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 688        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 689        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 690
 691        alt_req.base = base;
 692        alt_req.url = url;
 693        alt_req.buffer = &buffer;
 694        alt_req.http_specific = 1;
 695        alt_req.slot = slot;
 696
 697        if (start_active_slot(slot))
 698                run_active_slot(slot);
 699        else
 700                got_alternates = -1;
 701
 702        free(data);
 703        free(url);
 704}
 705
 706#ifndef NO_EXPAT
 707static void
 708xml_start_tag(void *userData, const char *name, const char **atts)
 709{
 710        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 711        const char *c = strchr(name, ':');
 712        int new_len;
 713
 714        if (c == NULL)
 715                c = name;
 716        else
 717                c++;
 718
 719        new_len = strlen(ctx->name) + strlen(c) + 2;
 720
 721        if (new_len > ctx->len) {
 722                ctx->name = xrealloc(ctx->name, new_len);
 723                ctx->len = new_len;
 724        }
 725        strcat(ctx->name, ".");
 726        strcat(ctx->name, c);
 727
 728        free(ctx->cdata);
 729        ctx->cdata = NULL;
 730
 731        ctx->userFunc(ctx, 0);
 732}
 733
 734static void
 735xml_end_tag(void *userData, const char *name)
 736{
 737        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 738        const char *c = strchr(name, ':');
 739        char *ep;
 740
 741        ctx->userFunc(ctx, 1);
 742
 743        if (c == NULL)
 744                c = name;
 745        else
 746                c++;
 747
 748        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 749        *ep = 0;
 750}
 751
 752static void
 753xml_cdata(void *userData, const XML_Char *s, int len)
 754{
 755        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 756        free(ctx->cdata);
 757        ctx->cdata = xmalloc(len + 1);
 758        strlcpy(ctx->cdata, s, len + 1);
 759}
 760
 761static int remote_ls(struct alt_base *repo, const char *path, int flags,
 762                     void (*userFunc)(struct remote_ls_ctx *ls),
 763                     void *userData);
 764
 765static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 766{
 767        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 768
 769        if (tag_closed) {
 770                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 771                        if (ls->dentry_flags & IS_DIR) {
 772                                if (ls->flags & PROCESS_DIRS) {
 773                                        ls->userFunc(ls);
 774                                }
 775                                if (strcmp(ls->dentry_name, ls->path) &&
 776                                    ls->flags & RECURSIVE) {
 777                                        ls->rc = remote_ls(ls->repo,
 778                                                           ls->dentry_name,
 779                                                           ls->flags,
 780                                                           ls->userFunc,
 781                                                           ls->userData);
 782                                }
 783                        } else if (ls->flags & PROCESS_FILES) {
 784                                ls->userFunc(ls);
 785                        }
 786                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 787                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 788                                                  ls->repo->path_len + 1);
 789                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 790                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 791                        ls->dentry_flags |= IS_DIR;
 792                }
 793        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 794                free(ls->dentry_name);
 795                ls->dentry_name = NULL;
 796                ls->dentry_flags = 0;
 797        }
 798}
 799
 800static int remote_ls(struct alt_base *repo, const char *path, int flags,
 801                     void (*userFunc)(struct remote_ls_ctx *ls),
 802                     void *userData)
 803{
 804        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 805        struct active_request_slot *slot;
 806        struct slot_results results;
 807        struct buffer in_buffer;
 808        struct buffer out_buffer;
 809        char *in_data;
 810        char *out_data;
 811        XML_Parser parser = XML_ParserCreate(NULL);
 812        enum XML_Status result;
 813        struct curl_slist *dav_headers = NULL;
 814        struct xml_ctx ctx;
 815        struct remote_ls_ctx ls;
 816
 817        ls.flags = flags;
 818        ls.repo = repo;
 819        ls.path = xstrdup(path);
 820        ls.dentry_name = NULL;
 821        ls.dentry_flags = 0;
 822        ls.userData = userData;
 823        ls.userFunc = userFunc;
 824        ls.rc = 0;
 825
 826        sprintf(url, "%s%s", repo->base, path);
 827
 828        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 829        out_data = xmalloc(out_buffer.size + 1);
 830        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 831        out_buffer.posn = 0;
 832        out_buffer.buffer = out_data;
 833
 834        in_buffer.size = 4096;
 835        in_data = xmalloc(in_buffer.size);
 836        in_buffer.posn = 0;
 837        in_buffer.buffer = in_data;
 838
 839        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 840        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 841
 842        slot = get_active_slot();
 843        slot->results = &results;
 844        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 845        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 846        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 847        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 848        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 849        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 850        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 851        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 852        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 853
 854        if (start_active_slot(slot)) {
 855                run_active_slot(slot);
 856                if (results.curl_result == CURLE_OK) {
 857                        ctx.name = xcalloc(10, 1);
 858                        ctx.len = 0;
 859                        ctx.cdata = NULL;
 860                        ctx.userFunc = handle_remote_ls_ctx;
 861                        ctx.userData = &ls;
 862                        XML_SetUserData(parser, &ctx);
 863                        XML_SetElementHandler(parser, xml_start_tag,
 864                                              xml_end_tag);
 865                        XML_SetCharacterDataHandler(parser, xml_cdata);
 866                        result = XML_Parse(parser, in_buffer.buffer,
 867                                           in_buffer.posn, 1);
 868                        free(ctx.name);
 869
 870                        if (result != XML_STATUS_OK) {
 871                                ls.rc = error("XML error: %s",
 872                                              XML_ErrorString(
 873                                                      XML_GetErrorCode(parser)));
 874                        }
 875                } else {
 876                        ls.rc = -1;
 877                }
 878        } else {
 879                ls.rc = error("Unable to start PROPFIND request");
 880        }
 881
 882        free(ls.path);
 883        free(url);
 884        free(out_data);
 885        free(in_buffer.buffer);
 886        curl_slist_free_all(dav_headers);
 887
 888        return ls.rc;
 889}
 890
 891static void process_ls_pack(struct remote_ls_ctx *ls)
 892{
 893        unsigned char sha1[20];
 894
 895        if (strlen(ls->dentry_name) == 63 &&
 896            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 897            has_extension(ls->dentry_name, ".pack")) {
 898                get_sha1_hex(ls->dentry_name + 18, sha1);
 899                setup_index(ls->repo, sha1);
 900        }
 901}
 902#endif
 903
 904static int fetch_indices(struct alt_base *repo)
 905{
 906        unsigned char sha1[20];
 907        char *url;
 908        struct buffer buffer;
 909        char *data;
 910        int i = 0;
 911
 912        struct active_request_slot *slot;
 913        struct slot_results results;
 914
 915        if (repo->got_indices)
 916                return 0;
 917
 918        data = xmalloc(4096);
 919        buffer.size = 4096;
 920        buffer.posn = 0;
 921        buffer.buffer = data;
 922
 923        if (get_verbosely)
 924                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 925
 926#ifndef NO_EXPAT
 927        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 928                      process_ls_pack, NULL) == 0)
 929                return 0;
 930#endif
 931
 932        url = xmalloc(strlen(repo->base) + 21);
 933        sprintf(url, "%s/objects/info/packs", repo->base);
 934
 935        slot = get_active_slot();
 936        slot->results = &results;
 937        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 938        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 939        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 940        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 941        if (start_active_slot(slot)) {
 942                run_active_slot(slot);
 943                if (results.curl_result != CURLE_OK) {
 944                        if (results.http_code == 404 ||
 945                            results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 946                                repo->got_indices = 1;
 947                                free(buffer.buffer);
 948                                return 0;
 949                        } else {
 950                                repo->got_indices = 0;
 951                                free(buffer.buffer);
 952                                return error("%s", curl_errorstr);
 953                        }
 954                }
 955        } else {
 956                repo->got_indices = 0;
 957                free(buffer.buffer);
 958                return error("Unable to start request");
 959        }
 960
 961        data = buffer.buffer;
 962        while (i < buffer.posn) {
 963                switch (data[i]) {
 964                case 'P':
 965                        i++;
 966                        if (i + 52 <= buffer.posn &&
 967                            !strncmp(data + i, " pack-", 6) &&
 968                            !strncmp(data + i + 46, ".pack\n", 6)) {
 969                                get_sha1_hex(data + i + 6, sha1);
 970                                setup_index(repo, sha1);
 971                                i += 51;
 972                                break;
 973                        }
 974                default:
 975                        while (i < buffer.posn && data[i] != '\n')
 976                                i++;
 977                }
 978                i++;
 979        }
 980
 981        free(buffer.buffer);
 982        repo->got_indices = 1;
 983        return 0;
 984}
 985
 986static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 987{
 988        char *url;
 989        struct packed_git *target;
 990        struct packed_git **lst;
 991        FILE *packfile;
 992        char *filename;
 993        char tmpfile[PATH_MAX];
 994        int ret;
 995        long prev_posn = 0;
 996        char range[RANGE_HEADER_SIZE];
 997        struct curl_slist *range_header = NULL;
 998
 999        struct active_request_slot *slot;
1000        struct slot_results results;
1001
1002        if (fetch_indices(repo))
1003                return -1;
1004        target = find_sha1_pack(sha1, repo->packs);
1005        if (!target)
1006                return -1;
1007
1008        if (get_verbosely) {
1009                fprintf(stderr, "Getting pack %s\n",
1010                        sha1_to_hex(target->sha1));
1011                fprintf(stderr, " which contains %s\n",
1012                        sha1_to_hex(sha1));
1013        }
1014
1015        url = xmalloc(strlen(repo->base) + 65);
1016        sprintf(url, "%s/objects/pack/pack-%s.pack",
1017                repo->base, sha1_to_hex(target->sha1));
1018
1019        filename = sha1_pack_name(target->sha1);
1020        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1021        packfile = fopen(tmpfile, "a");
1022        if (!packfile)
1023                return error("Unable to open local file %s for pack",
1024                             filename);
1025
1026        slot = get_active_slot();
1027        slot->results = &results;
1028        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1029        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1030        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1031        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1032        slot->local = packfile;
1033
1034        /* If there is data present from a previous transfer attempt,
1035           resume where it left off */
1036        prev_posn = ftell(packfile);
1037        if (prev_posn>0) {
1038                if (get_verbosely)
1039                        fprintf(stderr,
1040                                "Resuming fetch of pack %s at byte %ld\n",
1041                                sha1_to_hex(target->sha1), prev_posn);
1042                sprintf(range, "Range: bytes=%ld-", prev_posn);
1043                range_header = curl_slist_append(range_header, range);
1044                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1045        }
1046
1047        if (start_active_slot(slot)) {
1048                run_active_slot(slot);
1049                if (results.curl_result != CURLE_OK) {
1050                        fclose(packfile);
1051                        return error("Unable to get pack file %s\n%s", url,
1052                                     curl_errorstr);
1053                }
1054        } else {
1055                fclose(packfile);
1056                return error("Unable to start request");
1057        }
1058
1059        fclose(packfile);
1060
1061        ret = move_temp_to_file(tmpfile, filename);
1062        if (ret)
1063                return ret;
1064
1065        lst = &repo->packs;
1066        while (*lst != target)
1067                lst = &((*lst)->next);
1068        *lst = (*lst)->next;
1069
1070        if (verify_pack(target, 0))
1071                return -1;
1072        install_packed_git(target);
1073
1074        return 0;
1075}
1076
1077static void abort_object_request(struct object_request *obj_req)
1078{
1079        if (obj_req->local >= 0) {
1080                close(obj_req->local);
1081                obj_req->local = -1;
1082        }
1083        unlink(obj_req->tmpfile);
1084        if (obj_req->slot) {
1085                release_active_slot(obj_req->slot);
1086                obj_req->slot = NULL;
1087        }
1088        release_object_request(obj_req);
1089}
1090
1091static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1092{
1093        char *hex = sha1_to_hex(sha1);
1094        int ret = 0;
1095        struct object_request *obj_req = object_queue_head;
1096
1097        while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1098                obj_req = obj_req->next;
1099        if (obj_req == NULL)
1100                return error("Couldn't find request for %s in the queue", hex);
1101
1102        if (has_sha1_file(obj_req->sha1)) {
1103                abort_object_request(obj_req);
1104                return 0;
1105        }
1106
1107#ifdef USE_CURL_MULTI
1108        while (obj_req->state == WAITING) {
1109                step_active_slots();
1110        }
1111#else
1112        start_object_request(obj_req);
1113#endif
1114
1115        while (obj_req->state == ACTIVE) {
1116                run_active_slot(obj_req->slot);
1117        }
1118        if (obj_req->local != -1) {
1119                close(obj_req->local); obj_req->local = -1;
1120        }
1121
1122        if (obj_req->state == ABORTED) {
1123                ret = error("Request for %s aborted", hex);
1124        } else if (obj_req->curl_result != CURLE_OK &&
1125                   obj_req->http_code != 416) {
1126                if (obj_req->http_code == 404 ||
1127                    obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1128                        ret = -1; /* Be silent, it is probably in a pack. */
1129                else
1130                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1131                                    obj_req->errorstr, obj_req->curl_result,
1132                                    obj_req->http_code, hex);
1133        } else if (obj_req->zret != Z_STREAM_END) {
1134                corrupt_object_found++;
1135                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1136        } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1137                ret = error("File %s has bad hash", hex);
1138        } else if (obj_req->rename < 0) {
1139                ret = error("unable to write sha1 filename %s",
1140                            obj_req->filename);
1141        }
1142
1143        release_object_request(obj_req);
1144        return ret;
1145}
1146
1147int fetch(unsigned char *sha1)
1148{
1149        struct alt_base *altbase = alt;
1150
1151        if (!fetch_object(altbase, sha1))
1152                return 0;
1153        while (altbase) {
1154                if (!fetch_pack(altbase, sha1))
1155                        return 0;
1156                fetch_alternates(alt->base);
1157                altbase = altbase->next;
1158        }
1159        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1160                     alt->base);
1161}
1162
/*
 * Return 0 for characters copied into a URL verbatim (ASCII letters,
 * digits, '/', '-' and '.'), and 1 for everything else.
 */
static inline int needs_quote(int ch)
{
        if (ch >= 'A' && ch <= 'Z')
                return 0;
        if (ch >= 'a' && ch <= 'z')
                return 0;
        if (ch >= '0' && ch <= '9')
                return 0;

        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                return 1;
        }
}
1174
/* Map v to a hex digit character: '0'-'9' below 10, 'A' upward from 10. */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1180
/*
 * Build the URL of a ref: base, then "refs/", then ref with every byte for
 * which needs_quote() is true written as "%XX".
 *
 * A '/' is inserted after base when base is non-empty and does not already
 * end in one, so both "http://host/repo/" and "http://host/repo" give a
 * usable URL.
 *
 * Returns a buffer from xmalloc() that the caller must free.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *cp;
        char *dp, *qref;
        int len, baselen, ch, need_slash;

        baselen = strlen(base);
        need_slash = (baselen > 0 && base[baselen - 1] != '/');
        len = baselen + need_slash + 6; /* "refs/" + NUL */
        /* Read bytes as unsigned char so the shifts below see 0..255. */
        for (cp = ref; (ch = (unsigned char)*cp) != 0; cp++, len++)
                if (needs_quote(ch))
                        len += 2; /* extra two hex plus replacement % */
        qref = xmalloc(len);
        memcpy(qref, base, baselen);
        dp = qref + baselen;
        if (need_slash)
                *dp++ = '/';
        memcpy(dp, "refs/", 5);
        dp += 5;
        for (cp = ref; (ch = (unsigned char)*cp) != 0; cp++) {
                if (needs_quote(ch)) {
                        *dp++ = '%';
                        *dp++ = hex((ch >> 4) & 0xF);
                        *dp++ = hex(ch & 0xF);
                }
                else
                        *dp++ = ch;
        }
        *dp = 0;

        return qref;
}
1208
1209int fetch_ref(char *ref, unsigned char *sha1)
1210{
1211        char *url;
1212        char hex[42];
1213        struct buffer buffer;
1214        const char *base = alt->base;
1215        struct active_request_slot *slot;
1216        struct slot_results results;
1217        buffer.size = 41;
1218        buffer.posn = 0;
1219        buffer.buffer = hex;
1220        hex[41] = '\0';
1221
1222        url = quote_ref_url(base, ref);
1223        slot = get_active_slot();
1224        slot->results = &results;
1225        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1226        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1227        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1228        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1229        if (start_active_slot(slot)) {
1230                run_active_slot(slot);
1231                if (results.curl_result != CURLE_OK)
1232                        return error("Couldn't get %s for %s\n%s",
1233                                     url, ref, curl_errorstr);
1234        } else {
1235                return error("Unable to start request");
1236        }
1237
1238        hex[40] = '\0';
1239        get_sha1_hex(hex, sha1);
1240        return 0;
1241}
1242
1243int main(int argc, const char **argv)
1244{
1245        int commits;
1246        const char **write_ref = NULL;
1247        char **commit_id;
1248        const char *url;
1249        char *path;
1250        int arg = 1;
1251        int rc = 0;
1252
1253        setup_ident();
1254        setup_git_directory();
1255        git_config(git_default_config);
1256
1257        while (arg < argc && argv[arg][0] == '-') {
1258                if (argv[arg][1] == 't') {
1259                        get_tree = 1;
1260                } else if (argv[arg][1] == 'c') {
1261                        get_history = 1;
1262                } else if (argv[arg][1] == 'a') {
1263                        get_all = 1;
1264                        get_tree = 1;
1265                        get_history = 1;
1266                } else if (argv[arg][1] == 'v') {
1267                        get_verbosely = 1;
1268                } else if (argv[arg][1] == 'w') {
1269                        write_ref = &argv[arg + 1];
1270                        arg++;
1271                } else if (!strcmp(argv[arg], "--recover")) {
1272                        get_recover = 1;
1273                } else if (!strcmp(argv[arg], "--stdin")) {
1274                        commits_on_stdin = 1;
1275                }
1276                arg++;
1277        }
1278        if (argc < arg + 2 - commits_on_stdin) {
1279                usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1280                return 1;
1281        }
1282        if (commits_on_stdin) {
1283                commits = pull_targets_stdin(&commit_id, &write_ref);
1284        } else {
1285                commit_id = (char **) &argv[arg++];
1286                commits = 1;
1287        }
1288        url = argv[arg];
1289
1290        http_init();
1291
1292        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1293
1294        alt = xmalloc(sizeof(*alt));
1295        alt->base = url;
1296        alt->got_indices = 0;
1297        alt->packs = NULL;
1298        alt->next = NULL;
1299        path = strstr(url, "//");
1300        if (path) {
1301                path = strchr(path+2, '/');
1302                if (path)
1303                        alt->path_len = strlen(path);
1304        }
1305
1306        if (pull(commits, commit_id, write_ref, url))
1307                rc = 1;
1308
1309        http_cleanup();
1310
1311        curl_slist_free_all(no_pragma_header);
1312
1313        if (commits_on_stdin)
1314                pull_targets_free(commits, commit_id, write_ref);
1315
1316        if (corrupt_object_found) {
1317                fprintf(stderr,
1318"Some loose object were found to be corrupt, but they might be just\n"
1319"a false '404 Not Found' error message sent with incorrect HTTP\n"
1320"status code.  Suggest running git fsck-objects.\n");
1321        }
1322        return rc;
1323}