http-fetch.c on commit gitweb: Add diff tree, with links to patches, to commitdiff view (b4657e7)
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
  10/* Definitions for DAV requests */
  11#define DAV_PROPFIND "PROPFIND"
  12#define DAV_PROPFIND_RESP ".multistatus.response"
  13#define DAV_PROPFIND_NAME ".multistatus.response.href"
  14#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
  15#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
  17/* Definitions for processing XML DAV responses */
  18#ifndef XML_STATUS_OK
  19enum XML_Status {
  20  XML_STATUS_OK = 1,
  21  XML_STATUS_ERROR = 0
  22};
  23#define XML_STATUS_OK    1
  24#define XML_STATUS_ERROR 0
  25#endif
  26
  27/* Flags that control remote_ls processing */
  28#define PROCESS_FILES (1u << 0)
  29#define PROCESS_DIRS  (1u << 1)
  30#define RECURSIVE     (1u << 2)
  31
  32/* Flags that remote_ls passes to callback functions */
  33#define IS_DIR (1u << 0)
  34#endif
  35
  36#define PREV_BUF_SIZE 4096
  37#define RANGE_HEADER_SIZE 30
  38
  39static int commits_on_stdin;
  40
  41static int got_alternates = -1;
  42static int corrupt_object_found;
  43
  44static struct curl_slist *no_pragma_header;
  45
  46struct alt_base
  47{
  48        const char *base;
  49        int path_len;
  50        int got_indices;
  51        struct packed_git *packs;
  52        struct alt_base *next;
  53};
  54
  55static struct alt_base *alt;
  56
  57enum object_request_state {
  58        WAITING,
  59        ABORTED,
  60        ACTIVE,
  61        COMPLETE,
  62};
  63
  64struct object_request
  65{
  66        unsigned char sha1[20];
  67        struct alt_base *repo;
  68        char *url;
  69        char filename[PATH_MAX];
  70        char tmpfile[PATH_MAX];
  71        int local;
  72        enum object_request_state state;
  73        CURLcode curl_result;
  74        char errorstr[CURL_ERROR_SIZE];
  75        long http_code;
  76        unsigned char real_sha1[20];
  77        SHA_CTX c;
  78        z_stream stream;
  79        int zret;
  80        int rename;
  81        struct active_request_slot *slot;
  82        struct object_request *next;
  83};
  84
  85struct alternates_request {
  86        const char *base;
  87        char *url;
  88        struct buffer *buffer;
  89        struct active_request_slot *slot;
  90        int http_specific;
  91};
  92
  93#ifndef NO_EXPAT
  94struct xml_ctx
  95{
  96        char *name;
  97        int len;
  98        char *cdata;
  99        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
 100        void *userData;
 101};
 102
 103struct remote_ls_ctx
 104{
 105        struct alt_base *repo;
 106        char *path;
 107        void (*userFunc)(struct remote_ls_ctx *ls);
 108        void *userData;
 109        int flags;
 110        char *dentry_name;
 111        int dentry_flags;
 112        int rc;
 113        struct remote_ls_ctx *parent;
 114};
 115#endif
 116
 117static struct object_request *object_queue_head;
 118
 119static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 120                               void *data)
 121{
 122        unsigned char expn[4096];
 123        size_t size = eltsize * nmemb;
 124        int posn = 0;
 125        struct object_request *obj_req = (struct object_request *)data;
 126        do {
 127                ssize_t retval = write(obj_req->local,
 128                                       (char *) ptr + posn, size - posn);
 129                if (retval < 0)
 130                        return posn;
 131                posn += retval;
 132        } while (posn < size);
 133
 134        obj_req->stream.avail_in = size;
 135        obj_req->stream.next_in = ptr;
 136        do {
 137                obj_req->stream.next_out = expn;
 138                obj_req->stream.avail_out = sizeof(expn);
 139                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 140                SHA1_Update(&obj_req->c, expn,
 141                            sizeof(expn) - obj_req->stream.avail_out);
 142        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 143        data_received++;
 144        return size;
 145}
 146
 147static void fetch_alternates(const char *base);
 148
 149static void process_object_response(void *callback_data);
 150
 151static void start_object_request(struct object_request *obj_req)
 152{
 153        char *hex = sha1_to_hex(obj_req->sha1);
 154        char prevfile[PATH_MAX];
 155        char *url;
 156        char *posn;
 157        int prevlocal;
 158        unsigned char prev_buf[PREV_BUF_SIZE];
 159        ssize_t prev_read = 0;
 160        long prev_posn = 0;
 161        char range[RANGE_HEADER_SIZE];
 162        struct curl_slist *range_header = NULL;
 163        struct active_request_slot *slot;
 164
 165        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 166        unlink(prevfile);
 167        rename(obj_req->tmpfile, prevfile);
 168        unlink(obj_req->tmpfile);
 169
 170        if (obj_req->local != -1)
 171                error("fd leakage in start: %d", obj_req->local);
 172        obj_req->local = open(obj_req->tmpfile,
 173                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 174        /* This could have failed due to the "lazy directory creation";
 175         * try to mkdir the last path component.
 176         */
 177        if (obj_req->local < 0 && errno == ENOENT) {
 178                char *dir = strrchr(obj_req->tmpfile, '/');
 179                if (dir) {
 180                        *dir = 0;
 181                        mkdir(obj_req->tmpfile, 0777);
 182                        *dir = '/';
 183                }
 184                obj_req->local = open(obj_req->tmpfile,
 185                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 186        }
 187
 188        if (obj_req->local < 0) {
 189                obj_req->state = ABORTED;
 190                error("Couldn't create temporary file %s for %s: %s",
 191                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 192                return;
 193        }
 194
 195        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 196
 197        inflateInit(&obj_req->stream);
 198
 199        SHA1_Init(&obj_req->c);
 200
 201        url = xmalloc(strlen(obj_req->repo->base) + 50);
 202        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 203        strcpy(url, obj_req->repo->base);
 204        posn = url + strlen(obj_req->repo->base);
 205        strcpy(posn, "objects/");
 206        posn += 8;
 207        memcpy(posn, hex, 2);
 208        posn += 2;
 209        *(posn++) = '/';
 210        strcpy(posn, hex + 2);
 211        strcpy(obj_req->url, url);
 212
 213        /* If a previous temp file is present, process what was already
 214           fetched. */
 215        prevlocal = open(prevfile, O_RDONLY);
 216        if (prevlocal != -1) {
 217                do {
 218                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 219                        if (prev_read>0) {
 220                                if (fwrite_sha1_file(prev_buf,
 221                                                     1,
 222                                                     prev_read,
 223                                                     obj_req) == prev_read) {
 224                                        prev_posn += prev_read;
 225                                } else {
 226                                        prev_read = -1;
 227                                }
 228                        }
 229                } while (prev_read > 0);
 230                close(prevlocal);
 231        }
 232        unlink(prevfile);
 233
 234        /* Reset inflate/SHA1 if there was an error reading the previous temp
 235           file; also rewind to the beginning of the local file. */
 236        if (prev_read == -1) {
 237                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 238                inflateInit(&obj_req->stream);
 239                SHA1_Init(&obj_req->c);
 240                if (prev_posn>0) {
 241                        prev_posn = 0;
 242                        lseek(obj_req->local, SEEK_SET, 0);
 243                        ftruncate(obj_req->local, 0);
 244                }
 245        }
 246
 247        slot = get_active_slot();
 248        slot->callback_func = process_object_response;
 249        slot->callback_data = obj_req;
 250        obj_req->slot = slot;
 251
 252        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 253        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 254        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 255        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 256        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 257
 258        /* If we have successfully processed data from a previous fetch
 259           attempt, only fetch the data we don't already have. */
 260        if (prev_posn>0) {
 261                if (get_verbosely)
 262                        fprintf(stderr,
 263                                "Resuming fetch of object %s at byte %ld\n",
 264                                hex, prev_posn);
 265                sprintf(range, "Range: bytes=%ld-", prev_posn);
 266                range_header = curl_slist_append(range_header, range);
 267                curl_easy_setopt(slot->curl,
 268                                 CURLOPT_HTTPHEADER, range_header);
 269        }
 270
 271        /* Try to get the request started, abort the request on error */
 272        obj_req->state = ACTIVE;
 273        if (!start_active_slot(slot)) {
 274                obj_req->state = ABORTED;
 275                obj_req->slot = NULL;
 276                close(obj_req->local); obj_req->local = -1;
 277                free(obj_req->url);
 278                return;
 279        }
 280}
 281
 282static void finish_object_request(struct object_request *obj_req)
 283{
 284        struct stat st;
 285
 286        fchmod(obj_req->local, 0444);
 287        close(obj_req->local); obj_req->local = -1;
 288
 289        if (obj_req->http_code == 416) {
 290                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 291        } else if (obj_req->curl_result != CURLE_OK) {
 292                if (stat(obj_req->tmpfile, &st) == 0)
 293                        if (st.st_size == 0)
 294                                unlink(obj_req->tmpfile);
 295                return;
 296        }
 297
 298        inflateEnd(&obj_req->stream);
 299        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 300        if (obj_req->zret != Z_STREAM_END) {
 301                unlink(obj_req->tmpfile);
 302                return;
 303        }
 304        if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
 305                unlink(obj_req->tmpfile);
 306                return;
 307        }
 308        obj_req->rename =
 309                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 310
 311        if (obj_req->rename == 0)
 312                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 313}
 314
 315static void process_object_response(void *callback_data)
 316{
 317        struct object_request *obj_req =
 318                (struct object_request *)callback_data;
 319
 320        obj_req->curl_result = obj_req->slot->curl_result;
 321        obj_req->http_code = obj_req->slot->http_code;
 322        obj_req->slot = NULL;
 323        obj_req->state = COMPLETE;
 324
 325        /* Use alternates if necessary */
 326        if (obj_req->http_code == 404 ||
 327            obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 328                fetch_alternates(alt->base);
 329                if (obj_req->repo->next != NULL) {
 330                        obj_req->repo =
 331                                obj_req->repo->next;
 332                        close(obj_req->local);
 333                        obj_req->local = -1;
 334                        start_object_request(obj_req);
 335                        return;
 336                }
 337        }
 338
 339        finish_object_request(obj_req);
 340}
 341
 342static void release_object_request(struct object_request *obj_req)
 343{
 344        struct object_request *entry = object_queue_head;
 345
 346        if (obj_req->local != -1)
 347                error("fd leakage in release: %d", obj_req->local);
 348        if (obj_req == object_queue_head) {
 349                object_queue_head = obj_req->next;
 350        } else {
 351                while (entry->next != NULL && entry->next != obj_req)
 352                        entry = entry->next;
 353                if (entry->next == obj_req)
 354                        entry->next = entry->next->next;
 355        }
 356
 357        free(obj_req->url);
 358        free(obj_req);
 359}
 360
 361#ifdef USE_CURL_MULTI
 362void fill_active_slots(void)
 363{
 364        struct object_request *obj_req = object_queue_head;
 365        struct active_request_slot *slot = active_queue_head;
 366        int num_transfers;
 367
 368        while (active_requests < max_requests && obj_req != NULL) {
 369                if (obj_req->state == WAITING) {
 370                        if (has_sha1_file(obj_req->sha1))
 371                                obj_req->state = COMPLETE;
 372                        else
 373                                start_object_request(obj_req);
 374                        curl_multi_perform(curlm, &num_transfers);
 375                }
 376                obj_req = obj_req->next;
 377        }
 378
 379        while (slot != NULL) {
 380                if (!slot->in_use && slot->curl != NULL) {
 381                        curl_easy_cleanup(slot->curl);
 382                        slot->curl = NULL;
 383                }
 384                slot = slot->next;
 385        }
 386}
 387#endif
 388
 389void prefetch(unsigned char *sha1)
 390{
 391        struct object_request *newreq;
 392        struct object_request *tail;
 393        char *filename = sha1_file_name(sha1);
 394
 395        newreq = xmalloc(sizeof(*newreq));
 396        hashcpy(newreq->sha1, sha1);
 397        newreq->repo = alt;
 398        newreq->url = NULL;
 399        newreq->local = -1;
 400        newreq->state = WAITING;
 401        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 402        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 403                 "%s.temp", filename);
 404        newreq->slot = NULL;
 405        newreq->next = NULL;
 406
 407        if (object_queue_head == NULL) {
 408                object_queue_head = newreq;
 409        } else {
 410                tail = object_queue_head;
 411                while (tail->next != NULL) {
 412                        tail = tail->next;
 413                }
 414                tail->next = newreq;
 415        }
 416
 417#ifdef USE_CURL_MULTI
 418        fill_active_slots();
 419        step_active_slots();
 420#endif
 421}
 422
 423static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 424{
 425        char *hex = sha1_to_hex(sha1);
 426        char *filename;
 427        char *url;
 428        char tmpfile[PATH_MAX];
 429        long prev_posn = 0;
 430        char range[RANGE_HEADER_SIZE];
 431        struct curl_slist *range_header = NULL;
 432
 433        FILE *indexfile;
 434        struct active_request_slot *slot;
 435        struct slot_results results;
 436
 437        if (has_pack_index(sha1))
 438                return 0;
 439
 440        if (get_verbosely)
 441                fprintf(stderr, "Getting index for pack %s\n", hex);
 442
 443        url = xmalloc(strlen(repo->base) + 64);
 444        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 445
 446        filename = sha1_pack_index_name(sha1);
 447        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 448        indexfile = fopen(tmpfile, "a");
 449        if (!indexfile)
 450                return error("Unable to open local file %s for pack index",
 451                             filename);
 452
 453        slot = get_active_slot();
 454        slot->results = &results;
 455        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 456        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 457        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 458        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 459        slot->local = indexfile;
 460
 461        /* If there is data present from a previous transfer attempt,
 462           resume where it left off */
 463        prev_posn = ftell(indexfile);
 464        if (prev_posn>0) {
 465                if (get_verbosely)
 466                        fprintf(stderr,
 467                                "Resuming fetch of index for pack %s at byte %ld\n",
 468                                hex, prev_posn);
 469                sprintf(range, "Range: bytes=%ld-", prev_posn);
 470                range_header = curl_slist_append(range_header, range);
 471                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 472        }
 473
 474        if (start_active_slot(slot)) {
 475                run_active_slot(slot);
 476                if (results.curl_result != CURLE_OK) {
 477                        fclose(indexfile);
 478                        return error("Unable to get pack index %s\n%s", url,
 479                                     curl_errorstr);
 480                }
 481        } else {
 482                fclose(indexfile);
 483                return error("Unable to start request");
 484        }
 485
 486        fclose(indexfile);
 487
 488        return move_temp_to_file(tmpfile, filename);
 489}
 490
 491static int setup_index(struct alt_base *repo, unsigned char *sha1)
 492{
 493        struct packed_git *new_pack;
 494        if (has_pack_file(sha1))
 495                return 0; /* don't list this as something we can get */
 496
 497        if (fetch_index(repo, sha1))
 498                return -1;
 499
 500        new_pack = parse_pack_index(sha1);
 501        new_pack->next = repo->packs;
 502        repo->packs = new_pack;
 503        return 0;
 504}
 505
 506static void process_alternates_response(void *callback_data)
 507{
 508        struct alternates_request *alt_req =
 509                (struct alternates_request *)callback_data;
 510        struct active_request_slot *slot = alt_req->slot;
 511        struct alt_base *tail = alt;
 512        const char *base = alt_req->base;
 513        static const char null_byte = '\0';
 514        char *data;
 515        int i = 0;
 516
 517        if (alt_req->http_specific) {
 518                if (slot->curl_result != CURLE_OK ||
 519                    !alt_req->buffer->posn) {
 520
 521                        /* Try reusing the slot to get non-http alternates */
 522                        alt_req->http_specific = 0;
 523                        sprintf(alt_req->url, "%s/objects/info/alternates",
 524                                base);
 525                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 526                                         alt_req->url);
 527                        active_requests++;
 528                        slot->in_use = 1;
 529                        if (slot->finished != NULL)
 530                                (*slot->finished) = 0;
 531                        if (!start_active_slot(slot)) {
 532                                got_alternates = -1;
 533                                slot->in_use = 0;
 534                                if (slot->finished != NULL)
 535                                        (*slot->finished) = 1;
 536                        }
 537                        return;
 538                }
 539        } else if (slot->curl_result != CURLE_OK) {
 540                if (slot->http_code != 404 &&
 541                    slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
 542                        got_alternates = -1;
 543                        return;
 544                }
 545        }
 546
 547        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 548        alt_req->buffer->posn--;
 549        data = alt_req->buffer->buffer;
 550
 551        while (i < alt_req->buffer->posn) {
 552                int posn = i;
 553                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 554                        posn++;
 555                if (data[posn] == '\n') {
 556                        int okay = 0;
 557                        int serverlen = 0;
 558                        struct alt_base *newalt;
 559                        char *target = NULL;
 560                        char *path;
 561                        if (data[i] == '/') {
 562                                serverlen = strchr(base + 8, '/') - base;
 563                                okay = 1;
 564                        } else if (!memcmp(data + i, "../", 3)) {
 565                                i += 3;
 566                                serverlen = strlen(base);
 567                                while (i + 2 < posn &&
 568                                       !memcmp(data + i, "../", 3)) {
 569                                        do {
 570                                                serverlen--;
 571                                        } while (serverlen &&
 572                                                 base[serverlen - 1] != '/');
 573                                        i += 3;
 574                                }
 575                                /* If the server got removed, give up. */
 576                                okay = strchr(base, ':') - base + 3 <
 577                                        serverlen;
 578                        } else if (alt_req->http_specific) {
 579                                char *colon = strchr(data + i, ':');
 580                                char *slash = strchr(data + i, '/');
 581                                if (colon && slash && colon < data + posn &&
 582                                    slash < data + posn && colon < slash) {
 583                                        okay = 1;
 584                                }
 585                        }
 586                        /* skip 'objects' at end */
 587                        if (okay) {
 588                                target = xmalloc(serverlen + posn - i - 6);
 589                                strlcpy(target, base, serverlen);
 590                                strlcpy(target + serverlen, data + i, posn - i - 6);
 591                                if (get_verbosely)
 592                                        fprintf(stderr,
 593                                                "Also look at %s\n", target);
 594                                newalt = xmalloc(sizeof(*newalt));
 595                                newalt->next = NULL;
 596                                newalt->base = target;
 597                                newalt->got_indices = 0;
 598                                newalt->packs = NULL;
 599                                path = strstr(target, "//");
 600                                if (path) {
 601                                        path = strchr(path+2, '/');
 602                                        if (path)
 603                                                newalt->path_len = strlen(path);
 604                                }
 605
 606                                while (tail->next != NULL)
 607                                        tail = tail->next;
 608                                tail->next = newalt;
 609                        }
 610                }
 611                i = posn + 1;
 612        }
 613
 614        got_alternates = 1;
 615}
 616
 617static void fetch_alternates(const char *base)
 618{
 619        struct buffer buffer;
 620        char *url;
 621        char *data;
 622        struct active_request_slot *slot;
 623        struct alternates_request alt_req;
 624
 625        /* If another request has already started fetching alternates,
 626           wait for them to arrive and return to processing this request's
 627           curl message */
 628#ifdef USE_CURL_MULTI
 629        while (got_alternates == 0) {
 630                step_active_slots();
 631        }
 632#endif
 633
 634        /* Nothing to do if they've already been fetched */
 635        if (got_alternates == 1)
 636                return;
 637
 638        /* Start the fetch */
 639        got_alternates = 0;
 640
 641        data = xmalloc(4096);
 642        buffer.size = 4096;
 643        buffer.posn = 0;
 644        buffer.buffer = data;
 645
 646        if (get_verbosely)
 647                fprintf(stderr, "Getting alternates list for %s\n", base);
 648
 649        url = xmalloc(strlen(base) + 31);
 650        sprintf(url, "%s/objects/info/http-alternates", base);
 651
 652        /* Use a callback to process the result, since another request
 653           may fail and need to have alternates loaded before continuing */
 654        slot = get_active_slot();
 655        slot->callback_func = process_alternates_response;
 656        slot->callback_data = &alt_req;
 657
 658        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 659        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 660        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 661
 662        alt_req.base = base;
 663        alt_req.url = url;
 664        alt_req.buffer = &buffer;
 665        alt_req.http_specific = 1;
 666        alt_req.slot = slot;
 667
 668        if (start_active_slot(slot))
 669                run_active_slot(slot);
 670        else
 671                got_alternates = -1;
 672
 673        free(data);
 674        free(url);
 675}
 676
 677#ifndef NO_EXPAT
 678static void
 679xml_start_tag(void *userData, const char *name, const char **atts)
 680{
 681        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 682        const char *c = strchr(name, ':');
 683        int new_len;
 684
 685        if (c == NULL)
 686                c = name;
 687        else
 688                c++;
 689
 690        new_len = strlen(ctx->name) + strlen(c) + 2;
 691
 692        if (new_len > ctx->len) {
 693                ctx->name = xrealloc(ctx->name, new_len);
 694                ctx->len = new_len;
 695        }
 696        strcat(ctx->name, ".");
 697        strcat(ctx->name, c);
 698
 699        free(ctx->cdata);
 700        ctx->cdata = NULL;
 701
 702        ctx->userFunc(ctx, 0);
 703}
 704
 705static void
 706xml_end_tag(void *userData, const char *name)
 707{
 708        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 709        const char *c = strchr(name, ':');
 710        char *ep;
 711
 712        ctx->userFunc(ctx, 1);
 713
 714        if (c == NULL)
 715                c = name;
 716        else
 717                c++;
 718
 719        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 720        *ep = 0;
 721}
 722
 723static void
 724xml_cdata(void *userData, const XML_Char *s, int len)
 725{
 726        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 727        free(ctx->cdata);
 728        ctx->cdata = xmalloc(len + 1);
 729        strlcpy(ctx->cdata, s, len + 1);
 730}
 731
 732static int remote_ls(struct alt_base *repo, const char *path, int flags,
 733                     void (*userFunc)(struct remote_ls_ctx *ls),
 734                     void *userData);
 735
 736static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 737{
 738        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 739
 740        if (tag_closed) {
 741                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 742                        if (ls->dentry_flags & IS_DIR) {
 743                                if (ls->flags & PROCESS_DIRS) {
 744                                        ls->userFunc(ls);
 745                                }
 746                                if (strcmp(ls->dentry_name, ls->path) &&
 747                                    ls->flags & RECURSIVE) {
 748                                        ls->rc = remote_ls(ls->repo,
 749                                                           ls->dentry_name,
 750                                                           ls->flags,
 751                                                           ls->userFunc,
 752                                                           ls->userData);
 753                                }
 754                        } else if (ls->flags & PROCESS_FILES) {
 755                                ls->userFunc(ls);
 756                        }
 757                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 758                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 759                                                  ls->repo->path_len + 1);
 760                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 761                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 762                        ls->dentry_flags |= IS_DIR;
 763                }
 764        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 765                free(ls->dentry_name);
 766                ls->dentry_name = NULL;
 767                ls->dentry_flags = 0;
 768        }
 769}
 770
 771static int remote_ls(struct alt_base *repo, const char *path, int flags,
 772                     void (*userFunc)(struct remote_ls_ctx *ls),
 773                     void *userData)
 774{
 775        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 776        struct active_request_slot *slot;
 777        struct slot_results results;
 778        struct buffer in_buffer;
 779        struct buffer out_buffer;
 780        char *in_data;
 781        char *out_data;
 782        XML_Parser parser = XML_ParserCreate(NULL);
 783        enum XML_Status result;
 784        struct curl_slist *dav_headers = NULL;
 785        struct xml_ctx ctx;
 786        struct remote_ls_ctx ls;
 787
 788        ls.flags = flags;
 789        ls.repo = repo;
 790        ls.path = strdup(path);
 791        ls.dentry_name = NULL;
 792        ls.dentry_flags = 0;
 793        ls.userData = userData;
 794        ls.userFunc = userFunc;
 795        ls.rc = 0;
 796
 797        sprintf(url, "%s%s", repo->base, path);
 798
 799        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 800        out_data = xmalloc(out_buffer.size + 1);
 801        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 802        out_buffer.posn = 0;
 803        out_buffer.buffer = out_data;
 804
 805        in_buffer.size = 4096;
 806        in_data = xmalloc(in_buffer.size);
 807        in_buffer.posn = 0;
 808        in_buffer.buffer = in_data;
 809
 810        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 811        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 812
 813        slot = get_active_slot();
 814        slot->results = &results;
 815        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 816        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 817        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 818        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 819        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 820        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 821        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 822        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 823        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 824
 825        if (start_active_slot(slot)) {
 826                run_active_slot(slot);
 827                if (results.curl_result == CURLE_OK) {
 828                        ctx.name = xcalloc(10, 1);
 829                        ctx.len = 0;
 830                        ctx.cdata = NULL;
 831                        ctx.userFunc = handle_remote_ls_ctx;
 832                        ctx.userData = &ls;
 833                        XML_SetUserData(parser, &ctx);
 834                        XML_SetElementHandler(parser, xml_start_tag,
 835                                              xml_end_tag);
 836                        XML_SetCharacterDataHandler(parser, xml_cdata);
 837                        result = XML_Parse(parser, in_buffer.buffer,
 838                                           in_buffer.posn, 1);
 839                        free(ctx.name);
 840
 841                        if (result != XML_STATUS_OK) {
 842                                ls.rc = error("XML error: %s",
 843                                              XML_ErrorString(
 844                                                      XML_GetErrorCode(parser)));
 845                        }
 846                } else {
 847                        ls.rc = -1;
 848                }
 849        } else {
 850                ls.rc = error("Unable to start PROPFIND request");
 851        }
 852
 853        free(ls.path);
 854        free(url);
 855        free(out_data);
 856        free(in_buffer.buffer);
 857        curl_slist_free_all(dav_headers);
 858
 859        return ls.rc;
 860}
 861
 862static void process_ls_pack(struct remote_ls_ctx *ls)
 863{
 864        unsigned char sha1[20];
 865
 866        if (strlen(ls->dentry_name) == 63 &&
 867            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 868            has_extension(ls->dentry_name, ".pack")) {
 869                get_sha1_hex(ls->dentry_name + 18, sha1);
 870                setup_index(ls->repo, sha1);
 871        }
 872}
 873#endif
 874
 875static int fetch_indices(struct alt_base *repo)
 876{
 877        unsigned char sha1[20];
 878        char *url;
 879        struct buffer buffer;
 880        char *data;
 881        int i = 0;
 882
 883        struct active_request_slot *slot;
 884        struct slot_results results;
 885
 886        if (repo->got_indices)
 887                return 0;
 888
 889        data = xmalloc(4096);
 890        buffer.size = 4096;
 891        buffer.posn = 0;
 892        buffer.buffer = data;
 893
 894        if (get_verbosely)
 895                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 896
 897#ifndef NO_EXPAT
 898        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 899                      process_ls_pack, NULL) == 0)
 900                return 0;
 901#endif
 902
 903        url = xmalloc(strlen(repo->base) + 21);
 904        sprintf(url, "%s/objects/info/packs", repo->base);
 905
 906        slot = get_active_slot();
 907        slot->results = &results;
 908        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 909        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 910        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 911        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 912        if (start_active_slot(slot)) {
 913                run_active_slot(slot);
 914                if (results.curl_result != CURLE_OK) {
 915                        if (results.http_code == 404 ||
 916                            results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 917                                repo->got_indices = 1;
 918                                free(buffer.buffer);
 919                                return 0;
 920                        } else {
 921                                repo->got_indices = 0;
 922                                free(buffer.buffer);
 923                                return error("%s", curl_errorstr);
 924                        }
 925                }
 926        } else {
 927                repo->got_indices = 0;
 928                free(buffer.buffer);
 929                return error("Unable to start request");
 930        }
 931
 932        data = buffer.buffer;
 933        while (i < buffer.posn) {
 934                switch (data[i]) {
 935                case 'P':
 936                        i++;
 937                        if (i + 52 <= buffer.posn &&
 938                            !strncmp(data + i, " pack-", 6) &&
 939                            !strncmp(data + i + 46, ".pack\n", 6)) {
 940                                get_sha1_hex(data + i + 6, sha1);
 941                                setup_index(repo, sha1);
 942                                i += 51;
 943                                break;
 944                        }
 945                default:
 946                        while (i < buffer.posn && data[i] != '\n')
 947                                i++;
 948                }
 949                i++;
 950        }
 951
 952        free(buffer.buffer);
 953        repo->got_indices = 1;
 954        return 0;
 955}
 956
 957static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 958{
 959        char *url;
 960        struct packed_git *target;
 961        struct packed_git **lst;
 962        FILE *packfile;
 963        char *filename;
 964        char tmpfile[PATH_MAX];
 965        int ret;
 966        long prev_posn = 0;
 967        char range[RANGE_HEADER_SIZE];
 968        struct curl_slist *range_header = NULL;
 969
 970        struct active_request_slot *slot;
 971        struct slot_results results;
 972
 973        if (fetch_indices(repo))
 974                return -1;
 975        target = find_sha1_pack(sha1, repo->packs);
 976        if (!target)
 977                return -1;
 978
 979        if (get_verbosely) {
 980                fprintf(stderr, "Getting pack %s\n",
 981                        sha1_to_hex(target->sha1));
 982                fprintf(stderr, " which contains %s\n",
 983                        sha1_to_hex(sha1));
 984        }
 985
 986        url = xmalloc(strlen(repo->base) + 65);
 987        sprintf(url, "%s/objects/pack/pack-%s.pack",
 988                repo->base, sha1_to_hex(target->sha1));
 989
 990        filename = sha1_pack_name(target->sha1);
 991        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 992        packfile = fopen(tmpfile, "a");
 993        if (!packfile)
 994                return error("Unable to open local file %s for pack",
 995                             filename);
 996
 997        slot = get_active_slot();
 998        slot->results = &results;
 999        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1000        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1001        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1002        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1003        slot->local = packfile;
1004
1005        /* If there is data present from a previous transfer attempt,
1006           resume where it left off */
1007        prev_posn = ftell(packfile);
1008        if (prev_posn>0) {
1009                if (get_verbosely)
1010                        fprintf(stderr,
1011                                "Resuming fetch of pack %s at byte %ld\n",
1012                                sha1_to_hex(target->sha1), prev_posn);
1013                sprintf(range, "Range: bytes=%ld-", prev_posn);
1014                range_header = curl_slist_append(range_header, range);
1015                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1016        }
1017
1018        if (start_active_slot(slot)) {
1019                run_active_slot(slot);
1020                if (results.curl_result != CURLE_OK) {
1021                        fclose(packfile);
1022                        return error("Unable to get pack file %s\n%s", url,
1023                                     curl_errorstr);
1024                }
1025        } else {
1026                fclose(packfile);
1027                return error("Unable to start request");
1028        }
1029
1030        fclose(packfile);
1031
1032        ret = move_temp_to_file(tmpfile, filename);
1033        if (ret)
1034                return ret;
1035
1036        lst = &repo->packs;
1037        while (*lst != target)
1038                lst = &((*lst)->next);
1039        *lst = (*lst)->next;
1040
1041        if (verify_pack(target, 0))
1042                return -1;
1043        install_packed_git(target);
1044
1045        return 0;
1046}
1047
1048static void abort_object_request(struct object_request *obj_req)
1049{
1050        if (obj_req->local >= 0) {
1051                close(obj_req->local);
1052                obj_req->local = -1;
1053        }
1054        unlink(obj_req->tmpfile);
1055        if (obj_req->slot) {
1056                release_active_slot(obj_req->slot);
1057                obj_req->slot = NULL;
1058        }
1059        release_object_request(obj_req);
1060}
1061
1062static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1063{
1064        char *hex = sha1_to_hex(sha1);
1065        int ret = 0;
1066        struct object_request *obj_req = object_queue_head;
1067
1068        while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1069                obj_req = obj_req->next;
1070        if (obj_req == NULL)
1071                return error("Couldn't find request for %s in the queue", hex);
1072
1073        if (has_sha1_file(obj_req->sha1)) {
1074                abort_object_request(obj_req);
1075                return 0;
1076        }
1077
1078#ifdef USE_CURL_MULTI
1079        while (obj_req->state == WAITING) {
1080                step_active_slots();
1081        }
1082#else
1083        start_object_request(obj_req);
1084#endif
1085
1086        while (obj_req->state == ACTIVE) {
1087                run_active_slot(obj_req->slot);
1088        }
1089        if (obj_req->local != -1) {
1090                close(obj_req->local); obj_req->local = -1;
1091        }
1092
1093        if (obj_req->state == ABORTED) {
1094                ret = error("Request for %s aborted", hex);
1095        } else if (obj_req->curl_result != CURLE_OK &&
1096                   obj_req->http_code != 416) {
1097                if (obj_req->http_code == 404 ||
1098                    obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1099                        ret = -1; /* Be silent, it is probably in a pack. */
1100                else
1101                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1102                                    obj_req->errorstr, obj_req->curl_result,
1103                                    obj_req->http_code, hex);
1104        } else if (obj_req->zret != Z_STREAM_END) {
1105                corrupt_object_found++;
1106                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1107        } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1108                ret = error("File %s has bad hash", hex);
1109        } else if (obj_req->rename < 0) {
1110                ret = error("unable to write sha1 filename %s",
1111                            obj_req->filename);
1112        }
1113
1114        release_object_request(obj_req);
1115        return ret;
1116}
1117
1118int fetch(unsigned char *sha1)
1119{
1120        struct alt_base *altbase = alt;
1121
1122        if (!fetch_object(altbase, sha1))
1123                return 0;
1124        while (altbase) {
1125                if (!fetch_pack(altbase, sha1))
1126                        return 0;
1127                fetch_alternates(alt->base);
1128                altbase = altbase->next;
1129        }
1130        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1131                     alt->base);
1132}
1133
/*
 * Tell whether character ch has to be percent-escaped.
 *
 * ASCII letters, ASCII digits and the characters '/', '-' and '.' are left
 * alone (return 0); every other value, including 0 and negative values,
 * needs quoting (return 1).
 */
static inline int needs_quote(int ch)
{
        int upper = ('A' <= ch) && (ch <= 'Z');
        int lower = ('a' <= ch) && (ch <= 'z');
        int digit = ('0' <= ch) && (ch <= '9');

        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                break;
        }

        return !(upper || lower || digit);
}
1145
/*
 * Map a nibble value to its hexadecimal digit character: values below 10
 * become '0'..'9', larger values become upper-case letters starting at 'A'
 * for 10.
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1151
/*
 * Build the URL "<base>refs/<ref>" in freshly allocated memory, with every
 * character of ref for which needs_quote() is true replaced by a
 * "%XX" escape (upper-case hex).  base is copied verbatim; no separator is
 * inserted between base and "refs/".
 *
 * The caller owns the returned string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *src;
        char *out, *result;
        int total, base_len, c;

        base_len = strlen(base);

        /* First pass: size the result ("refs/" + NUL account for the 6). */
        total = base_len + 6;
        for (src = ref; (c = *src) != 0; src++)
                total += needs_quote(c) ? 3 : 1;

        result = xmalloc(total);
        memcpy(result, base, base_len);
        memcpy(result + base_len, "refs/", 5);

        /* Second pass: copy ref, escaping as we go. */
        out = result + base_len + 5;
        for (src = ref; (c = *src) != 0; src++) {
                if (!needs_quote(c)) {
                        *out++ = c;
                        continue;
                }
                *out++ = '%';
                *out++ = hex((c >> 4) & 0xF);
                *out++ = hex(c & 0xF);
        }
        *out = 0;

        return result;
}
1179
1180int fetch_ref(char *ref, unsigned char *sha1)
1181{
1182        char *url;
1183        char hex[42];
1184        struct buffer buffer;
1185        const char *base = alt->base;
1186        struct active_request_slot *slot;
1187        struct slot_results results;
1188        buffer.size = 41;
1189        buffer.posn = 0;
1190        buffer.buffer = hex;
1191        hex[41] = '\0';
1192
1193        url = quote_ref_url(base, ref);
1194        slot = get_active_slot();
1195        slot->results = &results;
1196        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1197        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1198        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1199        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1200        if (start_active_slot(slot)) {
1201                run_active_slot(slot);
1202                if (results.curl_result != CURLE_OK)
1203                        return error("Couldn't get %s for %s\n%s",
1204                                     url, ref, curl_errorstr);
1205        } else {
1206                return error("Unable to start request");
1207        }
1208
1209        hex[40] = '\0';
1210        get_sha1_hex(hex, sha1);
1211        return 0;
1212}
1213
1214int main(int argc, const char **argv)
1215{
1216        int commits;
1217        const char **write_ref = NULL;
1218        char **commit_id;
1219        const char *url;
1220        char *path;
1221        int arg = 1;
1222        int rc = 0;
1223
1224        setup_ident();
1225        setup_git_directory();
1226        git_config(git_default_config);
1227
1228        while (arg < argc && argv[arg][0] == '-') {
1229                if (argv[arg][1] == 't') {
1230                        get_tree = 1;
1231                } else if (argv[arg][1] == 'c') {
1232                        get_history = 1;
1233                } else if (argv[arg][1] == 'a') {
1234                        get_all = 1;
1235                        get_tree = 1;
1236                        get_history = 1;
1237                } else if (argv[arg][1] == 'v') {
1238                        get_verbosely = 1;
1239                } else if (argv[arg][1] == 'w') {
1240                        write_ref = &argv[arg + 1];
1241                        arg++;
1242                } else if (!strcmp(argv[arg], "--recover")) {
1243                        get_recover = 1;
1244                } else if (!strcmp(argv[arg], "--stdin")) {
1245                        commits_on_stdin = 1;
1246                }
1247                arg++;
1248        }
1249        if (argc < arg + 2 - commits_on_stdin) {
1250                usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1251                return 1;
1252        }
1253        if (commits_on_stdin) {
1254                commits = pull_targets_stdin(&commit_id, &write_ref);
1255        } else {
1256                commit_id = (char **) &argv[arg++];
1257                commits = 1;
1258        }
1259        url = argv[arg];
1260
1261        http_init();
1262
1263        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1264
1265        alt = xmalloc(sizeof(*alt));
1266        alt->base = url;
1267        alt->got_indices = 0;
1268        alt->packs = NULL;
1269        alt->next = NULL;
1270        path = strstr(url, "//");
1271        if (path) {
1272                path = strchr(path+2, '/');
1273                if (path)
1274                        alt->path_len = strlen(path);
1275        }
1276
1277        if (pull(commits, commit_id, write_ref, url))
1278                rc = 1;
1279
1280        http_cleanup();
1281
1282        curl_slist_free_all(no_pragma_header);
1283
1284        if (commits_on_stdin)
1285                pull_targets_free(commits, commit_id, write_ref);
1286
1287        if (corrupt_object_found) {
1288                fprintf(stderr,
1289"Some loose object were found to be corrupt, but they might be just\n"
1290"a false '404 Not Found' error message sent with incorrect HTTP\n"
1291"status code.  Suggest running git fsck-objects.\n");
1292        }
1293        return rc;
1294}