http-fetch.c on commit git-svn: add UTF-8 message test (7a97de4)
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
  10/* Definitions for DAV requests */
  11#define DAV_PROPFIND "PROPFIND"
  12#define DAV_PROPFIND_RESP ".multistatus.response"
  13#define DAV_PROPFIND_NAME ".multistatus.response.href"
  14#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
  15#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
  17/* Definitions for processing XML DAV responses */
  18#ifndef XML_STATUS_OK
  19enum XML_Status {
  20  XML_STATUS_OK = 1,
  21  XML_STATUS_ERROR = 0
  22};
  23#define XML_STATUS_OK    1
  24#define XML_STATUS_ERROR 0
  25#endif
  26
  27/* Flags that control remote_ls processing */
  28#define PROCESS_FILES (1u << 0)
  29#define PROCESS_DIRS  (1u << 1)
  30#define RECURSIVE     (1u << 2)
  31
  32/* Flags that remote_ls passes to callback functions */
  33#define IS_DIR (1u << 0)
  34#endif
  35
  36#define PREV_BUF_SIZE 4096
  37#define RANGE_HEADER_SIZE 30
  38
  39static int got_alternates = -1;
  40static int corrupt_object_found = 0;
  41
  42static struct curl_slist *no_pragma_header;
  43
  44struct alt_base
  45{
  46        char *base;
  47        int path_len;
  48        int got_indices;
  49        struct packed_git *packs;
  50        struct alt_base *next;
  51};
  52
  53static struct alt_base *alt = NULL;
  54
  55enum object_request_state {
  56        WAITING,
  57        ABORTED,
  58        ACTIVE,
  59        COMPLETE,
  60};
  61
  62struct object_request
  63{
  64        unsigned char sha1[20];
  65        struct alt_base *repo;
  66        char *url;
  67        char filename[PATH_MAX];
  68        char tmpfile[PATH_MAX];
  69        int local;
  70        enum object_request_state state;
  71        CURLcode curl_result;
  72        char errorstr[CURL_ERROR_SIZE];
  73        long http_code;
  74        unsigned char real_sha1[20];
  75        SHA_CTX c;
  76        z_stream stream;
  77        int zret;
  78        int rename;
  79        struct active_request_slot *slot;
  80        struct object_request *next;
  81};
  82
  83struct alternates_request {
  84        char *base;
  85        char *url;
  86        struct buffer *buffer;
  87        struct active_request_slot *slot;
  88        int http_specific;
  89};
  90
  91#ifndef NO_EXPAT
  92struct xml_ctx
  93{
  94        char *name;
  95        int len;
  96        char *cdata;
  97        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
  98        void *userData;
  99};
 100
 101struct remote_ls_ctx
 102{
 103        struct alt_base *repo;
 104        char *path;
 105        void (*userFunc)(struct remote_ls_ctx *ls);
 106        void *userData;
 107        int flags;
 108        char *dentry_name;
 109        int dentry_flags;
 110        int rc;
 111        struct remote_ls_ctx *parent;
 112};
 113#endif
 114
 115static struct object_request *object_queue_head = NULL;
 116
 117static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 118                               void *data)
 119{
 120        unsigned char expn[4096];
 121        size_t size = eltsize * nmemb;
 122        int posn = 0;
 123        struct object_request *obj_req = (struct object_request *)data;
 124        do {
 125                ssize_t retval = write(obj_req->local,
 126                                       ptr + posn, size - posn);
 127                if (retval < 0)
 128                        return posn;
 129                posn += retval;
 130        } while (posn < size);
 131
 132        obj_req->stream.avail_in = size;
 133        obj_req->stream.next_in = ptr;
 134        do {
 135                obj_req->stream.next_out = expn;
 136                obj_req->stream.avail_out = sizeof(expn);
 137                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 138                SHA1_Update(&obj_req->c, expn,
 139                            sizeof(expn) - obj_req->stream.avail_out);
 140        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 141        data_received++;
 142        return size;
 143}
 144
 145static void fetch_alternates(char *base);
 146
 147static void process_object_response(void *callback_data);
 148
 149static void start_object_request(struct object_request *obj_req)
 150{
 151        char *hex = sha1_to_hex(obj_req->sha1);
 152        char prevfile[PATH_MAX];
 153        char *url;
 154        char *posn;
 155        int prevlocal;
 156        unsigned char prev_buf[PREV_BUF_SIZE];
 157        ssize_t prev_read = 0;
 158        long prev_posn = 0;
 159        char range[RANGE_HEADER_SIZE];
 160        struct curl_slist *range_header = NULL;
 161        struct active_request_slot *slot;
 162
 163        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 164        unlink(prevfile);
 165        rename(obj_req->tmpfile, prevfile);
 166        unlink(obj_req->tmpfile);
 167
 168        if (obj_req->local != -1)
 169                error("fd leakage in start: %d", obj_req->local);
 170        obj_req->local = open(obj_req->tmpfile,
 171                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 172        /* This could have failed due to the "lazy directory creation";
 173         * try to mkdir the last path component.
 174         */
 175        if (obj_req->local < 0 && errno == ENOENT) {
 176                char *dir = strrchr(obj_req->tmpfile, '/');
 177                if (dir) {
 178                        *dir = 0;
 179                        mkdir(obj_req->tmpfile, 0777);
 180                        *dir = '/';
 181                }
 182                obj_req->local = open(obj_req->tmpfile,
 183                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 184        }
 185
 186        if (obj_req->local < 0) {
 187                obj_req->state = ABORTED;
 188                error("Couldn't create temporary file %s for %s: %s",
 189                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 190                return;
 191        }
 192
 193        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 194
 195        inflateInit(&obj_req->stream);
 196
 197        SHA1_Init(&obj_req->c);
 198
 199        url = xmalloc(strlen(obj_req->repo->base) + 50);
 200        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 201        strcpy(url, obj_req->repo->base);
 202        posn = url + strlen(obj_req->repo->base);
 203        strcpy(posn, "objects/");
 204        posn += 8;
 205        memcpy(posn, hex, 2);
 206        posn += 2;
 207        *(posn++) = '/';
 208        strcpy(posn, hex + 2);
 209        strcpy(obj_req->url, url);
 210
 211        /* If a previous temp file is present, process what was already
 212           fetched. */
 213        prevlocal = open(prevfile, O_RDONLY);
 214        if (prevlocal != -1) {
 215                do {
 216                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 217                        if (prev_read>0) {
 218                                if (fwrite_sha1_file(prev_buf,
 219                                                     1,
 220                                                     prev_read,
 221                                                     obj_req) == prev_read) {
 222                                        prev_posn += prev_read;
 223                                } else {
 224                                        prev_read = -1;
 225                                }
 226                        }
 227                } while (prev_read > 0);
 228                close(prevlocal);
 229        }
 230        unlink(prevfile);
 231
 232        /* Reset inflate/SHA1 if there was an error reading the previous temp
 233           file; also rewind to the beginning of the local file. */
 234        if (prev_read == -1) {
 235                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 236                inflateInit(&obj_req->stream);
 237                SHA1_Init(&obj_req->c);
 238                if (prev_posn>0) {
 239                        prev_posn = 0;
 240                        lseek(obj_req->local, SEEK_SET, 0);
 241                        ftruncate(obj_req->local, 0);
 242                }
 243        }
 244
 245        slot = get_active_slot();
 246        slot->callback_func = process_object_response;
 247        slot->callback_data = obj_req;
 248        obj_req->slot = slot;
 249
 250        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 251        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 252        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 253        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 254        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 255
 256        /* If we have successfully processed data from a previous fetch
 257           attempt, only fetch the data we don't already have. */
 258        if (prev_posn>0) {
 259                if (get_verbosely)
 260                        fprintf(stderr,
 261                                "Resuming fetch of object %s at byte %ld\n",
 262                                hex, prev_posn);
 263                sprintf(range, "Range: bytes=%ld-", prev_posn);
 264                range_header = curl_slist_append(range_header, range);
 265                curl_easy_setopt(slot->curl,
 266                                 CURLOPT_HTTPHEADER, range_header);
 267        }
 268
 269        /* Try to get the request started, abort the request on error */
 270        obj_req->state = ACTIVE;
 271        if (!start_active_slot(slot)) {
 272                obj_req->state = ABORTED;
 273                obj_req->slot = NULL;
 274                close(obj_req->local); obj_req->local = -1;
 275                free(obj_req->url);
 276                return;
 277        }
 278}
 279
 280static void finish_object_request(struct object_request *obj_req)
 281{
 282        struct stat st;
 283
 284        fchmod(obj_req->local, 0444);
 285        close(obj_req->local); obj_req->local = -1;
 286
 287        if (obj_req->http_code == 416) {
 288                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 289        } else if (obj_req->curl_result != CURLE_OK) {
 290                if (stat(obj_req->tmpfile, &st) == 0)
 291                        if (st.st_size == 0)
 292                                unlink(obj_req->tmpfile);
 293                return;
 294        }
 295
 296        inflateEnd(&obj_req->stream);
 297        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 298        if (obj_req->zret != Z_STREAM_END) {
 299                unlink(obj_req->tmpfile);
 300                return;
 301        }
 302        if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
 303                unlink(obj_req->tmpfile);
 304                return;
 305        }
 306        obj_req->rename =
 307                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 308
 309        if (obj_req->rename == 0)
 310                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 311}
 312
 313static void process_object_response(void *callback_data)
 314{
 315        struct object_request *obj_req =
 316                (struct object_request *)callback_data;
 317
 318        obj_req->curl_result = obj_req->slot->curl_result;
 319        obj_req->http_code = obj_req->slot->http_code;
 320        obj_req->slot = NULL;
 321        obj_req->state = COMPLETE;
 322
 323        /* Use alternates if necessary */
 324        if (obj_req->http_code == 404 ||
 325            obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 326                fetch_alternates(alt->base);
 327                if (obj_req->repo->next != NULL) {
 328                        obj_req->repo =
 329                                obj_req->repo->next;
 330                        close(obj_req->local);
 331                        obj_req->local = -1;
 332                        start_object_request(obj_req);
 333                        return;
 334                }
 335        }
 336
 337        finish_object_request(obj_req);
 338}
 339
 340static void release_object_request(struct object_request *obj_req)
 341{
 342        struct object_request *entry = object_queue_head;
 343
 344        if (obj_req->local != -1)
 345                error("fd leakage in release: %d", obj_req->local);
 346        if (obj_req == object_queue_head) {
 347                object_queue_head = obj_req->next;
 348        } else {
 349                while (entry->next != NULL && entry->next != obj_req)
 350                        entry = entry->next;
 351                if (entry->next == obj_req)
 352                        entry->next = entry->next->next;
 353        }
 354
 355        free(obj_req->url);
 356        free(obj_req);
 357}
 358
 359#ifdef USE_CURL_MULTI
 360void fill_active_slots(void)
 361{
 362        struct object_request *obj_req = object_queue_head;
 363        struct active_request_slot *slot = active_queue_head;
 364        int num_transfers;
 365
 366        while (active_requests < max_requests && obj_req != NULL) {
 367                if (obj_req->state == WAITING) {
 368                        if (has_sha1_file(obj_req->sha1))
 369                                obj_req->state = COMPLETE;
 370                        else
 371                                start_object_request(obj_req);
 372                        curl_multi_perform(curlm, &num_transfers);
 373                }
 374                obj_req = obj_req->next;
 375        }
 376
 377        while (slot != NULL) {
 378                if (!slot->in_use && slot->curl != NULL) {
 379                        curl_easy_cleanup(slot->curl);
 380                        slot->curl = NULL;
 381                }
 382                slot = slot->next;
 383        }
 384}
 385#endif
 386
 387void prefetch(unsigned char *sha1)
 388{
 389        struct object_request *newreq;
 390        struct object_request *tail;
 391        char *filename = sha1_file_name(sha1);
 392
 393        newreq = xmalloc(sizeof(*newreq));
 394        memcpy(newreq->sha1, sha1, 20);
 395        newreq->repo = alt;
 396        newreq->url = NULL;
 397        newreq->local = -1;
 398        newreq->state = WAITING;
 399        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 400        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 401                 "%s.temp", filename);
 402        newreq->slot = NULL;
 403        newreq->next = NULL;
 404
 405        if (object_queue_head == NULL) {
 406                object_queue_head = newreq;
 407        } else {
 408                tail = object_queue_head;
 409                while (tail->next != NULL) {
 410                        tail = tail->next;
 411                }
 412                tail->next = newreq;
 413        }
 414
 415#ifdef USE_CURL_MULTI
 416        fill_active_slots();
 417        step_active_slots();
 418#endif
 419}
 420
 421static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 422{
 423        char *hex = sha1_to_hex(sha1);
 424        char *filename;
 425        char *url;
 426        char tmpfile[PATH_MAX];
 427        long prev_posn = 0;
 428        char range[RANGE_HEADER_SIZE];
 429        struct curl_slist *range_header = NULL;
 430
 431        FILE *indexfile;
 432        struct active_request_slot *slot;
 433        struct slot_results results;
 434
 435        if (has_pack_index(sha1))
 436                return 0;
 437
 438        if (get_verbosely)
 439                fprintf(stderr, "Getting index for pack %s\n", hex);
 440
 441        url = xmalloc(strlen(repo->base) + 64);
 442        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 443
 444        filename = sha1_pack_index_name(sha1);
 445        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 446        indexfile = fopen(tmpfile, "a");
 447        if (!indexfile)
 448                return error("Unable to open local file %s for pack index",
 449                             filename);
 450
 451        slot = get_active_slot();
 452        slot->results = &results;
 453        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 454        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 455        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 456        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 457        slot->local = indexfile;
 458
 459        /* If there is data present from a previous transfer attempt,
 460           resume where it left off */
 461        prev_posn = ftell(indexfile);
 462        if (prev_posn>0) {
 463                if (get_verbosely)
 464                        fprintf(stderr,
 465                                "Resuming fetch of index for pack %s at byte %ld\n",
 466                                hex, prev_posn);
 467                sprintf(range, "Range: bytes=%ld-", prev_posn);
 468                range_header = curl_slist_append(range_header, range);
 469                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 470        }
 471
 472        if (start_active_slot(slot)) {
 473                run_active_slot(slot);
 474                if (results.curl_result != CURLE_OK) {
 475                        fclose(indexfile);
 476                        return error("Unable to get pack index %s\n%s", url,
 477                                     curl_errorstr);
 478                }
 479        } else {
 480                fclose(indexfile);
 481                return error("Unable to start request");
 482        }
 483
 484        fclose(indexfile);
 485
 486        return move_temp_to_file(tmpfile, filename);
 487}
 488
 489static int setup_index(struct alt_base *repo, unsigned char *sha1)
 490{
 491        struct packed_git *new_pack;
 492        if (has_pack_file(sha1))
 493                return 0; // don't list this as something we can get
 494
 495        if (fetch_index(repo, sha1))
 496                return -1;
 497
 498        new_pack = parse_pack_index(sha1);
 499        new_pack->next = repo->packs;
 500        repo->packs = new_pack;
 501        return 0;
 502}
 503
 504static void process_alternates_response(void *callback_data)
 505{
 506        struct alternates_request *alt_req =
 507                (struct alternates_request *)callback_data;
 508        struct active_request_slot *slot = alt_req->slot;
 509        struct alt_base *tail = alt;
 510        char *base = alt_req->base;
 511        static const char null_byte = '\0';
 512        char *data;
 513        int i = 0;
 514
 515        if (alt_req->http_specific) {
 516                if (slot->curl_result != CURLE_OK ||
 517                    !alt_req->buffer->posn) {
 518
 519                        /* Try reusing the slot to get non-http alternates */
 520                        alt_req->http_specific = 0;
 521                        sprintf(alt_req->url, "%s/objects/info/alternates",
 522                                base);
 523                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 524                                         alt_req->url);
 525                        active_requests++;
 526                        slot->in_use = 1;
 527                        if (slot->finished != NULL)
 528                                (*slot->finished) = 0;
 529                        if (!start_active_slot(slot)) {
 530                                got_alternates = -1;
 531                                slot->in_use = 0;
 532                                if (slot->finished != NULL)
 533                                        (*slot->finished) = 1;
 534                        }
 535                        return;
 536                }
 537        } else if (slot->curl_result != CURLE_OK) {
 538                if (slot->http_code != 404 &&
 539                    slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
 540                        got_alternates = -1;
 541                        return;
 542                }
 543        }
 544
 545        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 546        alt_req->buffer->posn--;
 547        data = alt_req->buffer->buffer;
 548
 549        while (i < alt_req->buffer->posn) {
 550                int posn = i;
 551                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 552                        posn++;
 553                if (data[posn] == '\n') {
 554                        int okay = 0;
 555                        int serverlen = 0;
 556                        struct alt_base *newalt;
 557                        char *target = NULL;
 558                        char *path;
 559                        if (data[i] == '/') {
 560                                serverlen = strchr(base + 8, '/') - base;
 561                                okay = 1;
 562                        } else if (!memcmp(data + i, "../", 3)) {
 563                                i += 3;
 564                                serverlen = strlen(base);
 565                                while (i + 2 < posn &&
 566                                       !memcmp(data + i, "../", 3)) {
 567                                        do {
 568                                                serverlen--;
 569                                        } while (serverlen &&
 570                                                 base[serverlen - 1] != '/');
 571                                        i += 3;
 572                                }
 573                                // If the server got removed, give up.
 574                                okay = strchr(base, ':') - base + 3 <
 575                                        serverlen;
 576                        } else if (alt_req->http_specific) {
 577                                char *colon = strchr(data + i, ':');
 578                                char *slash = strchr(data + i, '/');
 579                                if (colon && slash && colon < data + posn &&
 580                                    slash < data + posn && colon < slash) {
 581                                        okay = 1;
 582                                }
 583                        }
 584                        // skip 'objects' at end
 585                        if (okay) {
 586                                target = xmalloc(serverlen + posn - i - 6);
 587                                strncpy(target, base, serverlen);
 588                                strncpy(target + serverlen, data + i,
 589                                        posn - i - 7);
 590                                target[serverlen + posn - i - 7] = '\0';
 591                                if (get_verbosely)
 592                                        fprintf(stderr,
 593                                                "Also look at %s\n", target);
 594                                newalt = xmalloc(sizeof(*newalt));
 595                                newalt->next = NULL;
 596                                newalt->base = target;
 597                                newalt->got_indices = 0;
 598                                newalt->packs = NULL;
 599                                path = strstr(target, "//");
 600                                if (path) {
 601                                        path = strchr(path+2, '/');
 602                                        if (path)
 603                                                newalt->path_len = strlen(path);
 604                                }
 605
 606                                while (tail->next != NULL)
 607                                        tail = tail->next;
 608                                tail->next = newalt;
 609                        }
 610                }
 611                i = posn + 1;
 612        }
 613
 614        got_alternates = 1;
 615}
 616
 617static void fetch_alternates(char *base)
 618{
 619        struct buffer buffer;
 620        char *url;
 621        char *data;
 622        struct active_request_slot *slot;
 623        struct alternates_request alt_req;
 624
 625        /* If another request has already started fetching alternates,
 626           wait for them to arrive and return to processing this request's
 627           curl message */
 628#ifdef USE_CURL_MULTI
 629        while (got_alternates == 0) {
 630                step_active_slots();
 631        }
 632#endif
 633
 634        /* Nothing to do if they've already been fetched */
 635        if (got_alternates == 1)
 636                return;
 637
 638        /* Start the fetch */
 639        got_alternates = 0;
 640
 641        data = xmalloc(4096);
 642        buffer.size = 4096;
 643        buffer.posn = 0;
 644        buffer.buffer = data;
 645
 646        if (get_verbosely)
 647                fprintf(stderr, "Getting alternates list for %s\n", base);
 648
 649        url = xmalloc(strlen(base) + 31);
 650        sprintf(url, "%s/objects/info/http-alternates", base);
 651
 652        /* Use a callback to process the result, since another request
 653           may fail and need to have alternates loaded before continuing */
 654        slot = get_active_slot();
 655        slot->callback_func = process_alternates_response;
 656        slot->callback_data = &alt_req;
 657
 658        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 659        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 660        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 661
 662        alt_req.base = base;
 663        alt_req.url = url;
 664        alt_req.buffer = &buffer;
 665        alt_req.http_specific = 1;
 666        alt_req.slot = slot;
 667
 668        if (start_active_slot(slot))
 669                run_active_slot(slot);
 670        else
 671                got_alternates = -1;
 672
 673        free(data);
 674        free(url);
 675}
 676
 677#ifndef NO_EXPAT
 678static void
 679xml_start_tag(void *userData, const char *name, const char **atts)
 680{
 681        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 682        const char *c = strchr(name, ':');
 683        int new_len;
 684
 685        if (c == NULL)
 686                c = name;
 687        else
 688                c++;
 689
 690        new_len = strlen(ctx->name) + strlen(c) + 2;
 691
 692        if (new_len > ctx->len) {
 693                ctx->name = xrealloc(ctx->name, new_len);
 694                ctx->len = new_len;
 695        }
 696        strcat(ctx->name, ".");
 697        strcat(ctx->name, c);
 698
 699        if (ctx->cdata) {
 700                free(ctx->cdata);
 701                ctx->cdata = NULL;
 702        }
 703
 704        ctx->userFunc(ctx, 0);
 705}
 706
 707static void
 708xml_end_tag(void *userData, const char *name)
 709{
 710        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 711        const char *c = strchr(name, ':');
 712        char *ep;
 713
 714        ctx->userFunc(ctx, 1);
 715
 716        if (c == NULL)
 717                c = name;
 718        else
 719                c++;
 720
 721        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 722        *ep = 0;
 723}
 724
 725static void
 726xml_cdata(void *userData, const XML_Char *s, int len)
 727{
 728        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 729        if (ctx->cdata)
 730                free(ctx->cdata);
 731        ctx->cdata = xcalloc(len+1, 1);
 732        strncpy(ctx->cdata, s, len);
 733}
 734
 735static int remote_ls(struct alt_base *repo, const char *path, int flags,
 736                     void (*userFunc)(struct remote_ls_ctx *ls),
 737                     void *userData);
 738
 739static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 740{
 741        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 742
 743        if (tag_closed) {
 744                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 745                        if (ls->dentry_flags & IS_DIR) {
 746                                if (ls->flags & PROCESS_DIRS) {
 747                                        ls->userFunc(ls);
 748                                }
 749                                if (strcmp(ls->dentry_name, ls->path) &&
 750                                    ls->flags & RECURSIVE) {
 751                                        ls->rc = remote_ls(ls->repo,
 752                                                           ls->dentry_name,
 753                                                           ls->flags,
 754                                                           ls->userFunc,
 755                                                           ls->userData);
 756                                }
 757                        } else if (ls->flags & PROCESS_FILES) {
 758                                ls->userFunc(ls);
 759                        }
 760                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 761                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 762                                                  ls->repo->path_len + 1);
 763                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 764                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 765                        ls->dentry_flags |= IS_DIR;
 766                }
 767        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 768                if (ls->dentry_name) {
 769                        free(ls->dentry_name);
 770                }
 771                ls->dentry_name = NULL;
 772                ls->dentry_flags = 0;
 773        }
 774}
 775
 776static int remote_ls(struct alt_base *repo, const char *path, int flags,
 777                     void (*userFunc)(struct remote_ls_ctx *ls),
 778                     void *userData)
 779{
 780        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 781        struct active_request_slot *slot;
 782        struct slot_results results;
 783        struct buffer in_buffer;
 784        struct buffer out_buffer;
 785        char *in_data;
 786        char *out_data;
 787        XML_Parser parser = XML_ParserCreate(NULL);
 788        enum XML_Status result;
 789        struct curl_slist *dav_headers = NULL;
 790        struct xml_ctx ctx;
 791        struct remote_ls_ctx ls;
 792
 793        ls.flags = flags;
 794        ls.repo = repo;
 795        ls.path = strdup(path);
 796        ls.dentry_name = NULL;
 797        ls.dentry_flags = 0;
 798        ls.userData = userData;
 799        ls.userFunc = userFunc;
 800        ls.rc = 0;
 801
 802        sprintf(url, "%s%s", repo->base, path);
 803
 804        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 805        out_data = xmalloc(out_buffer.size + 1);
 806        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 807        out_buffer.posn = 0;
 808        out_buffer.buffer = out_data;
 809
 810        in_buffer.size = 4096;
 811        in_data = xmalloc(in_buffer.size);
 812        in_buffer.posn = 0;
 813        in_buffer.buffer = in_data;
 814
 815        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 816        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 817
 818        slot = get_active_slot();
 819        slot->results = &results;
 820        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 821        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 822        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 823        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 824        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 825        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 826        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 827        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 828        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 829
 830        if (start_active_slot(slot)) {
 831                run_active_slot(slot);
 832                if (results.curl_result == CURLE_OK) {
 833                        ctx.name = xcalloc(10, 1);
 834                        ctx.len = 0;
 835                        ctx.cdata = NULL;
 836                        ctx.userFunc = handle_remote_ls_ctx;
 837                        ctx.userData = &ls;
 838                        XML_SetUserData(parser, &ctx);
 839                        XML_SetElementHandler(parser, xml_start_tag,
 840                                              xml_end_tag);
 841                        XML_SetCharacterDataHandler(parser, xml_cdata);
 842                        result = XML_Parse(parser, in_buffer.buffer,
 843                                           in_buffer.posn, 1);
 844                        free(ctx.name);
 845
 846                        if (result != XML_STATUS_OK) {
 847                                ls.rc = error("XML error: %s",
 848                                              XML_ErrorString(
 849                                                      XML_GetErrorCode(parser)));
 850                        }
 851                } else {
 852                        ls.rc = -1;
 853                }
 854        } else {
 855                ls.rc = error("Unable to start PROPFIND request");
 856        }
 857
 858        free(ls.path);
 859        free(url);
 860        free(out_data);
 861        free(in_buffer.buffer);
 862        curl_slist_free_all(dav_headers);
 863
 864        return ls.rc;
 865}
 866
 867static void process_ls_pack(struct remote_ls_ctx *ls)
 868{
 869        unsigned char sha1[20];
 870
 871        if (strlen(ls->dentry_name) == 63 &&
 872            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 873            !strncmp(ls->dentry_name+58, ".pack", 5)) {
 874                get_sha1_hex(ls->dentry_name + 18, sha1);
 875                setup_index(ls->repo, sha1);
 876        }
 877}
 878#endif
 879
 880static int fetch_indices(struct alt_base *repo)
 881{
 882        unsigned char sha1[20];
 883        char *url;
 884        struct buffer buffer;
 885        char *data;
 886        int i = 0;
 887
 888        struct active_request_slot *slot;
 889        struct slot_results results;
 890
 891        if (repo->got_indices)
 892                return 0;
 893
 894        data = xmalloc(4096);
 895        buffer.size = 4096;
 896        buffer.posn = 0;
 897        buffer.buffer = data;
 898
 899        if (get_verbosely)
 900                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 901
 902#ifndef NO_EXPAT
 903        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 904                      process_ls_pack, NULL) == 0)
 905                return 0;
 906#endif
 907
 908        url = xmalloc(strlen(repo->base) + 21);
 909        sprintf(url, "%s/objects/info/packs", repo->base);
 910
 911        slot = get_active_slot();
 912        slot->results = &results;
 913        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 914        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 915        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 916        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 917        if (start_active_slot(slot)) {
 918                run_active_slot(slot);
 919                if (results.curl_result != CURLE_OK) {
 920                        if (results.http_code == 404 ||
 921                            results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 922                                repo->got_indices = 1;
 923                                free(buffer.buffer);
 924                                return 0;
 925                        } else {
 926                                repo->got_indices = 0;
 927                                free(buffer.buffer);
 928                                return error("%s", curl_errorstr);
 929                        }
 930                }
 931        } else {
 932                repo->got_indices = 0;
 933                free(buffer.buffer);
 934                return error("Unable to start request");
 935        }
 936
 937        data = buffer.buffer;
 938        while (i < buffer.posn) {
 939                switch (data[i]) {
 940                case 'P':
 941                        i++;
 942                        if (i + 52 <= buffer.posn &&
 943                            !strncmp(data + i, " pack-", 6) &&
 944                            !strncmp(data + i + 46, ".pack\n", 6)) {
 945                                get_sha1_hex(data + i + 6, sha1);
 946                                setup_index(repo, sha1);
 947                                i += 51;
 948                                break;
 949                        }
 950                default:
 951                        while (i < buffer.posn && data[i] != '\n')
 952                                i++;
 953                }
 954                i++;
 955        }
 956
 957        free(buffer.buffer);
 958        repo->got_indices = 1;
 959        return 0;
 960}
 961
 962static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 963{
 964        char *url;
 965        struct packed_git *target;
 966        struct packed_git **lst;
 967        FILE *packfile;
 968        char *filename;
 969        char tmpfile[PATH_MAX];
 970        int ret;
 971        long prev_posn = 0;
 972        char range[RANGE_HEADER_SIZE];
 973        struct curl_slist *range_header = NULL;
 974
 975        struct active_request_slot *slot;
 976        struct slot_results results;
 977
 978        if (fetch_indices(repo))
 979                return -1;
 980        target = find_sha1_pack(sha1, repo->packs);
 981        if (!target)
 982                return -1;
 983
 984        if (get_verbosely) {
 985                fprintf(stderr, "Getting pack %s\n",
 986                        sha1_to_hex(target->sha1));
 987                fprintf(stderr, " which contains %s\n",
 988                        sha1_to_hex(sha1));
 989        }
 990
 991        url = xmalloc(strlen(repo->base) + 65);
 992        sprintf(url, "%s/objects/pack/pack-%s.pack",
 993                repo->base, sha1_to_hex(target->sha1));
 994
 995        filename = sha1_pack_name(target->sha1);
 996        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 997        packfile = fopen(tmpfile, "a");
 998        if (!packfile)
 999                return error("Unable to open local file %s for pack",
1000                             filename);
1001
1002        slot = get_active_slot();
1003        slot->results = &results;
1004        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1005        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1006        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1007        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1008        slot->local = packfile;
1009
1010        /* If there is data present from a previous transfer attempt,
1011           resume where it left off */
1012        prev_posn = ftell(packfile);
1013        if (prev_posn>0) {
1014                if (get_verbosely)
1015                        fprintf(stderr,
1016                                "Resuming fetch of pack %s at byte %ld\n",
1017                                sha1_to_hex(target->sha1), prev_posn);
1018                sprintf(range, "Range: bytes=%ld-", prev_posn);
1019                range_header = curl_slist_append(range_header, range);
1020                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1021        }
1022
1023        if (start_active_slot(slot)) {
1024                run_active_slot(slot);
1025                if (results.curl_result != CURLE_OK) {
1026                        fclose(packfile);
1027                        return error("Unable to get pack file %s\n%s", url,
1028                                     curl_errorstr);
1029                }
1030        } else {
1031                fclose(packfile);
1032                return error("Unable to start request");
1033        }
1034
1035        fclose(packfile);
1036
1037        ret = move_temp_to_file(tmpfile, filename);
1038        if (ret)
1039                return ret;
1040
1041        lst = &repo->packs;
1042        while (*lst != target)
1043                lst = &((*lst)->next);
1044        *lst = (*lst)->next;
1045
1046        if (verify_pack(target, 0))
1047                return -1;
1048        install_packed_git(target);
1049
1050        return 0;
1051}
1052
1053static void abort_object_request(struct object_request *obj_req)
1054{
1055        if (obj_req->local >= 0) {
1056                close(obj_req->local);
1057                obj_req->local = -1;
1058        }
1059        unlink(obj_req->tmpfile);
1060        if (obj_req->slot) {
1061                release_active_slot(obj_req->slot);
1062                obj_req->slot = NULL;
1063        }
1064        release_object_request(obj_req);
1065}
1066
1067static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1068{
1069        char *hex = sha1_to_hex(sha1);
1070        int ret = 0;
1071        struct object_request *obj_req = object_queue_head;
1072
1073        while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1074                obj_req = obj_req->next;
1075        if (obj_req == NULL)
1076                return error("Couldn't find request for %s in the queue", hex);
1077
1078        if (has_sha1_file(obj_req->sha1)) {
1079                abort_object_request(obj_req);
1080                return 0;
1081        }
1082
1083#ifdef USE_CURL_MULTI
1084        while (obj_req->state == WAITING) {
1085                step_active_slots();
1086        }
1087#else
1088        start_object_request(obj_req);
1089#endif
1090
1091        while (obj_req->state == ACTIVE) {
1092                run_active_slot(obj_req->slot);
1093        }
1094        if (obj_req->local != -1) {
1095                close(obj_req->local); obj_req->local = -1;
1096        }
1097
1098        if (obj_req->state == ABORTED) {
1099                ret = error("Request for %s aborted", hex);
1100        } else if (obj_req->curl_result != CURLE_OK &&
1101                   obj_req->http_code != 416) {
1102                if (obj_req->http_code == 404 ||
1103                    obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1104                        ret = -1; /* Be silent, it is probably in a pack. */
1105                else
1106                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1107                                    obj_req->errorstr, obj_req->curl_result,
1108                                    obj_req->http_code, hex);
1109        } else if (obj_req->zret != Z_STREAM_END) {
1110                corrupt_object_found++;
1111                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1112        } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1113                ret = error("File %s has bad hash", hex);
1114        } else if (obj_req->rename < 0) {
1115                ret = error("unable to write sha1 filename %s",
1116                            obj_req->filename);
1117        }
1118
1119        release_object_request(obj_req);
1120        return ret;
1121}
1122
1123int fetch(unsigned char *sha1)
1124{
1125        struct alt_base *altbase = alt;
1126
1127        if (!fetch_object(altbase, sha1))
1128                return 0;
1129        while (altbase) {
1130                if (!fetch_pack(altbase, sha1))
1131                        return 0;
1132                fetch_alternates(alt->base);
1133                altbase = altbase->next;
1134        }
1135        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1136                     alt->base);
1137}
1138
/*
 * Return 0 when ch may appear unescaped in a ref URL (ASCII letters,
 * digits, '/', '-' and '.'), and 1 when it must be percent-encoded.
 *
 * Written with plain comparisons instead of GNU case ranges so that it
 * compiles as ISO C.
 */
static inline int needs_quote(int ch)
{
        if (ch == '/' || ch == '-' || ch == '.')
                return 0;
        if ((ch >= 'A' && ch <= 'Z') ||
            (ch >= 'a' && ch <= 'z') ||
            (ch >= '0' && ch <= '9'))
                return 0;
        return 1;
}
1149
/*
 * Map a nibble value to its upper-case hexadecimal digit character:
 * values below 10 become '0' + v, everything else 'A' + v - 10.
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1155
/*
 * Build the URL for ref under base: "<base>/refs/<ref>", with every
 * character of ref for which needs_quote() is true replaced by its
 * %XX escape.
 *
 * A '/' is inserted between base and "refs/" unless base already ends in
 * one.  Previously "refs/" was appended directly, which produced a broken
 * URL for a base without a trailing slash, although the other URLs in
 * this file are built as base + "/...".
 *
 * Returns a newly allocated string that the caller must free.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *cp;
        char *dp, *qref;
        int len, baselen, ch;
        int need_slash;

        baselen = strlen(base);
        need_slash = (baselen == 0 || base[baselen - 1] != '/');

        len = baselen + need_slash + 6; /* optional "/" + "refs/" + NUL */
        for (cp = ref; (ch = *cp) != 0; cp++, len++)
                if (needs_quote(ch))
                        len += 2; /* extra two hex plus replacement % */

        qref = xmalloc(len);
        memcpy(qref, base, baselen);
        dp = qref + baselen;
        if (need_slash)
                *dp++ = '/';
        memcpy(dp, "refs/", 5);
        dp += 5;

        for (cp = ref; (ch = *cp) != 0; cp++) {
                if (needs_quote(ch)) {
                        *dp++ = '%';
                        *dp++ = hex((ch >> 4) & 0xF);
                        *dp++ = hex(ch & 0xF);
                } else {
                        *dp++ = ch;
                }
        }
        *dp = 0;

        return qref;
}
1183
1184int fetch_ref(char *ref, unsigned char *sha1)
1185{
1186        char *url;
1187        char hex[42];
1188        struct buffer buffer;
1189        char *base = alt->base;
1190        struct active_request_slot *slot;
1191        struct slot_results results;
1192        buffer.size = 41;
1193        buffer.posn = 0;
1194        buffer.buffer = hex;
1195        hex[41] = '\0';
1196
1197        url = quote_ref_url(base, ref);
1198        slot = get_active_slot();
1199        slot->results = &results;
1200        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1201        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1202        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1203        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1204        if (start_active_slot(slot)) {
1205                run_active_slot(slot);
1206                if (results.curl_result != CURLE_OK)
1207                        return error("Couldn't get %s for %s\n%s",
1208                                     url, ref, curl_errorstr);
1209        } else {
1210                return error("Unable to start request");
1211        }
1212
1213        hex[40] = '\0';
1214        get_sha1_hex(hex, sha1);
1215        return 0;
1216}
1217
1218int main(int argc, char **argv)
1219{
1220        char *commit_id;
1221        char *url;
1222        char *path;
1223        int arg = 1;
1224        int rc = 0;
1225
1226        setup_git_directory();
1227        git_config(git_default_config);
1228
1229        while (arg < argc && argv[arg][0] == '-') {
1230                if (argv[arg][1] == 't') {
1231                        get_tree = 1;
1232                } else if (argv[arg][1] == 'c') {
1233                        get_history = 1;
1234                } else if (argv[arg][1] == 'a') {
1235                        get_all = 1;
1236                        get_tree = 1;
1237                        get_history = 1;
1238                } else if (argv[arg][1] == 'v') {
1239                        get_verbosely = 1;
1240                } else if (argv[arg][1] == 'w') {
1241                        write_ref = argv[arg + 1];
1242                        arg++;
1243                } else if (!strcmp(argv[arg], "--recover")) {
1244                        get_recover = 1;
1245                }
1246                arg++;
1247        }
1248        if (argc < arg + 2) {
1249                usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1250                return 1;
1251        }
1252        commit_id = argv[arg];
1253        url = argv[arg + 1];
1254        write_ref_log_details = url;
1255
1256        http_init();
1257
1258        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1259
1260        alt = xmalloc(sizeof(*alt));
1261        alt->base = url;
1262        alt->got_indices = 0;
1263        alt->packs = NULL;
1264        alt->next = NULL;
1265        path = strstr(url, "//");
1266        if (path) {
1267                path = strchr(path+2, '/');
1268                if (path)
1269                        alt->path_len = strlen(path);
1270        }
1271
1272        if (pull(commit_id))
1273                rc = 1;
1274
1275        http_cleanup();
1276
1277        curl_slist_free_all(no_pragma_header);
1278
1279        if (corrupt_object_found) {
1280                fprintf(stderr,
1281"Some loose object were found to be corrupt, but they might be just\n"
1282"a false '404 Not Found' error message sent with incorrect HTTP\n"
1283"status code.  Suggest running git fsck-objects.\n");
1284        }
1285        return rc;
1286}