http-fetch.con commit Merge branch 'jn/web' into next (811476d)
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
  10/* Definitions for DAV requests */
  11#define DAV_PROPFIND "PROPFIND"
  12#define DAV_PROPFIND_RESP ".multistatus.response"
  13#define DAV_PROPFIND_NAME ".multistatus.response.href"
  14#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
  15#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
  17/* Definitions for processing XML DAV responses */
  18#ifndef XML_STATUS_OK
  19enum XML_Status {
  20  XML_STATUS_OK = 1,
  21  XML_STATUS_ERROR = 0
  22};
  23#define XML_STATUS_OK    1
  24#define XML_STATUS_ERROR 0
  25#endif
  26
  27/* Flags that control remote_ls processing */
  28#define PROCESS_FILES (1u << 0)
  29#define PROCESS_DIRS  (1u << 1)
  30#define RECURSIVE     (1u << 2)
  31
  32/* Flags that remote_ls passes to callback functions */
  33#define IS_DIR (1u << 0)
  34#endif
  35
/* Size of the scratch buffer used to re-read a partially fetched temp file */
#define PREV_BUF_SIZE 4096
/* Size of the buffer that holds a formatted "Range: bytes=N-" header */
#define RANGE_HEADER_SIZE 30

/*
 * State of the alternates list: -1 = not fetched yet (or the last attempt
 * failed), 0 = a fetch is currently in progress, 1 = alternates loaded.
 */
static int got_alternates = -1;
/* NOTE(review): not referenced in this part of the file; presumably a
 * counter of objects that failed verification -- confirm against users. */
static int corrupt_object_found = 0;

/* Header list installed with CURLOPT_HTTPHEADER on object and index requests */
static struct curl_slist *no_pragma_header;
  43
/*
 * One remote repository objects may be fetched from.  The primary
 * repository is the head of the list; alternates discovered at run time
 * are appended through `next`.
 */
struct alt_base
{
        char *base;                     /* base URL of the repository */
        int path_len;                   /* length of the path part of base (from the
                                         * first '/' after "//"); stripped from DAV
                                         * hrefs in handle_remote_ls_ctx() */
        int got_indices;                /* set to 0 when an alternate is created */
        struct packed_git *packs;       /* pack indices fetched from this repo */
        struct alt_base *next;          /* next repository to try, or NULL */
};
  52
  53static struct alt_base *alt = NULL;
  54
/* Life cycle of a queued loose-object request. */
enum object_request_state {
        WAITING,        /* queued by prefetch(), transfer not started yet */
        ABORTED,        /* temp file could not be created or slot failed to start */
        ACTIVE,         /* transfer handed to an active request slot */
        COMPLETE,       /* response processed, or object already present locally */
};
  61
/* Everything needed to download and verify one loose object. */
struct object_request
{
        unsigned char sha1[20];         /* SHA-1 the object is expected to have */
        struct alt_base *repo;          /* repository currently being tried */
        char *url;                      /* URL of the object in `repo` (heap allocated) */
        char filename[PATH_MAX];        /* final location of the object file */
        char tmpfile[PATH_MAX];         /* filename + ".temp", download target */
        int local;                      /* fd of tmpfile, -1 when not open */
        enum object_request_state state;
        CURLcode curl_result;           /* copied from the slot when it completes */
        char errorstr[CURL_ERROR_SIZE]; /* passed to curl as CURLOPT_ERRORBUFFER */
        long http_code;                 /* copied from the slot when it completes */
        unsigned char real_sha1[20];    /* SHA-1 computed over the inflated data */
        SHA_CTX c;                      /* running SHA-1 of the inflated data */
        z_stream stream;                /* zlib state used to inflate the download */
        int zret;                       /* return value of the latest inflate() call */
        int rename;                     /* result of move_temp_to_file() */
        struct active_request_slot *slot; /* slot running the transfer, or NULL */
        struct object_request *next;    /* next request in object_queue_head list */
};
  82
/* State shared between fetch_alternates() and its completion callback. */
struct alternates_request {
        char *base;                     /* base URL whose alternates are being fetched */
        char *url;                      /* request URL; rewritten in place for the fallback */
        struct buffer *buffer;          /* receives the response body */
        struct active_request_slot *slot; /* slot performing the request */
        int http_specific;              /* 1 while asking for http-alternates,
                                         * 0 after falling back to alternates */
};
  90
  91#ifndef NO_EXPAT
/* Parser state handed to the expat callbacks as user data. */
struct xml_ctx
{
        char *name;     /* dotted path of the currently open elements, e.g.
                         * ".multistatus.response" (namespace prefixes removed) */
        int len;        /* size last requested for `name` via xrealloc */
        char *cdata;    /* most recent character data, or NULL */
        /* called with tag_closed == 0 on element start and 1 on element end */
        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
        void *userData; /* opaque pointer for userFunc */
};
 100
/* State of one remote directory listing (PROPFIND) performed by remote_ls(). */
struct remote_ls_ctx
{
        struct alt_base *repo;          /* repository being listed */
        char *path;                     /* path that was listed (private copy) */
        void (*userFunc)(struct remote_ls_ctx *ls); /* called for each matching entry */
        void *userData;                 /* opaque pointer for userFunc */
        int flags;                      /* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
        char *dentry_name;              /* name of the entry currently being parsed */
        int dentry_flags;               /* IS_DIR when the entry is a collection */
        int rc;                         /* result returned by remote_ls() */
        struct remote_ls_ctx *parent;   /* NOTE(review): never assigned in the visible code */
};
 113#endif
 114
 115static struct object_request *object_queue_head = NULL;
 116
 117static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 118                               void *data)
 119{
 120        unsigned char expn[4096];
 121        size_t size = eltsize * nmemb;
 122        int posn = 0;
 123        struct object_request *obj_req = (struct object_request *)data;
 124        do {
 125                ssize_t retval = write(obj_req->local,
 126                                       (char *) ptr + posn, size - posn);
 127                if (retval < 0)
 128                        return posn;
 129                posn += retval;
 130        } while (posn < size);
 131
 132        obj_req->stream.avail_in = size;
 133        obj_req->stream.next_in = ptr;
 134        do {
 135                obj_req->stream.next_out = expn;
 136                obj_req->stream.avail_out = sizeof(expn);
 137                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 138                SHA1_Update(&obj_req->c, expn,
 139                            sizeof(expn) - obj_req->stream.avail_out);
 140        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 141        data_received++;
 142        return size;
 143}
 144
 145static void fetch_alternates(char *base);
 146
 147static void process_object_response(void *callback_data);
 148
 149static void start_object_request(struct object_request *obj_req)
 150{
 151        char *hex = sha1_to_hex(obj_req->sha1);
 152        char prevfile[PATH_MAX];
 153        char *url;
 154        char *posn;
 155        int prevlocal;
 156        unsigned char prev_buf[PREV_BUF_SIZE];
 157        ssize_t prev_read = 0;
 158        long prev_posn = 0;
 159        char range[RANGE_HEADER_SIZE];
 160        struct curl_slist *range_header = NULL;
 161        struct active_request_slot *slot;
 162
 163        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 164        unlink(prevfile);
 165        rename(obj_req->tmpfile, prevfile);
 166        unlink(obj_req->tmpfile);
 167
 168        if (obj_req->local != -1)
 169                error("fd leakage in start: %d", obj_req->local);
 170        obj_req->local = open(obj_req->tmpfile,
 171                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 172        /* This could have failed due to the "lazy directory creation";
 173         * try to mkdir the last path component.
 174         */
 175        if (obj_req->local < 0 && errno == ENOENT) {
 176                char *dir = strrchr(obj_req->tmpfile, '/');
 177                if (dir) {
 178                        *dir = 0;
 179                        mkdir(obj_req->tmpfile, 0777);
 180                        *dir = '/';
 181                }
 182                obj_req->local = open(obj_req->tmpfile,
 183                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 184        }
 185
 186        if (obj_req->local < 0) {
 187                obj_req->state = ABORTED;
 188                error("Couldn't create temporary file %s for %s: %s",
 189                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 190                return;
 191        }
 192
 193        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 194
 195        inflateInit(&obj_req->stream);
 196
 197        SHA1_Init(&obj_req->c);
 198
 199        url = xmalloc(strlen(obj_req->repo->base) + 50);
 200        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 201        strcpy(url, obj_req->repo->base);
 202        posn = url + strlen(obj_req->repo->base);
 203        strcpy(posn, "objects/");
 204        posn += 8;
 205        memcpy(posn, hex, 2);
 206        posn += 2;
 207        *(posn++) = '/';
 208        strcpy(posn, hex + 2);
 209        strcpy(obj_req->url, url);
 210
 211        /* If a previous temp file is present, process what was already
 212           fetched. */
 213        prevlocal = open(prevfile, O_RDONLY);
 214        if (prevlocal != -1) {
 215                do {
 216                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 217                        if (prev_read>0) {
 218                                if (fwrite_sha1_file(prev_buf,
 219                                                     1,
 220                                                     prev_read,
 221                                                     obj_req) == prev_read) {
 222                                        prev_posn += prev_read;
 223                                } else {
 224                                        prev_read = -1;
 225                                }
 226                        }
 227                } while (prev_read > 0);
 228                close(prevlocal);
 229        }
 230        unlink(prevfile);
 231
 232        /* Reset inflate/SHA1 if there was an error reading the previous temp
 233           file; also rewind to the beginning of the local file. */
 234        if (prev_read == -1) {
 235                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 236                inflateInit(&obj_req->stream);
 237                SHA1_Init(&obj_req->c);
 238                if (prev_posn>0) {
 239                        prev_posn = 0;
 240                        lseek(obj_req->local, SEEK_SET, 0);
 241                        ftruncate(obj_req->local, 0);
 242                }
 243        }
 244
 245        slot = get_active_slot();
 246        slot->callback_func = process_object_response;
 247        slot->callback_data = obj_req;
 248        obj_req->slot = slot;
 249
 250        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 251        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 252        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 253        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 254        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 255
 256        /* If we have successfully processed data from a previous fetch
 257           attempt, only fetch the data we don't already have. */
 258        if (prev_posn>0) {
 259                if (get_verbosely)
 260                        fprintf(stderr,
 261                                "Resuming fetch of object %s at byte %ld\n",
 262                                hex, prev_posn);
 263                sprintf(range, "Range: bytes=%ld-", prev_posn);
 264                range_header = curl_slist_append(range_header, range);
 265                curl_easy_setopt(slot->curl,
 266                                 CURLOPT_HTTPHEADER, range_header);
 267        }
 268
 269        /* Try to get the request started, abort the request on error */
 270        obj_req->state = ACTIVE;
 271        if (!start_active_slot(slot)) {
 272                obj_req->state = ABORTED;
 273                obj_req->slot = NULL;
 274                close(obj_req->local); obj_req->local = -1;
 275                free(obj_req->url);
 276                return;
 277        }
 278}
 279
 280static void finish_object_request(struct object_request *obj_req)
 281{
 282        struct stat st;
 283
 284        fchmod(obj_req->local, 0444);
 285        close(obj_req->local); obj_req->local = -1;
 286
 287        if (obj_req->http_code == 416) {
 288                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 289        } else if (obj_req->curl_result != CURLE_OK) {
 290                if (stat(obj_req->tmpfile, &st) == 0)
 291                        if (st.st_size == 0)
 292                                unlink(obj_req->tmpfile);
 293                return;
 294        }
 295
 296        inflateEnd(&obj_req->stream);
 297        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 298        if (obj_req->zret != Z_STREAM_END) {
 299                unlink(obj_req->tmpfile);
 300                return;
 301        }
 302        if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
 303                unlink(obj_req->tmpfile);
 304                return;
 305        }
 306        obj_req->rename =
 307                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 308
 309        if (obj_req->rename == 0)
 310                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 311}
 312
 313static void process_object_response(void *callback_data)
 314{
 315        struct object_request *obj_req =
 316                (struct object_request *)callback_data;
 317
 318        obj_req->curl_result = obj_req->slot->curl_result;
 319        obj_req->http_code = obj_req->slot->http_code;
 320        obj_req->slot = NULL;
 321        obj_req->state = COMPLETE;
 322
 323        /* Use alternates if necessary */
 324        if (obj_req->http_code == 404 ||
 325            obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 326                fetch_alternates(alt->base);
 327                if (obj_req->repo->next != NULL) {
 328                        obj_req->repo =
 329                                obj_req->repo->next;
 330                        close(obj_req->local);
 331                        obj_req->local = -1;
 332                        start_object_request(obj_req);
 333                        return;
 334                }
 335        }
 336
 337        finish_object_request(obj_req);
 338}
 339
 340static void release_object_request(struct object_request *obj_req)
 341{
 342        struct object_request *entry = object_queue_head;
 343
 344        if (obj_req->local != -1)
 345                error("fd leakage in release: %d", obj_req->local);
 346        if (obj_req == object_queue_head) {
 347                object_queue_head = obj_req->next;
 348        } else {
 349                while (entry->next != NULL && entry->next != obj_req)
 350                        entry = entry->next;
 351                if (entry->next == obj_req)
 352                        entry->next = entry->next->next;
 353        }
 354
 355        free(obj_req->url);
 356        free(obj_req);
 357}
 358
 359#ifdef USE_CURL_MULTI
 360void fill_active_slots(void)
 361{
 362        struct object_request *obj_req = object_queue_head;
 363        struct active_request_slot *slot = active_queue_head;
 364        int num_transfers;
 365
 366        while (active_requests < max_requests && obj_req != NULL) {
 367                if (obj_req->state == WAITING) {
 368                        if (has_sha1_file(obj_req->sha1))
 369                                obj_req->state = COMPLETE;
 370                        else
 371                                start_object_request(obj_req);
 372                        curl_multi_perform(curlm, &num_transfers);
 373                }
 374                obj_req = obj_req->next;
 375        }
 376
 377        while (slot != NULL) {
 378                if (!slot->in_use && slot->curl != NULL) {
 379                        curl_easy_cleanup(slot->curl);
 380                        slot->curl = NULL;
 381                }
 382                slot = slot->next;
 383        }
 384}
 385#endif
 386
 387void prefetch(unsigned char *sha1)
 388{
 389        struct object_request *newreq;
 390        struct object_request *tail;
 391        char *filename = sha1_file_name(sha1);
 392
 393        newreq = xmalloc(sizeof(*newreq));
 394        memcpy(newreq->sha1, sha1, 20);
 395        newreq->repo = alt;
 396        newreq->url = NULL;
 397        newreq->local = -1;
 398        newreq->state = WAITING;
 399        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 400        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 401                 "%s.temp", filename);
 402        newreq->slot = NULL;
 403        newreq->next = NULL;
 404
 405        if (object_queue_head == NULL) {
 406                object_queue_head = newreq;
 407        } else {
 408                tail = object_queue_head;
 409                while (tail->next != NULL) {
 410                        tail = tail->next;
 411                }
 412                tail->next = newreq;
 413        }
 414
 415#ifdef USE_CURL_MULTI
 416        fill_active_slots();
 417        step_active_slots();
 418#endif
 419}
 420
 421static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 422{
 423        char *hex = sha1_to_hex(sha1);
 424        char *filename;
 425        char *url;
 426        char tmpfile[PATH_MAX];
 427        long prev_posn = 0;
 428        char range[RANGE_HEADER_SIZE];
 429        struct curl_slist *range_header = NULL;
 430
 431        FILE *indexfile;
 432        struct active_request_slot *slot;
 433        struct slot_results results;
 434
 435        if (has_pack_index(sha1))
 436                return 0;
 437
 438        if (get_verbosely)
 439                fprintf(stderr, "Getting index for pack %s\n", hex);
 440
 441        url = xmalloc(strlen(repo->base) + 64);
 442        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 443
 444        filename = sha1_pack_index_name(sha1);
 445        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 446        indexfile = fopen(tmpfile, "a");
 447        if (!indexfile)
 448                return error("Unable to open local file %s for pack index",
 449                             filename);
 450
 451        slot = get_active_slot();
 452        slot->results = &results;
 453        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 454        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 455        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 456        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 457        slot->local = indexfile;
 458
 459        /* If there is data present from a previous transfer attempt,
 460           resume where it left off */
 461        prev_posn = ftell(indexfile);
 462        if (prev_posn>0) {
 463                if (get_verbosely)
 464                        fprintf(stderr,
 465                                "Resuming fetch of index for pack %s at byte %ld\n",
 466                                hex, prev_posn);
 467                sprintf(range, "Range: bytes=%ld-", prev_posn);
 468                range_header = curl_slist_append(range_header, range);
 469                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 470        }
 471
 472        if (start_active_slot(slot)) {
 473                run_active_slot(slot);
 474                if (results.curl_result != CURLE_OK) {
 475                        fclose(indexfile);
 476                        return error("Unable to get pack index %s\n%s", url,
 477                                     curl_errorstr);
 478                }
 479        } else {
 480                fclose(indexfile);
 481                return error("Unable to start request");
 482        }
 483
 484        fclose(indexfile);
 485
 486        return move_temp_to_file(tmpfile, filename);
 487}
 488
 489static int setup_index(struct alt_base *repo, unsigned char *sha1)
 490{
 491        struct packed_git *new_pack;
 492        if (has_pack_file(sha1))
 493                return 0; // don't list this as something we can get
 494
 495        if (fetch_index(repo, sha1))
 496                return -1;
 497
 498        new_pack = parse_pack_index(sha1);
 499        new_pack->next = repo->packs;
 500        repo->packs = new_pack;
 501        return 0;
 502}
 503
/*
 * Slot completion callback for the alternates request started by
 * fetch_alternates().
 *
 * If the "http-alternates" request failed or returned nothing, the same
 * slot is reused to request "objects/info/alternates" and the function
 * returns; it is invoked again when that request completes.  Otherwise the
 * response body is parsed line by line and every acceptable entry is
 * appended to the `alt` list.  Sets got_alternates to 1 once the list has
 * been processed, or to -1 on failure.
 */
static void process_alternates_response(void *callback_data)
{
        struct alternates_request *alt_req =
                (struct alternates_request *)callback_data;
        struct active_request_slot *slot = alt_req->slot;
        struct alt_base *tail = alt;
        char *base = alt_req->base;
        static const char null_byte = '\0';
        char *data;
        int i = 0;

        if (alt_req->http_specific) {
                if (slot->curl_result != CURLE_OK ||
                    !alt_req->buffer->posn) {

                        /* Try reusing the slot to get non-http alternates */
                        alt_req->http_specific = 0;
                        /* shorter than the URL the buffer was sized for in
                         * fetch_alternates(), so it fits in place */
                        sprintf(alt_req->url, "%s/objects/info/alternates",
                                base);
                        curl_easy_setopt(slot->curl, CURLOPT_URL,
                                         alt_req->url);
                        /* mark the slot as busy again before restarting it */
                        active_requests++;
                        slot->in_use = 1;
                        if (slot->finished != NULL)
                                (*slot->finished) = 0;
                        if (!start_active_slot(slot)) {
                                got_alternates = -1;
                                slot->in_use = 0;
                                if (slot->finished != NULL)
                                        (*slot->finished) = 1;
                        }
                        return;
                }
        } else if (slot->curl_result != CURLE_OK) {
                /* A missing alternates file is not an error; anything else is */
                if (slot->http_code != 404 &&
                    slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
                        got_alternates = -1;
                        return;
                }
        }

        /* Terminate the buffer with a NUL that is not counted in posn, so
         * the string functions below stop at the end of the data */
        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
        alt_req->buffer->posn--;
        data = alt_req->buffer->buffer;

        /* i is the start of the current line, posn the index of its '\n' */
        while (i < alt_req->buffer->posn) {
                int posn = i;
                while (posn < alt_req->buffer->posn && data[posn] != '\n')
                        posn++;
                /* a final line without a newline is ignored */
                if (data[posn] == '\n') {
                        int okay = 0;
                        int serverlen = 0;
                        struct alt_base *newalt;
                        char *target = NULL;
                        char *path;
                        if (data[i] == '/') {
                                /* absolute path: keep base up to the first
                                 * '/' after its first 8 characters.
                                 * NOTE(review): strchr() may return NULL
                                 * here if base has no such '/' -- confirm. */
                                serverlen = strchr(base + 8, '/') - base;
                                okay = 1;
                        } else if (!memcmp(data + i, "../", 3)) {
                                /* relative path: each additional "../"
                                 * removes one trailing component of base */
                                i += 3;
                                serverlen = strlen(base);
                                while (i + 2 < posn &&
                                       !memcmp(data + i, "../", 3)) {
                                        do {
                                                serverlen--;
                                        } while (serverlen &&
                                                 base[serverlen - 1] != '/');
                                        i += 3;
                                }
                                // If the server got removed, give up.
                                okay = strchr(base, ':') - base + 3 <
                                        serverlen;
                        } else if (alt_req->http_specific) {
                                /* accept entries that have a ':' before the
                                 * first '/' within the line (serverlen stays
                                 * 0, so nothing of base is prepended) */
                                char *colon = strchr(data + i, ':');
                                char *slash = strchr(data + i, '/');
                                if (colon && slash && colon < data + posn &&
                                    slash < data + posn && colon < slash) {
                                        okay = 1;
                                }
                        }
                        // skip 'objects' at end
                        if (okay) {
                                /* NOTE(review): the sizes below depend on
                                 * how safe_strncpy() treats its length
                                 * argument (not visible here) -- verify the
                                 * allocation covers both copies. */
                                target = xmalloc(serverlen + posn - i - 6);
                                safe_strncpy(target, base, serverlen);
                                safe_strncpy(target + serverlen, data + i, posn - i - 6);
                                if (get_verbosely)
                                        fprintf(stderr,
                                                "Also look at %s\n", target);
                                newalt = xmalloc(sizeof(*newalt));
                                newalt->next = NULL;
                                newalt->base = target;
                                newalt->got_indices = 0;
                                newalt->packs = NULL;
                                /* path_len = length of target from the
                                 * first '/' after "//".
                                 * NOTE(review): it is not assigned when
                                 * target has no such component. */
                                path = strstr(target, "//");
                                if (path) {
                                        path = strchr(path+2, '/');
                                        if (path)
                                                newalt->path_len = strlen(path);
                                }

                                /* append to the end of the alternates list */
                                while (tail->next != NULL)
                                        tail = tail->next;
                                tail->next = newalt;
                        }
                }
                i = posn + 1;
        }

        got_alternates = 1;
}
 614
 615static void fetch_alternates(char *base)
 616{
 617        struct buffer buffer;
 618        char *url;
 619        char *data;
 620        struct active_request_slot *slot;
 621        struct alternates_request alt_req;
 622
 623        /* If another request has already started fetching alternates,
 624           wait for them to arrive and return to processing this request's
 625           curl message */
 626#ifdef USE_CURL_MULTI
 627        while (got_alternates == 0) {
 628                step_active_slots();
 629        }
 630#endif
 631
 632        /* Nothing to do if they've already been fetched */
 633        if (got_alternates == 1)
 634                return;
 635
 636        /* Start the fetch */
 637        got_alternates = 0;
 638
 639        data = xmalloc(4096);
 640        buffer.size = 4096;
 641        buffer.posn = 0;
 642        buffer.buffer = data;
 643
 644        if (get_verbosely)
 645                fprintf(stderr, "Getting alternates list for %s\n", base);
 646
 647        url = xmalloc(strlen(base) + 31);
 648        sprintf(url, "%s/objects/info/http-alternates", base);
 649
 650        /* Use a callback to process the result, since another request
 651           may fail and need to have alternates loaded before continuing */
 652        slot = get_active_slot();
 653        slot->callback_func = process_alternates_response;
 654        slot->callback_data = &alt_req;
 655
 656        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 657        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 658        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 659
 660        alt_req.base = base;
 661        alt_req.url = url;
 662        alt_req.buffer = &buffer;
 663        alt_req.http_specific = 1;
 664        alt_req.slot = slot;
 665
 666        if (start_active_slot(slot))
 667                run_active_slot(slot);
 668        else
 669                got_alternates = -1;
 670
 671        free(data);
 672        free(url);
 673}
 674
 675#ifndef NO_EXPAT
 676static void
 677xml_start_tag(void *userData, const char *name, const char **atts)
 678{
 679        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 680        const char *c = strchr(name, ':');
 681        int new_len;
 682
 683        if (c == NULL)
 684                c = name;
 685        else
 686                c++;
 687
 688        new_len = strlen(ctx->name) + strlen(c) + 2;
 689
 690        if (new_len > ctx->len) {
 691                ctx->name = xrealloc(ctx->name, new_len);
 692                ctx->len = new_len;
 693        }
 694        strcat(ctx->name, ".");
 695        strcat(ctx->name, c);
 696
 697        if (ctx->cdata) {
 698                free(ctx->cdata);
 699                ctx->cdata = NULL;
 700        }
 701
 702        ctx->userFunc(ctx, 0);
 703}
 704
 705static void
 706xml_end_tag(void *userData, const char *name)
 707{
 708        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 709        const char *c = strchr(name, ':');
 710        char *ep;
 711
 712        ctx->userFunc(ctx, 1);
 713
 714        if (c == NULL)
 715                c = name;
 716        else
 717                c++;
 718
 719        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 720        *ep = 0;
 721}
 722
 723static void
 724xml_cdata(void *userData, const XML_Char *s, int len)
 725{
 726        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 727        if (ctx->cdata)
 728                free(ctx->cdata);
 729        ctx->cdata = xmalloc(len + 1);
 730        safe_strncpy(ctx->cdata, s, len + 1);
 731}
 732
 733static int remote_ls(struct alt_base *repo, const char *path, int flags,
 734                     void (*userFunc)(struct remote_ls_ctx *ls),
 735                     void *userData);
 736
 737static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 738{
 739        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 740
 741        if (tag_closed) {
 742                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 743                        if (ls->dentry_flags & IS_DIR) {
 744                                if (ls->flags & PROCESS_DIRS) {
 745                                        ls->userFunc(ls);
 746                                }
 747                                if (strcmp(ls->dentry_name, ls->path) &&
 748                                    ls->flags & RECURSIVE) {
 749                                        ls->rc = remote_ls(ls->repo,
 750                                                           ls->dentry_name,
 751                                                           ls->flags,
 752                                                           ls->userFunc,
 753                                                           ls->userData);
 754                                }
 755                        } else if (ls->flags & PROCESS_FILES) {
 756                                ls->userFunc(ls);
 757                        }
 758                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 759                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 760                                                  ls->repo->path_len + 1);
 761                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 762                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 763                        ls->dentry_flags |= IS_DIR;
 764                }
 765        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 766                if (ls->dentry_name) {
 767                        free(ls->dentry_name);
 768                }
 769                ls->dentry_name = NULL;
 770                ls->dentry_flags = 0;
 771        }
 772}
 773
 774static int remote_ls(struct alt_base *repo, const char *path, int flags,
 775                     void (*userFunc)(struct remote_ls_ctx *ls),
 776                     void *userData)
 777{
 778        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 779        struct active_request_slot *slot;
 780        struct slot_results results;
 781        struct buffer in_buffer;
 782        struct buffer out_buffer;
 783        char *in_data;
 784        char *out_data;
 785        XML_Parser parser = XML_ParserCreate(NULL);
 786        enum XML_Status result;
 787        struct curl_slist *dav_headers = NULL;
 788        struct xml_ctx ctx;
 789        struct remote_ls_ctx ls;
 790
 791        ls.flags = flags;
 792        ls.repo = repo;
 793        ls.path = strdup(path);
 794        ls.dentry_name = NULL;
 795        ls.dentry_flags = 0;
 796        ls.userData = userData;
 797        ls.userFunc = userFunc;
 798        ls.rc = 0;
 799
 800        sprintf(url, "%s%s", repo->base, path);
 801
 802        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 803        out_data = xmalloc(out_buffer.size + 1);
 804        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 805        out_buffer.posn = 0;
 806        out_buffer.buffer = out_data;
 807
 808        in_buffer.size = 4096;
 809        in_data = xmalloc(in_buffer.size);
 810        in_buffer.posn = 0;
 811        in_buffer.buffer = in_data;
 812
 813        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 814        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 815
 816        slot = get_active_slot();
 817        slot->results = &results;
 818        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 819        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 820        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 821        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 822        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 823        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 824        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 825        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 826        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 827
 828        if (start_active_slot(slot)) {
 829                run_active_slot(slot);
 830                if (results.curl_result == CURLE_OK) {
 831                        ctx.name = xcalloc(10, 1);
 832                        ctx.len = 0;
 833                        ctx.cdata = NULL;
 834                        ctx.userFunc = handle_remote_ls_ctx;
 835                        ctx.userData = &ls;
 836                        XML_SetUserData(parser, &ctx);
 837                        XML_SetElementHandler(parser, xml_start_tag,
 838                                              xml_end_tag);
 839                        XML_SetCharacterDataHandler(parser, xml_cdata);
 840                        result = XML_Parse(parser, in_buffer.buffer,
 841                                           in_buffer.posn, 1);
 842                        free(ctx.name);
 843
 844                        if (result != XML_STATUS_OK) {
 845                                ls.rc = error("XML error: %s",
 846                                              XML_ErrorString(
 847                                                      XML_GetErrorCode(parser)));
 848                        }
 849                } else {
 850                        ls.rc = -1;
 851                }
 852        } else {
 853                ls.rc = error("Unable to start PROPFIND request");
 854        }
 855
 856        free(ls.path);
 857        free(url);
 858        free(out_data);
 859        free(in_buffer.buffer);
 860        curl_slist_free_all(dav_headers);
 861
 862        return ls.rc;
 863}
 864
 865static void process_ls_pack(struct remote_ls_ctx *ls)
 866{
 867        unsigned char sha1[20];
 868
 869        if (strlen(ls->dentry_name) == 63 &&
 870            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 871            !strncmp(ls->dentry_name+58, ".pack", 5)) {
 872                get_sha1_hex(ls->dentry_name + 18, sha1);
 873                setup_index(ls->repo, sha1);
 874        }
 875}
 876#endif
 877
 878static int fetch_indices(struct alt_base *repo)
 879{
 880        unsigned char sha1[20];
 881        char *url;
 882        struct buffer buffer;
 883        char *data;
 884        int i = 0;
 885
 886        struct active_request_slot *slot;
 887        struct slot_results results;
 888
 889        if (repo->got_indices)
 890                return 0;
 891
 892        data = xmalloc(4096);
 893        buffer.size = 4096;
 894        buffer.posn = 0;
 895        buffer.buffer = data;
 896
 897        if (get_verbosely)
 898                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 899
 900#ifndef NO_EXPAT
 901        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 902                      process_ls_pack, NULL) == 0)
 903                return 0;
 904#endif
 905
 906        url = xmalloc(strlen(repo->base) + 21);
 907        sprintf(url, "%s/objects/info/packs", repo->base);
 908
 909        slot = get_active_slot();
 910        slot->results = &results;
 911        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 912        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 913        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 914        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 915        if (start_active_slot(slot)) {
 916                run_active_slot(slot);
 917                if (results.curl_result != CURLE_OK) {
 918                        if (results.http_code == 404 ||
 919                            results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 920                                repo->got_indices = 1;
 921                                free(buffer.buffer);
 922                                return 0;
 923                        } else {
 924                                repo->got_indices = 0;
 925                                free(buffer.buffer);
 926                                return error("%s", curl_errorstr);
 927                        }
 928                }
 929        } else {
 930                repo->got_indices = 0;
 931                free(buffer.buffer);
 932                return error("Unable to start request");
 933        }
 934
 935        data = buffer.buffer;
 936        while (i < buffer.posn) {
 937                switch (data[i]) {
 938                case 'P':
 939                        i++;
 940                        if (i + 52 <= buffer.posn &&
 941                            !strncmp(data + i, " pack-", 6) &&
 942                            !strncmp(data + i + 46, ".pack\n", 6)) {
 943                                get_sha1_hex(data + i + 6, sha1);
 944                                setup_index(repo, sha1);
 945                                i += 51;
 946                                break;
 947                        }
 948                default:
 949                        while (i < buffer.posn && data[i] != '\n')
 950                                i++;
 951                }
 952                i++;
 953        }
 954
 955        free(buffer.buffer);
 956        repo->got_indices = 1;
 957        return 0;
 958}
 959
 960static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 961{
 962        char *url;
 963        struct packed_git *target;
 964        struct packed_git **lst;
 965        FILE *packfile;
 966        char *filename;
 967        char tmpfile[PATH_MAX];
 968        int ret;
 969        long prev_posn = 0;
 970        char range[RANGE_HEADER_SIZE];
 971        struct curl_slist *range_header = NULL;
 972
 973        struct active_request_slot *slot;
 974        struct slot_results results;
 975
 976        if (fetch_indices(repo))
 977                return -1;
 978        target = find_sha1_pack(sha1, repo->packs);
 979        if (!target)
 980                return -1;
 981
 982        if (get_verbosely) {
 983                fprintf(stderr, "Getting pack %s\n",
 984                        sha1_to_hex(target->sha1));
 985                fprintf(stderr, " which contains %s\n",
 986                        sha1_to_hex(sha1));
 987        }
 988
 989        url = xmalloc(strlen(repo->base) + 65);
 990        sprintf(url, "%s/objects/pack/pack-%s.pack",
 991                repo->base, sha1_to_hex(target->sha1));
 992
 993        filename = sha1_pack_name(target->sha1);
 994        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 995        packfile = fopen(tmpfile, "a");
 996        if (!packfile)
 997                return error("Unable to open local file %s for pack",
 998                             filename);
 999
1000        slot = get_active_slot();
1001        slot->results = &results;
1002        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1003        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1004        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1005        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1006        slot->local = packfile;
1007
1008        /* If there is data present from a previous transfer attempt,
1009           resume where it left off */
1010        prev_posn = ftell(packfile);
1011        if (prev_posn>0) {
1012                if (get_verbosely)
1013                        fprintf(stderr,
1014                                "Resuming fetch of pack %s at byte %ld\n",
1015                                sha1_to_hex(target->sha1), prev_posn);
1016                sprintf(range, "Range: bytes=%ld-", prev_posn);
1017                range_header = curl_slist_append(range_header, range);
1018                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1019        }
1020
1021        if (start_active_slot(slot)) {
1022                run_active_slot(slot);
1023                if (results.curl_result != CURLE_OK) {
1024                        fclose(packfile);
1025                        return error("Unable to get pack file %s\n%s", url,
1026                                     curl_errorstr);
1027                }
1028        } else {
1029                fclose(packfile);
1030                return error("Unable to start request");
1031        }
1032
1033        fclose(packfile);
1034
1035        ret = move_temp_to_file(tmpfile, filename);
1036        if (ret)
1037                return ret;
1038
1039        lst = &repo->packs;
1040        while (*lst != target)
1041                lst = &((*lst)->next);
1042        *lst = (*lst)->next;
1043
1044        if (verify_pack(target, 0))
1045                return -1;
1046        install_packed_git(target);
1047
1048        return 0;
1049}
1050
1051static void abort_object_request(struct object_request *obj_req)
1052{
1053        if (obj_req->local >= 0) {
1054                close(obj_req->local);
1055                obj_req->local = -1;
1056        }
1057        unlink(obj_req->tmpfile);
1058        if (obj_req->slot) {
1059                release_active_slot(obj_req->slot);
1060                obj_req->slot = NULL;
1061        }
1062        release_object_request(obj_req);
1063}
1064
1065static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1066{
1067        char *hex = sha1_to_hex(sha1);
1068        int ret = 0;
1069        struct object_request *obj_req = object_queue_head;
1070
1071        while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1072                obj_req = obj_req->next;
1073        if (obj_req == NULL)
1074                return error("Couldn't find request for %s in the queue", hex);
1075
1076        if (has_sha1_file(obj_req->sha1)) {
1077                abort_object_request(obj_req);
1078                return 0;
1079        }
1080
1081#ifdef USE_CURL_MULTI
1082        while (obj_req->state == WAITING) {
1083                step_active_slots();
1084        }
1085#else
1086        start_object_request(obj_req);
1087#endif
1088
1089        while (obj_req->state == ACTIVE) {
1090                run_active_slot(obj_req->slot);
1091        }
1092        if (obj_req->local != -1) {
1093                close(obj_req->local); obj_req->local = -1;
1094        }
1095
1096        if (obj_req->state == ABORTED) {
1097                ret = error("Request for %s aborted", hex);
1098        } else if (obj_req->curl_result != CURLE_OK &&
1099                   obj_req->http_code != 416) {
1100                if (obj_req->http_code == 404 ||
1101                    obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1102                        ret = -1; /* Be silent, it is probably in a pack. */
1103                else
1104                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1105                                    obj_req->errorstr, obj_req->curl_result,
1106                                    obj_req->http_code, hex);
1107        } else if (obj_req->zret != Z_STREAM_END) {
1108                corrupt_object_found++;
1109                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1110        } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1111                ret = error("File %s has bad hash", hex);
1112        } else if (obj_req->rename < 0) {
1113                ret = error("unable to write sha1 filename %s",
1114                            obj_req->filename);
1115        }
1116
1117        release_object_request(obj_req);
1118        return ret;
1119}
1120
1121int fetch(unsigned char *sha1)
1122{
1123        struct alt_base *altbase = alt;
1124
1125        if (!fetch_object(altbase, sha1))
1126                return 0;
1127        while (altbase) {
1128                if (!fetch_pack(altbase, sha1))
1129                        return 0;
1130                fetch_alternates(alt->base);
1131                altbase = altbase->next;
1132        }
1133        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1134                     alt->base);
1135}
1136
/*
 * Return 0 if `ch` may appear verbatim in a ref URL (ASCII letters,
 * digits, '/', '-' and '.'), and 1 if it has to be %-escaped.
 */
static inline int needs_quote(int ch)
{
	if (ch >= 'A' && ch <= 'Z')
		return 0;
	if (ch >= 'a' && ch <= 'z')
		return 0;
	if (ch >= '0' && ch <= '9')
		return 0;

	switch (ch) {
	case '/':
	case '-':
	case '.':
		return 0;
	default:
		return 1;
	}
}
1148
/*
 * Convert `v` to an uppercase hexadecimal digit character:
 * values below 10 map onto '0'..'9', larger ones onto 'A' upwards.
 */
static inline int hex(int v)
{
	return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1154
/*
 * Build the URL for a ref: `base`, followed by "refs/", followed by `ref`
 * with every character for which needs_quote() is true replaced by a
 * "%XX" escape.
 *
 * Returns a newly allocated, NUL-terminated string obtained from
 * xmalloc(); the caller owns it.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *out, *url;
	int total, base_len, c;

	/* First pass: work out how much room the result needs. */
	base_len = strlen(base);
	total = base_len + 6; /* "refs/" + NUL */
	for (src = ref; (c = *src) != 0; src++)
		total += needs_quote(c) ? 3 : 1; /* "%XX" or the char itself */

	url = xmalloc(total);
	memcpy(url, base, base_len);
	memcpy(url + base_len, "refs/", 5);

	/* Second pass: copy the ref, escaping as we go. */
	out = url + base_len + 5;
	for (src = ref; (c = *src) != 0; src++) {
		if (!needs_quote(c)) {
			*out++ = c;
			continue;
		}
		*out++ = '%';
		*out++ = hex((c >> 4) & 0xF);
		*out++ = hex(c & 0xF);
	}
	*out = 0;

	return url;
}
1182
1183int fetch_ref(char *ref, unsigned char *sha1)
1184{
1185        char *url;
1186        char hex[42];
1187        struct buffer buffer;
1188        char *base = alt->base;
1189        struct active_request_slot *slot;
1190        struct slot_results results;
1191        buffer.size = 41;
1192        buffer.posn = 0;
1193        buffer.buffer = hex;
1194        hex[41] = '\0';
1195
1196        url = quote_ref_url(base, ref);
1197        slot = get_active_slot();
1198        slot->results = &results;
1199        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1200        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1201        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1202        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1203        if (start_active_slot(slot)) {
1204                run_active_slot(slot);
1205                if (results.curl_result != CURLE_OK)
1206                        return error("Couldn't get %s for %s\n%s",
1207                                     url, ref, curl_errorstr);
1208        } else {
1209                return error("Unable to start request");
1210        }
1211
1212        hex[40] = '\0';
1213        get_sha1_hex(hex, sha1);
1214        return 0;
1215}
1216
1217int main(int argc, char **argv)
1218{
1219        char *commit_id;
1220        char *url;
1221        char *path;
1222        int arg = 1;
1223        int rc = 0;
1224
1225        setup_git_directory();
1226        git_config(git_default_config);
1227
1228        while (arg < argc && argv[arg][0] == '-') {
1229                if (argv[arg][1] == 't') {
1230                        get_tree = 1;
1231                } else if (argv[arg][1] == 'c') {
1232                        get_history = 1;
1233                } else if (argv[arg][1] == 'a') {
1234                        get_all = 1;
1235                        get_tree = 1;
1236                        get_history = 1;
1237                } else if (argv[arg][1] == 'v') {
1238                        get_verbosely = 1;
1239                } else if (argv[arg][1] == 'w') {
1240                        write_ref = argv[arg + 1];
1241                        arg++;
1242                } else if (!strcmp(argv[arg], "--recover")) {
1243                        get_recover = 1;
1244                }
1245                arg++;
1246        }
1247        if (argc < arg + 2) {
1248                usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1249                return 1;
1250        }
1251        commit_id = argv[arg];
1252        url = argv[arg + 1];
1253        write_ref_log_details = url;
1254
1255        http_init();
1256
1257        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1258
1259        alt = xmalloc(sizeof(*alt));
1260        alt->base = url;
1261        alt->got_indices = 0;
1262        alt->packs = NULL;
1263        alt->next = NULL;
1264        path = strstr(url, "//");
1265        if (path) {
1266                path = strchr(path+2, '/');
1267                if (path)
1268                        alt->path_len = strlen(path);
1269        }
1270
1271        if (pull(commit_id))
1272                rc = 1;
1273
1274        http_cleanup();
1275
1276        curl_slist_free_all(no_pragma_header);
1277
1278        if (corrupt_object_found) {
1279                fprintf(stderr,
1280"Some loose object were found to be corrupt, but they might be just\n"
1281"a false '404 Not Found' error message sent with incorrect HTTP\n"
1282"status code.  Suggest running git fsck-objects.\n");
1283        }
1284        return rc;
1285}