http-fetch.c on commit simple euristic for further free packing improvements (4e8da19)
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
/*
 * Definitions for DAV requests.
 *
 * DAV_PROPFIND is the custom HTTP method name sent by remote_ls().
 * The DAV_PROPFIND_* strings are dotted element paths in the form that
 * xml_start_tag() builds (one ".name" per open element, namespace prefix
 * removed); handle_remote_ls_ctx() compares the current path against them.
 * PROPFIND_ALL_REQUEST is the XML body uploaded with the request.
 */
#define DAV_PROPFIND "PROPFIND"
#define DAV_PROPFIND_RESP ".multistatus.response"
#define DAV_PROPFIND_NAME ".multistatus.response.href"
#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
/*
 * Definitions for processing XML DAV responses.
 *
 * Fallback used only when the included expat header does not define
 * XML_STATUS_OK (presumably older expat releases -- TODO confirm).  The
 * macros are defined after the enum so that the #ifndef test above also
 * works for later inclusions.
 */
#ifndef XML_STATUS_OK
enum XML_Status {
  XML_STATUS_OK = 1,
  XML_STATUS_ERROR = 0
};
#define XML_STATUS_OK    1
#define XML_STATUS_ERROR 0
#endif
  26
/*
 * Flags that control remote_ls processing (the "flags" argument):
 *   PROCESS_FILES - call the user callback for non-directory entries
 *   PROCESS_DIRS  - call the user callback for directory entries
 *   RECURSIVE     - run remote_ls again on each directory entry other than
 *                   the listed path itself
 */
#define PROCESS_FILES (1u << 0)
#define PROCESS_DIRS  (1u << 1)
#define RECURSIVE     (1u << 2)

/*
 * Flags that remote_ls passes to callback functions (in dentry_flags):
 *   IS_DIR - the response contained a <collection> resource type
 */
#define IS_DIR (1u << 0)
  34#endif
  35
/* Size of the buffer used to re-read a partial download left by an earlier attempt. */
#define PREV_BUF_SIZE 4096
/* Size of the buffer that holds a formatted "Range: bytes=N-" request header. */
#define RANGE_HEADER_SIZE 30
  38
/*
 * State of the alternates lookup, as used by fetch_alternates() and
 * process_alternates_response():
 *   -1  not fetched yet, or the last attempt failed
 *    0  a fetch is in progress
 *    1  alternates have been fetched and appended to the "alt" list
 */
static int got_alternates = -1;
/* Not referenced in this part of the file; presumably set when a fetched
 * object fails verification -- TODO confirm against the rest of the file. */
static int corrupt_object_found = 0;

/* Header list installed with CURLOPT_HTTPHEADER on object and index
 * requests; it is initialized outside this part of the file. */
static struct curl_slist *no_pragma_header;
  43
/*
 * One remote repository that objects may be fetched from.  Entries form a
 * singly linked list; alternates discovered on the server are appended to
 * the tail by process_alternates_response().
 */
struct alt_base
{
        char *base;                /* URL prefix that request URLs are built from */
        int path_len;              /* strlen() of the path part of base (from the
                                      first '/' after "//"); used to strip that
                                      prefix from DAV hrefs */
        int got_indices;           /* created as 0; updated outside this part of
                                      the file */
        struct packed_git *packs;  /* packs whose index setup_index() has fetched */
        struct alt_base *next;
};

/* Head of the repository list; its first entry is created outside this
 * part of the file. */
static struct alt_base *alt = NULL;
  54
/* Life cycle of a queued object request. */
enum object_request_state {
        WAITING,        /* queued by prefetch(), transfer not started */
        ABORTED,        /* start_object_request() could not start the transfer */
        ACTIVE,         /* transfer handed to a request slot */
        COMPLETE,       /* response processed, or the object was already present */
};
  61
/*
 * A request for one loose object.  Requests are kept in a singly linked
 * queue headed by object_queue_head.
 */
struct object_request
{
        unsigned char sha1[20];         /* id of the requested object */
        struct alt_base *repo;          /* repository currently being tried */
        char *url;                      /* heap copy of the request URL, or NULL */
        char filename[PATH_MAX];        /* final local path of the object */
        char tmpfile[PATH_MAX];         /* filename + ".temp", download target */
        int local;                      /* fd open on tmpfile, -1 when closed */
        enum object_request_state state;
        CURLcode curl_result;           /* copied from the slot on completion */
        char errorstr[CURL_ERROR_SIZE]; /* curl error buffer for this request */
        long http_code;                 /* copied from the slot on completion */
        unsigned char real_sha1[20];    /* SHA-1 computed over the inflated data */
        SHA_CTX c;                      /* running SHA-1 state */
        z_stream stream;                /* zlib inflate state */
        int zret;                       /* result of the most recent inflate() */
        int rename;                     /* result of move_temp_to_file() */
        struct active_request_slot *slot; /* slot running the transfer, or NULL */
        struct object_request *next;
};
  82
/*
 * Context handed to process_alternates_response().  fetch_alternates()
 * keeps it on its stack for the duration of the request.
 */
struct alternates_request {
        char *base;     /* repository URL the alternates are fetched for */
        char *url;      /* request URL buffer; rewritten for the fallback request */
        struct buffer *buffer; /* receives the response body */
        struct active_request_slot *slot;
        int http_specific; /* 1 while requesting objects/info/http-alternates,
                              0 after falling back to objects/info/alternates */
};
  90
  91#ifndef NO_EXPAT
/* Parser state shared by the expat callbacks xml_start_tag(), xml_end_tag()
 * and xml_cdata(). */
struct xml_ctx
{
        char *name;     /* dotted path of the currently open elements,
                           e.g. ".multistatus.response" */
        int len;        /* size that name was last reallocated to */
        char *cdata;    /* most recent character data, or NULL */
        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
                        /* called with 0 after a tag opens and with 1 just
                           before a closing tag is removed from name */
        void *userData; /* opaque pointer for userFunc */
};
 100
/* State of one remote_ls() directory listing. */
struct remote_ls_ctx
{
        struct alt_base *repo;  /* repository being listed */
        char *path;             /* copy of the path being listed */
        void (*userFunc)(struct remote_ls_ctx *ls);
                                /* callback invoked for matching entries */
        void *userData;         /* opaque pointer for userFunc */
        int flags;              /* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
        char *dentry_name;      /* name of the entry currently being parsed
                                   (href with the repository path prefix
                                   removed), or NULL */
        int dentry_flags;       /* IS_DIR when the entry is a collection */
        int rc;                 /* result that remote_ls() returns */
        struct remote_ls_ctx *parent; /* not assigned in this part of the file */
};
 113#endif
 114
/* Head of the queue of object requests; prefetch() appends to the tail and
 * release_object_request() unlinks entries. */
static struct object_request *object_queue_head = NULL;
 116
 117static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 118                               void *data)
 119{
 120        unsigned char expn[4096];
 121        size_t size = eltsize * nmemb;
 122        int posn = 0;
 123        struct object_request *obj_req = (struct object_request *)data;
 124        do {
 125                ssize_t retval = write(obj_req->local,
 126                                       ptr + posn, size - posn);
 127                if (retval < 0)
 128                        return posn;
 129                posn += retval;
 130        } while (posn < size);
 131
 132        obj_req->stream.avail_in = size;
 133        obj_req->stream.next_in = ptr;
 134        do {
 135                obj_req->stream.next_out = expn;
 136                obj_req->stream.avail_out = sizeof(expn);
 137                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 138                SHA1_Update(&obj_req->c, expn,
 139                            sizeof(expn) - obj_req->stream.avail_out);
 140        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 141        data_received++;
 142        return size;
 143}
 144
/* Forward declaration: process_object_response() calls this before it is defined. */
static void fetch_alternates(char *base);

/* Forward declaration: start_object_request() installs this as the slot callback. */
static void process_object_response(void *callback_data);
 148
 149static void start_object_request(struct object_request *obj_req)
 150{
 151        char *hex = sha1_to_hex(obj_req->sha1);
 152        char prevfile[PATH_MAX];
 153        char *url;
 154        char *posn;
 155        int prevlocal;
 156        unsigned char prev_buf[PREV_BUF_SIZE];
 157        ssize_t prev_read = 0;
 158        long prev_posn = 0;
 159        char range[RANGE_HEADER_SIZE];
 160        struct curl_slist *range_header = NULL;
 161        struct active_request_slot *slot;
 162
 163        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 164        unlink(prevfile);
 165        rename(obj_req->tmpfile, prevfile);
 166        unlink(obj_req->tmpfile);
 167
 168        if (obj_req->local != -1)
 169                error("fd leakage in start: %d", obj_req->local);
 170        obj_req->local = open(obj_req->tmpfile,
 171                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 172        /* This could have failed due to the "lazy directory creation";
 173         * try to mkdir the last path component.
 174         */
 175        if (obj_req->local < 0 && errno == ENOENT) {
 176                char *dir = strrchr(obj_req->tmpfile, '/');
 177                if (dir) {
 178                        *dir = 0;
 179                        mkdir(obj_req->tmpfile, 0777);
 180                        *dir = '/';
 181                }
 182                obj_req->local = open(obj_req->tmpfile,
 183                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 184        }
 185
 186        if (obj_req->local < 0) {
 187                obj_req->state = ABORTED;
 188                error("Couldn't create temporary file %s for %s: %s",
 189                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 190                return;
 191        }
 192
 193        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 194
 195        inflateInit(&obj_req->stream);
 196
 197        SHA1_Init(&obj_req->c);
 198
 199        url = xmalloc(strlen(obj_req->repo->base) + 50);
 200        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 201        strcpy(url, obj_req->repo->base);
 202        posn = url + strlen(obj_req->repo->base);
 203        strcpy(posn, "objects/");
 204        posn += 8;
 205        memcpy(posn, hex, 2);
 206        posn += 2;
 207        *(posn++) = '/';
 208        strcpy(posn, hex + 2);
 209        strcpy(obj_req->url, url);
 210
 211        /* If a previous temp file is present, process what was already
 212           fetched. */
 213        prevlocal = open(prevfile, O_RDONLY);
 214        if (prevlocal != -1) {
 215                do {
 216                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 217                        if (prev_read>0) {
 218                                if (fwrite_sha1_file(prev_buf,
 219                                                     1,
 220                                                     prev_read,
 221                                                     obj_req) == prev_read) {
 222                                        prev_posn += prev_read;
 223                                } else {
 224                                        prev_read = -1;
 225                                }
 226                        }
 227                } while (prev_read > 0);
 228                close(prevlocal);
 229        }
 230        unlink(prevfile);
 231
 232        /* Reset inflate/SHA1 if there was an error reading the previous temp
 233           file; also rewind to the beginning of the local file. */
 234        if (prev_read == -1) {
 235                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 236                inflateInit(&obj_req->stream);
 237                SHA1_Init(&obj_req->c);
 238                if (prev_posn>0) {
 239                        prev_posn = 0;
 240                        lseek(obj_req->local, SEEK_SET, 0);
 241                        ftruncate(obj_req->local, 0);
 242                }
 243        }
 244
 245        slot = get_active_slot();
 246        slot->callback_func = process_object_response;
 247        slot->callback_data = obj_req;
 248        obj_req->slot = slot;
 249
 250        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 251        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 252        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 253        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 254        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 255
 256        /* If we have successfully processed data from a previous fetch
 257           attempt, only fetch the data we don't already have. */
 258        if (prev_posn>0) {
 259                if (get_verbosely)
 260                        fprintf(stderr,
 261                                "Resuming fetch of object %s at byte %ld\n",
 262                                hex, prev_posn);
 263                sprintf(range, "Range: bytes=%ld-", prev_posn);
 264                range_header = curl_slist_append(range_header, range);
 265                curl_easy_setopt(slot->curl,
 266                                 CURLOPT_HTTPHEADER, range_header);
 267        }
 268
 269        /* Try to get the request started, abort the request on error */
 270        obj_req->state = ACTIVE;
 271        if (!start_active_slot(slot)) {
 272                obj_req->state = ABORTED;
 273                obj_req->slot = NULL;
 274                close(obj_req->local); obj_req->local = -1;
 275                free(obj_req->url);
 276                return;
 277        }
 278}
 279
 280static void finish_object_request(struct object_request *obj_req)
 281{
 282        struct stat st;
 283
 284        fchmod(obj_req->local, 0444);
 285        close(obj_req->local); obj_req->local = -1;
 286
 287        if (obj_req->http_code == 416) {
 288                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 289        } else if (obj_req->curl_result != CURLE_OK) {
 290                if (stat(obj_req->tmpfile, &st) == 0)
 291                        if (st.st_size == 0)
 292                                unlink(obj_req->tmpfile);
 293                return;
 294        }
 295
 296        inflateEnd(&obj_req->stream);
 297        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 298        if (obj_req->zret != Z_STREAM_END) {
 299                unlink(obj_req->tmpfile);
 300                return;
 301        }
 302        if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
 303                unlink(obj_req->tmpfile);
 304                return;
 305        }
 306        obj_req->rename =
 307                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 308
 309        if (obj_req->rename == 0)
 310                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 311}
 312
 313static void process_object_response(void *callback_data)
 314{
 315        struct object_request *obj_req =
 316                (struct object_request *)callback_data;
 317
 318        obj_req->curl_result = obj_req->slot->curl_result;
 319        obj_req->http_code = obj_req->slot->http_code;
 320        obj_req->slot = NULL;
 321        obj_req->state = COMPLETE;
 322
 323        /* Use alternates if necessary */
 324        if (obj_req->http_code == 404 ||
 325            obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 326                fetch_alternates(alt->base);
 327                if (obj_req->repo->next != NULL) {
 328                        obj_req->repo =
 329                                obj_req->repo->next;
 330                        close(obj_req->local);
 331                        obj_req->local = -1;
 332                        start_object_request(obj_req);
 333                        return;
 334                }
 335        }
 336
 337        finish_object_request(obj_req);
 338}
 339
 340static void release_object_request(struct object_request *obj_req)
 341{
 342        struct object_request *entry = object_queue_head;
 343
 344        if (obj_req->local != -1)
 345                error("fd leakage in release: %d", obj_req->local);
 346        if (obj_req == object_queue_head) {
 347                object_queue_head = obj_req->next;
 348        } else {
 349                while (entry->next != NULL && entry->next != obj_req)
 350                        entry = entry->next;
 351                if (entry->next == obj_req)
 352                        entry->next = entry->next->next;
 353        }
 354
 355        free(obj_req->url);
 356        free(obj_req);
 357}
 358
 359#ifdef USE_CURL_MULTI
 360void fill_active_slots(void)
 361{
 362        struct object_request *obj_req = object_queue_head;
 363        struct active_request_slot *slot = active_queue_head;
 364        int num_transfers;
 365
 366        while (active_requests < max_requests && obj_req != NULL) {
 367                if (obj_req->state == WAITING) {
 368                        if (has_sha1_file(obj_req->sha1))
 369                                obj_req->state = COMPLETE;
 370                        else
 371                                start_object_request(obj_req);
 372                        curl_multi_perform(curlm, &num_transfers);
 373                }
 374                obj_req = obj_req->next;
 375        }
 376
 377        while (slot != NULL) {
 378                if (!slot->in_use && slot->curl != NULL) {
 379                        curl_easy_cleanup(slot->curl);
 380                        slot->curl = NULL;
 381                }
 382                slot = slot->next;
 383        }
 384}
 385#endif
 386
 387void prefetch(unsigned char *sha1)
 388{
 389        struct object_request *newreq;
 390        struct object_request *tail;
 391        char *filename = sha1_file_name(sha1);
 392
 393        newreq = xmalloc(sizeof(*newreq));
 394        memcpy(newreq->sha1, sha1, 20);
 395        newreq->repo = alt;
 396        newreq->url = NULL;
 397        newreq->local = -1;
 398        newreq->state = WAITING;
 399        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 400        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 401                 "%s.temp", filename);
 402        newreq->next = NULL;
 403
 404        if (object_queue_head == NULL) {
 405                object_queue_head = newreq;
 406        } else {
 407                tail = object_queue_head;
 408                while (tail->next != NULL) {
 409                        tail = tail->next;
 410                }
 411                tail->next = newreq;
 412        }
 413
 414#ifdef USE_CURL_MULTI
 415        fill_active_slots();
 416        step_active_slots();
 417#endif
 418}
 419
 420static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 421{
 422        char *hex = sha1_to_hex(sha1);
 423        char *filename;
 424        char *url;
 425        char tmpfile[PATH_MAX];
 426        long prev_posn = 0;
 427        char range[RANGE_HEADER_SIZE];
 428        struct curl_slist *range_header = NULL;
 429
 430        FILE *indexfile;
 431        struct active_request_slot *slot;
 432        struct slot_results results;
 433
 434        if (has_pack_index(sha1))
 435                return 0;
 436
 437        if (get_verbosely)
 438                fprintf(stderr, "Getting index for pack %s\n", hex);
 439
 440        url = xmalloc(strlen(repo->base) + 64);
 441        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 442
 443        filename = sha1_pack_index_name(sha1);
 444        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 445        indexfile = fopen(tmpfile, "a");
 446        if (!indexfile)
 447                return error("Unable to open local file %s for pack index",
 448                             filename);
 449
 450        slot = get_active_slot();
 451        slot->results = &results;
 452        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 453        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 454        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 455        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 456        slot->local = indexfile;
 457
 458        /* If there is data present from a previous transfer attempt,
 459           resume where it left off */
 460        prev_posn = ftell(indexfile);
 461        if (prev_posn>0) {
 462                if (get_verbosely)
 463                        fprintf(stderr,
 464                                "Resuming fetch of index for pack %s at byte %ld\n",
 465                                hex, prev_posn);
 466                sprintf(range, "Range: bytes=%ld-", prev_posn);
 467                range_header = curl_slist_append(range_header, range);
 468                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 469        }
 470
 471        if (start_active_slot(slot)) {
 472                run_active_slot(slot);
 473                if (results.curl_result != CURLE_OK) {
 474                        fclose(indexfile);
 475                        return error("Unable to get pack index %s\n%s", url,
 476                                     curl_errorstr);
 477                }
 478        } else {
 479                fclose(indexfile);
 480                return error("Unable to start request");
 481        }
 482
 483        fclose(indexfile);
 484
 485        return move_temp_to_file(tmpfile, filename);
 486}
 487
 488static int setup_index(struct alt_base *repo, unsigned char *sha1)
 489{
 490        struct packed_git *new_pack;
 491        if (has_pack_file(sha1))
 492                return 0; // don't list this as something we can get
 493
 494        if (fetch_index(repo, sha1))
 495                return -1;
 496
 497        new_pack = parse_pack_index(sha1);
 498        new_pack->next = repo->packs;
 499        repo->packs = new_pack;
 500        return 0;
 501}
 502
 503static void process_alternates_response(void *callback_data)
 504{
 505        struct alternates_request *alt_req =
 506                (struct alternates_request *)callback_data;
 507        struct active_request_slot *slot = alt_req->slot;
 508        struct alt_base *tail = alt;
 509        char *base = alt_req->base;
 510        static const char null_byte = '\0';
 511        char *data;
 512        int i = 0;
 513
 514        if (alt_req->http_specific) {
 515                if (slot->curl_result != CURLE_OK ||
 516                    !alt_req->buffer->posn) {
 517
 518                        /* Try reusing the slot to get non-http alternates */
 519                        alt_req->http_specific = 0;
 520                        sprintf(alt_req->url, "%s/objects/info/alternates",
 521                                base);
 522                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 523                                         alt_req->url);
 524                        active_requests++;
 525                        slot->in_use = 1;
 526                        if (slot->finished != NULL)
 527                                (*slot->finished) = 0;
 528                        if (!start_active_slot(slot)) {
 529                                got_alternates = -1;
 530                                slot->in_use = 0;
 531                                if (slot->finished != NULL)
 532                                        (*slot->finished) = 1;
 533                        }
 534                        return;
 535                }
 536        } else if (slot->curl_result != CURLE_OK) {
 537                if (slot->http_code != 404 &&
 538                    slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
 539                        got_alternates = -1;
 540                        return;
 541                }
 542        }
 543
 544        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 545        alt_req->buffer->posn--;
 546        data = alt_req->buffer->buffer;
 547
 548        while (i < alt_req->buffer->posn) {
 549                int posn = i;
 550                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 551                        posn++;
 552                if (data[posn] == '\n') {
 553                        int okay = 0;
 554                        int serverlen = 0;
 555                        struct alt_base *newalt;
 556                        char *target = NULL;
 557                        char *path;
 558                        if (data[i] == '/') {
 559                                serverlen = strchr(base + 8, '/') - base;
 560                                okay = 1;
 561                        } else if (!memcmp(data + i, "../", 3)) {
 562                                i += 3;
 563                                serverlen = strlen(base);
 564                                while (i + 2 < posn &&
 565                                       !memcmp(data + i, "../", 3)) {
 566                                        do {
 567                                                serverlen--;
 568                                        } while (serverlen &&
 569                                                 base[serverlen - 1] != '/');
 570                                        i += 3;
 571                                }
 572                                // If the server got removed, give up.
 573                                okay = strchr(base, ':') - base + 3 <
 574                                        serverlen;
 575                        } else if (alt_req->http_specific) {
 576                                char *colon = strchr(data + i, ':');
 577                                char *slash = strchr(data + i, '/');
 578                                if (colon && slash && colon < data + posn &&
 579                                    slash < data + posn && colon < slash) {
 580                                        okay = 1;
 581                                }
 582                        }
 583                        // skip 'objects' at end
 584                        if (okay) {
 585                                target = xmalloc(serverlen + posn - i - 6);
 586                                strncpy(target, base, serverlen);
 587                                strncpy(target + serverlen, data + i,
 588                                        posn - i - 7);
 589                                target[serverlen + posn - i - 7] = '\0';
 590                                if (get_verbosely)
 591                                        fprintf(stderr,
 592                                                "Also look at %s\n", target);
 593                                newalt = xmalloc(sizeof(*newalt));
 594                                newalt->next = NULL;
 595                                newalt->base = target;
 596                                newalt->got_indices = 0;
 597                                newalt->packs = NULL;
 598                                path = strstr(target, "//");
 599                                if (path) {
 600                                        path = strchr(path+2, '/');
 601                                        if (path)
 602                                                newalt->path_len = strlen(path);
 603                                }
 604
 605                                while (tail->next != NULL)
 606                                        tail = tail->next;
 607                                tail->next = newalt;
 608                        }
 609                }
 610                i = posn + 1;
 611        }
 612
 613        got_alternates = 1;
 614}
 615
 616static void fetch_alternates(char *base)
 617{
 618        struct buffer buffer;
 619        char *url;
 620        char *data;
 621        struct active_request_slot *slot;
 622        struct alternates_request alt_req;
 623
 624        /* If another request has already started fetching alternates,
 625           wait for them to arrive and return to processing this request's
 626           curl message */
 627#ifdef USE_CURL_MULTI
 628        while (got_alternates == 0) {
 629                step_active_slots();
 630        }
 631#endif
 632
 633        /* Nothing to do if they've already been fetched */
 634        if (got_alternates == 1)
 635                return;
 636
 637        /* Start the fetch */
 638        got_alternates = 0;
 639
 640        data = xmalloc(4096);
 641        buffer.size = 4096;
 642        buffer.posn = 0;
 643        buffer.buffer = data;
 644
 645        if (get_verbosely)
 646                fprintf(stderr, "Getting alternates list for %s\n", base);
 647
 648        url = xmalloc(strlen(base) + 31);
 649        sprintf(url, "%s/objects/info/http-alternates", base);
 650
 651        /* Use a callback to process the result, since another request
 652           may fail and need to have alternates loaded before continuing */
 653        slot = get_active_slot();
 654        slot->callback_func = process_alternates_response;
 655        slot->callback_data = &alt_req;
 656
 657        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 658        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 659        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 660
 661        alt_req.base = base;
 662        alt_req.url = url;
 663        alt_req.buffer = &buffer;
 664        alt_req.http_specific = 1;
 665        alt_req.slot = slot;
 666
 667        if (start_active_slot(slot))
 668                run_active_slot(slot);
 669        else
 670                got_alternates = -1;
 671
 672        free(data);
 673        free(url);
 674}
 675
 676#ifndef NO_EXPAT
 677static void
 678xml_start_tag(void *userData, const char *name, const char **atts)
 679{
 680        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 681        const char *c = strchr(name, ':');
 682        int new_len;
 683
 684        if (c == NULL)
 685                c = name;
 686        else
 687                c++;
 688
 689        new_len = strlen(ctx->name) + strlen(c) + 2;
 690
 691        if (new_len > ctx->len) {
 692                ctx->name = xrealloc(ctx->name, new_len);
 693                ctx->len = new_len;
 694        }
 695        strcat(ctx->name, ".");
 696        strcat(ctx->name, c);
 697
 698        if (ctx->cdata) {
 699                free(ctx->cdata);
 700                ctx->cdata = NULL;
 701        }
 702
 703        ctx->userFunc(ctx, 0);
 704}
 705
 706static void
 707xml_end_tag(void *userData, const char *name)
 708{
 709        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 710        const char *c = strchr(name, ':');
 711        char *ep;
 712
 713        ctx->userFunc(ctx, 1);
 714
 715        if (c == NULL)
 716                c = name;
 717        else
 718                c++;
 719
 720        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 721        *ep = 0;
 722}
 723
 724static void
 725xml_cdata(void *userData, const XML_Char *s, int len)
 726{
 727        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 728        if (ctx->cdata)
 729                free(ctx->cdata);
 730        ctx->cdata = xcalloc(len+1, 1);
 731        strncpy(ctx->cdata, s, len);
 732}
 733
/* Forward declaration: handle_remote_ls_ctx() calls remote_ls() again for
 * directory entries when RECURSIVE is set. */
static int remote_ls(struct alt_base *repo, const char *path, int flags,
                     void (*userFunc)(struct remote_ls_ctx *ls),
                     void *userData);
 737
 738static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 739{
 740        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 741
 742        if (tag_closed) {
 743                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 744                        if (ls->dentry_flags & IS_DIR) {
 745                                if (ls->flags & PROCESS_DIRS) {
 746                                        ls->userFunc(ls);
 747                                }
 748                                if (strcmp(ls->dentry_name, ls->path) &&
 749                                    ls->flags & RECURSIVE) {
 750                                        ls->rc = remote_ls(ls->repo,
 751                                                           ls->dentry_name,
 752                                                           ls->flags,
 753                                                           ls->userFunc,
 754                                                           ls->userData);
 755                                }
 756                        } else if (ls->flags & PROCESS_FILES) {
 757                                ls->userFunc(ls);
 758                        }
 759                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 760                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 761                                                  ls->repo->path_len + 1);
 762                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 763                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 764                        ls->dentry_flags |= IS_DIR;
 765                }
 766        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 767                if (ls->dentry_name) {
 768                        free(ls->dentry_name);
 769                }
 770                ls->dentry_name = NULL;
 771                ls->dentry_flags = 0;
 772        }
 773}
 774
 775static int remote_ls(struct alt_base *repo, const char *path, int flags,
 776                     void (*userFunc)(struct remote_ls_ctx *ls),
 777                     void *userData)
 778{
 779        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 780        struct active_request_slot *slot;
 781        struct slot_results results;
 782        struct buffer in_buffer;
 783        struct buffer out_buffer;
 784        char *in_data;
 785        char *out_data;
 786        XML_Parser parser = XML_ParserCreate(NULL);
 787        enum XML_Status result;
 788        struct curl_slist *dav_headers = NULL;
 789        struct xml_ctx ctx;
 790        struct remote_ls_ctx ls;
 791
 792        ls.flags = flags;
 793        ls.repo = repo;
 794        ls.path = strdup(path);
 795        ls.dentry_name = NULL;
 796        ls.dentry_flags = 0;
 797        ls.userData = userData;
 798        ls.userFunc = userFunc;
 799        ls.rc = 0;
 800
 801        sprintf(url, "%s%s", repo->base, path);
 802
 803        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 804        out_data = xmalloc(out_buffer.size + 1);
 805        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 806        out_buffer.posn = 0;
 807        out_buffer.buffer = out_data;
 808
 809        in_buffer.size = 4096;
 810        in_data = xmalloc(in_buffer.size);
 811        in_buffer.posn = 0;
 812        in_buffer.buffer = in_data;
 813
 814        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 815        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 816
 817        slot = get_active_slot();
 818        slot->results = &results;
 819        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 820        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 821        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 822        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 823        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 824        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 825        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 826        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 827        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 828
 829        if (start_active_slot(slot)) {
 830                run_active_slot(slot);
 831                if (results.curl_result == CURLE_OK) {
 832                        ctx.name = xcalloc(10, 1);
 833                        ctx.len = 0;
 834                        ctx.cdata = NULL;
 835                        ctx.userFunc = handle_remote_ls_ctx;
 836                        ctx.userData = &ls;
 837                        XML_SetUserData(parser, &ctx);
 838                        XML_SetElementHandler(parser, xml_start_tag,
 839                                              xml_end_tag);
 840                        XML_SetCharacterDataHandler(parser, xml_cdata);
 841                        result = XML_Parse(parser, in_buffer.buffer,
 842                                           in_buffer.posn, 1);
 843                        free(ctx.name);
 844
 845                        if (result != XML_STATUS_OK) {
 846                                ls.rc = error("XML error: %s",
 847                                              XML_ErrorString(
 848                                                      XML_GetErrorCode(parser)));
 849                        }
 850                } else {
 851                        ls.rc = -1;
 852                }
 853        } else {
 854                ls.rc = error("Unable to start PROPFIND request");
 855        }
 856
 857        free(ls.path);
 858        free(url);
 859        free(out_data);
 860        free(in_buffer.buffer);
 861        curl_slist_free_all(dav_headers);
 862
 863        return ls.rc;
 864}
 865
 866static void process_ls_pack(struct remote_ls_ctx *ls)
 867{
 868        unsigned char sha1[20];
 869
 870        if (strlen(ls->dentry_name) == 63 &&
 871            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 872            !strncmp(ls->dentry_name+58, ".pack", 5)) {
 873                get_sha1_hex(ls->dentry_name + 18, sha1);
 874                setup_index(ls->repo, sha1);
 875        }
 876}
 877#endif
 878
 879static int fetch_indices(struct alt_base *repo)
 880{
 881        unsigned char sha1[20];
 882        char *url;
 883        struct buffer buffer;
 884        char *data;
 885        int i = 0;
 886
 887        struct active_request_slot *slot;
 888        struct slot_results results;
 889
 890        if (repo->got_indices)
 891                return 0;
 892
 893        data = xmalloc(4096);
 894        buffer.size = 4096;
 895        buffer.posn = 0;
 896        buffer.buffer = data;
 897
 898        if (get_verbosely)
 899                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 900
 901#ifndef NO_EXPAT
 902        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 903                      process_ls_pack, NULL) == 0)
 904                return 0;
 905#endif
 906
 907        url = xmalloc(strlen(repo->base) + 21);
 908        sprintf(url, "%s/objects/info/packs", repo->base);
 909
 910        slot = get_active_slot();
 911        slot->results = &results;
 912        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 913        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 914        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 915        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 916        if (start_active_slot(slot)) {
 917                run_active_slot(slot);
 918                if (results.curl_result != CURLE_OK) {
 919                        if (results.http_code == 404 ||
 920                            results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 921                                repo->got_indices = 1;
 922                                free(buffer.buffer);
 923                                return 0;
 924                        } else {
 925                                repo->got_indices = 0;
 926                                free(buffer.buffer);
 927                                return error("%s", curl_errorstr);
 928                        }
 929                }
 930        } else {
 931                repo->got_indices = 0;
 932                free(buffer.buffer);
 933                return error("Unable to start request");
 934        }
 935
 936        data = buffer.buffer;
 937        while (i < buffer.posn) {
 938                switch (data[i]) {
 939                case 'P':
 940                        i++;
 941                        if (i + 52 <= buffer.posn &&
 942                            !strncmp(data + i, " pack-", 6) &&
 943                            !strncmp(data + i + 46, ".pack\n", 6)) {
 944                                get_sha1_hex(data + i + 6, sha1);
 945                                setup_index(repo, sha1);
 946                                i += 51;
 947                                break;
 948                        }
 949                default:
 950                        while (i < buffer.posn && data[i] != '\n')
 951                                i++;
 952                }
 953                i++;
 954        }
 955
 956        free(buffer.buffer);
 957        repo->got_indices = 1;
 958        return 0;
 959}
 960
 961static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 962{
 963        char *url;
 964        struct packed_git *target;
 965        struct packed_git **lst;
 966        FILE *packfile;
 967        char *filename;
 968        char tmpfile[PATH_MAX];
 969        int ret;
 970        long prev_posn = 0;
 971        char range[RANGE_HEADER_SIZE];
 972        struct curl_slist *range_header = NULL;
 973
 974        struct active_request_slot *slot;
 975        struct slot_results results;
 976
 977        if (fetch_indices(repo))
 978                return -1;
 979        target = find_sha1_pack(sha1, repo->packs);
 980        if (!target)
 981                return -1;
 982
 983        if (get_verbosely) {
 984                fprintf(stderr, "Getting pack %s\n",
 985                        sha1_to_hex(target->sha1));
 986                fprintf(stderr, " which contains %s\n",
 987                        sha1_to_hex(sha1));
 988        }
 989
 990        url = xmalloc(strlen(repo->base) + 65);
 991        sprintf(url, "%s/objects/pack/pack-%s.pack",
 992                repo->base, sha1_to_hex(target->sha1));
 993
 994        filename = sha1_pack_name(target->sha1);
 995        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 996        packfile = fopen(tmpfile, "a");
 997        if (!packfile)
 998                return error("Unable to open local file %s for pack",
 999                             filename);
1000
1001        slot = get_active_slot();
1002        slot->results = &results;
1003        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1004        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1005        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1006        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1007        slot->local = packfile;
1008
1009        /* If there is data present from a previous transfer attempt,
1010           resume where it left off */
1011        prev_posn = ftell(packfile);
1012        if (prev_posn>0) {
1013                if (get_verbosely)
1014                        fprintf(stderr,
1015                                "Resuming fetch of pack %s at byte %ld\n",
1016                                sha1_to_hex(target->sha1), prev_posn);
1017                sprintf(range, "Range: bytes=%ld-", prev_posn);
1018                range_header = curl_slist_append(range_header, range);
1019                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1020        }
1021
1022        if (start_active_slot(slot)) {
1023                run_active_slot(slot);
1024                if (results.curl_result != CURLE_OK) {
1025                        fclose(packfile);
1026                        return error("Unable to get pack file %s\n%s", url,
1027                                     curl_errorstr);
1028                }
1029        } else {
1030                fclose(packfile);
1031                return error("Unable to start request");
1032        }
1033
1034        fclose(packfile);
1035
1036        ret = move_temp_to_file(tmpfile, filename);
1037        if (ret)
1038                return ret;
1039
1040        lst = &repo->packs;
1041        while (*lst != target)
1042                lst = &((*lst)->next);
1043        *lst = (*lst)->next;
1044
1045        if (verify_pack(target, 0))
1046                return -1;
1047        install_packed_git(target);
1048
1049        return 0;
1050}
1051
1052static void abort_object_request(struct object_request *obj_req)
1053{
1054        if (obj_req->local >= 0) {
1055                close(obj_req->local);
1056                obj_req->local = -1;
1057        }
1058        unlink(obj_req->tmpfile);
1059        if (obj_req->slot) {
1060                release_active_slot(obj_req->slot);
1061                obj_req->slot = NULL;
1062        }
1063        release_object_request(obj_req);
1064}
1065
1066static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1067{
1068        char *hex = sha1_to_hex(sha1);
1069        int ret = 0;
1070        struct object_request *obj_req = object_queue_head;
1071
1072        while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1073                obj_req = obj_req->next;
1074        if (obj_req == NULL)
1075                return error("Couldn't find request for %s in the queue", hex);
1076
1077        if (has_sha1_file(obj_req->sha1)) {
1078                abort_object_request(obj_req);
1079                return 0;
1080        }
1081
1082#ifdef USE_CURL_MULTI
1083        while (obj_req->state == WAITING) {
1084                step_active_slots();
1085        }
1086#else
1087        start_object_request(obj_req);
1088#endif
1089
1090        while (obj_req->state == ACTIVE) {
1091                run_active_slot(obj_req->slot);
1092        }
1093        if (obj_req->local != -1) {
1094                close(obj_req->local); obj_req->local = -1;
1095        }
1096
1097        if (obj_req->state == ABORTED) {
1098                ret = error("Request for %s aborted", hex);
1099        } else if (obj_req->curl_result != CURLE_OK &&
1100                   obj_req->http_code != 416) {
1101                if (obj_req->http_code == 404 ||
1102                    obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1103                        ret = -1; /* Be silent, it is probably in a pack. */
1104                else
1105                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1106                                    obj_req->errorstr, obj_req->curl_result,
1107                                    obj_req->http_code, hex);
1108        } else if (obj_req->zret != Z_STREAM_END) {
1109                corrupt_object_found++;
1110                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1111        } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1112                ret = error("File %s has bad hash", hex);
1113        } else if (obj_req->rename < 0) {
1114                ret = error("unable to write sha1 filename %s",
1115                            obj_req->filename);
1116        }
1117
1118        release_object_request(obj_req);
1119        return ret;
1120}
1121
1122int fetch(unsigned char *sha1)
1123{
1124        struct alt_base *altbase = alt;
1125
1126        if (!fetch_object(altbase, sha1))
1127                return 0;
1128        while (altbase) {
1129                if (!fetch_pack(altbase, sha1))
1130                        return 0;
1131                fetch_alternates(alt->base);
1132                altbase = altbase->next;
1133        }
1134        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1135                     alt->base);
1136}
1137
/*
 * Return 0 when `ch` may appear unescaped in a ref URL -- an ASCII
 * letter, a digit, '/', '-' or '.' -- and 1 when it has to be
 * percent-encoded.
 *
 * Written with plain comparisons: the "case 'A'...'Z':" range syntax is
 * a compiler extension, not standard C.
 */
static inline int needs_quote(int ch)
{
        if ((ch >= 'A' && ch <= 'Z') ||
            (ch >= 'a' && ch <= 'z') ||
            (ch >= '0' && ch <= '9') ||
            ch == '/' || ch == '-' || ch == '.')
                return 0;
        return 1;
}
1148
/*
 * Map a value to its hexadecimal digit character: values below 10 give
 * '0' + v, everything else gives 'A' + v - 10 (upper case).
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1154
/*
 * Build "<base>refs/<ref>" in freshly allocated memory, replacing every
 * character of `ref` for which needs_quote() is true by '%' followed by
 * two upper-case hex digits.  `base` is copied unchanged.
 *
 * The result comes from xmalloc(); this function does not free it.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *src;
        char *out, *result;
        int total, base_len, c;

        base_len = strlen(base);

        /* base + "refs/" + NUL, then 1 or 3 bytes per ref character */
        total = base_len + 6;
        for (src = ref; (c = *src) != 0; src++) {
                if (needs_quote(c))
                        total += 3;
                else
                        total += 1;
        }

        result = xmalloc(total);
        memcpy(result, base, base_len);
        memcpy(result + base_len, "refs/", 5);

        out = result + base_len + 5;
        src = ref;
        while ((c = *src++) != 0) {
                if (!needs_quote(c)) {
                        *out++ = c;
                        continue;
                }
                *out++ = '%';
                *out++ = hex((c >> 4) & 0xF);
                *out++ = hex(c & 0xF);
        }
        *out = 0;

        return result;
}
1182
1183int fetch_ref(char *ref, unsigned char *sha1)
1184{
1185        char *url;
1186        char hex[42];
1187        struct buffer buffer;
1188        char *base = alt->base;
1189        struct active_request_slot *slot;
1190        struct slot_results results;
1191        buffer.size = 41;
1192        buffer.posn = 0;
1193        buffer.buffer = hex;
1194        hex[41] = '\0';
1195
1196        url = quote_ref_url(base, ref);
1197        slot = get_active_slot();
1198        slot->results = &results;
1199        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1200        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1201        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1202        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1203        if (start_active_slot(slot)) {
1204                run_active_slot(slot);
1205                if (results.curl_result != CURLE_OK)
1206                        return error("Couldn't get %s for %s\n%s",
1207                                     url, ref, curl_errorstr);
1208        } else {
1209                return error("Unable to start request");
1210        }
1211
1212        hex[40] = '\0';
1213        get_sha1_hex(hex, sha1);
1214        return 0;
1215}
1216
1217int main(int argc, char **argv)
1218{
1219        char *commit_id;
1220        char *url;
1221        char *path;
1222        int arg = 1;
1223        int rc = 0;
1224
1225        setup_git_directory();
1226
1227        while (arg < argc && argv[arg][0] == '-') {
1228                if (argv[arg][1] == 't') {
1229                        get_tree = 1;
1230                } else if (argv[arg][1] == 'c') {
1231                        get_history = 1;
1232                } else if (argv[arg][1] == 'a') {
1233                        get_all = 1;
1234                        get_tree = 1;
1235                        get_history = 1;
1236                } else if (argv[arg][1] == 'v') {
1237                        get_verbosely = 1;
1238                } else if (argv[arg][1] == 'w') {
1239                        write_ref = argv[arg + 1];
1240                        arg++;
1241                } else if (!strcmp(argv[arg], "--recover")) {
1242                        get_recover = 1;
1243                }
1244                arg++;
1245        }
1246        if (argc < arg + 2) {
1247                usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1248                return 1;
1249        }
1250        commit_id = argv[arg];
1251        url = argv[arg + 1];
1252
1253        http_init();
1254
1255        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1256
1257        alt = xmalloc(sizeof(*alt));
1258        alt->base = url;
1259        alt->got_indices = 0;
1260        alt->packs = NULL;
1261        alt->next = NULL;
1262        path = strstr(url, "//");
1263        if (path) {
1264                path = strchr(path+2, '/');
1265                if (path)
1266                        alt->path_len = strlen(path);
1267        }
1268
1269        if (pull(commit_id))
1270                rc = 1;
1271
1272        curl_slist_free_all(no_pragma_header);
1273
1274        http_cleanup();
1275
1276        if (corrupt_object_found) {
1277                fprintf(stderr,
1278"Some loose object were found to be corrupt, but they might be just\n"
1279"a false '404 Not Found' error message sent with incorrect HTTP\n"
1280"status code.  Suggest running git fsck-objects.\n");
1281        }
1282        return rc;
1283}