http-fetch.c on commit gitweb: Remove characters entities entirely when shortening string -- correction (0349b46)
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
  10/* Definitions for DAV requests */
  11#define DAV_PROPFIND "PROPFIND"
  12#define DAV_PROPFIND_RESP ".multistatus.response"
  13#define DAV_PROPFIND_NAME ".multistatus.response.href"
  14#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
  15#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
  17/* Definitions for processing XML DAV responses */
  18#ifndef XML_STATUS_OK
  19enum XML_Status {
  20  XML_STATUS_OK = 1,
  21  XML_STATUS_ERROR = 0
  22};
  23#define XML_STATUS_OK    1
  24#define XML_STATUS_ERROR 0
  25#endif
  26
  27/* Flags that control remote_ls processing */
  28#define PROCESS_FILES (1u << 0)
  29#define PROCESS_DIRS  (1u << 1)
  30#define RECURSIVE     (1u << 2)
  31
  32/* Flags that remote_ls passes to callback functions */
  33#define IS_DIR (1u << 0)
  34#endif
  35
  36#define PREV_BUF_SIZE 4096
  37#define RANGE_HEADER_SIZE 30
  38
  39static int commits_on_stdin = 0;
  40
  41static int got_alternates = -1;
  42static int corrupt_object_found = 0;
  43
  44static struct curl_slist *no_pragma_header;
  45
  46struct alt_base
  47{
  48        const char *base;
  49        int path_len;
  50        int got_indices;
  51        struct packed_git *packs;
  52        struct alt_base *next;
  53};
  54
  55static struct alt_base *alt = NULL;
  56
  57enum object_request_state {
  58        WAITING,
  59        ABORTED,
  60        ACTIVE,
  61        COMPLETE,
  62};
  63
  64struct object_request
  65{
  66        unsigned char sha1[20];
  67        struct alt_base *repo;
  68        char *url;
  69        char filename[PATH_MAX];
  70        char tmpfile[PATH_MAX];
  71        int local;
  72        enum object_request_state state;
  73        CURLcode curl_result;
  74        char errorstr[CURL_ERROR_SIZE];
  75        long http_code;
  76        unsigned char real_sha1[20];
  77        SHA_CTX c;
  78        z_stream stream;
  79        int zret;
  80        int rename;
  81        struct active_request_slot *slot;
  82        struct object_request *next;
  83};
  84
  85struct alternates_request {
  86        const char *base;
  87        char *url;
  88        struct buffer *buffer;
  89        struct active_request_slot *slot;
  90        int http_specific;
  91};
  92
  93#ifndef NO_EXPAT
  94struct xml_ctx
  95{
  96        char *name;
  97        int len;
  98        char *cdata;
  99        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
 100        void *userData;
 101};
 102
 103struct remote_ls_ctx
 104{
 105        struct alt_base *repo;
 106        char *path;
 107        void (*userFunc)(struct remote_ls_ctx *ls);
 108        void *userData;
 109        int flags;
 110        char *dentry_name;
 111        int dentry_flags;
 112        int rc;
 113        struct remote_ls_ctx *parent;
 114};
 115#endif
 116
 117static struct object_request *object_queue_head = NULL;
 118
 119static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 120                               void *data)
 121{
 122        unsigned char expn[4096];
 123        size_t size = eltsize * nmemb;
 124        int posn = 0;
 125        struct object_request *obj_req = (struct object_request *)data;
 126        do {
 127                ssize_t retval = write(obj_req->local,
 128                                       (char *) ptr + posn, size - posn);
 129                if (retval < 0)
 130                        return posn;
 131                posn += retval;
 132        } while (posn < size);
 133
 134        obj_req->stream.avail_in = size;
 135        obj_req->stream.next_in = ptr;
 136        do {
 137                obj_req->stream.next_out = expn;
 138                obj_req->stream.avail_out = sizeof(expn);
 139                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 140                SHA1_Update(&obj_req->c, expn,
 141                            sizeof(expn) - obj_req->stream.avail_out);
 142        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 143        data_received++;
 144        return size;
 145}
 146
 147static void fetch_alternates(const char *base);
 148
 149static void process_object_response(void *callback_data);
 150
 151static void start_object_request(struct object_request *obj_req)
 152{
 153        char *hex = sha1_to_hex(obj_req->sha1);
 154        char prevfile[PATH_MAX];
 155        char *url;
 156        char *posn;
 157        int prevlocal;
 158        unsigned char prev_buf[PREV_BUF_SIZE];
 159        ssize_t prev_read = 0;
 160        long prev_posn = 0;
 161        char range[RANGE_HEADER_SIZE];
 162        struct curl_slist *range_header = NULL;
 163        struct active_request_slot *slot;
 164
 165        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 166        unlink(prevfile);
 167        rename(obj_req->tmpfile, prevfile);
 168        unlink(obj_req->tmpfile);
 169
 170        if (obj_req->local != -1)
 171                error("fd leakage in start: %d", obj_req->local);
 172        obj_req->local = open(obj_req->tmpfile,
 173                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 174        /* This could have failed due to the "lazy directory creation";
 175         * try to mkdir the last path component.
 176         */
 177        if (obj_req->local < 0 && errno == ENOENT) {
 178                char *dir = strrchr(obj_req->tmpfile, '/');
 179                if (dir) {
 180                        *dir = 0;
 181                        mkdir(obj_req->tmpfile, 0777);
 182                        *dir = '/';
 183                }
 184                obj_req->local = open(obj_req->tmpfile,
 185                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 186        }
 187
 188        if (obj_req->local < 0) {
 189                obj_req->state = ABORTED;
 190                error("Couldn't create temporary file %s for %s: %s",
 191                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 192                return;
 193        }
 194
 195        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 196
 197        inflateInit(&obj_req->stream);
 198
 199        SHA1_Init(&obj_req->c);
 200
 201        url = xmalloc(strlen(obj_req->repo->base) + 50);
 202        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 203        strcpy(url, obj_req->repo->base);
 204        posn = url + strlen(obj_req->repo->base);
 205        strcpy(posn, "objects/");
 206        posn += 8;
 207        memcpy(posn, hex, 2);
 208        posn += 2;
 209        *(posn++) = '/';
 210        strcpy(posn, hex + 2);
 211        strcpy(obj_req->url, url);
 212
 213        /* If a previous temp file is present, process what was already
 214           fetched. */
 215        prevlocal = open(prevfile, O_RDONLY);
 216        if (prevlocal != -1) {
 217                do {
 218                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 219                        if (prev_read>0) {
 220                                if (fwrite_sha1_file(prev_buf,
 221                                                     1,
 222                                                     prev_read,
 223                                                     obj_req) == prev_read) {
 224                                        prev_posn += prev_read;
 225                                } else {
 226                                        prev_read = -1;
 227                                }
 228                        }
 229                } while (prev_read > 0);
 230                close(prevlocal);
 231        }
 232        unlink(prevfile);
 233
 234        /* Reset inflate/SHA1 if there was an error reading the previous temp
 235           file; also rewind to the beginning of the local file. */
 236        if (prev_read == -1) {
 237                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 238                inflateInit(&obj_req->stream);
 239                SHA1_Init(&obj_req->c);
 240                if (prev_posn>0) {
 241                        prev_posn = 0;
 242                        lseek(obj_req->local, SEEK_SET, 0);
 243                        ftruncate(obj_req->local, 0);
 244                }
 245        }
 246
 247        slot = get_active_slot();
 248        slot->callback_func = process_object_response;
 249        slot->callback_data = obj_req;
 250        obj_req->slot = slot;
 251
 252        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 253        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 254        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 255        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 256        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 257
 258        /* If we have successfully processed data from a previous fetch
 259           attempt, only fetch the data we don't already have. */
 260        if (prev_posn>0) {
 261                if (get_verbosely)
 262                        fprintf(stderr,
 263                                "Resuming fetch of object %s at byte %ld\n",
 264                                hex, prev_posn);
 265                sprintf(range, "Range: bytes=%ld-", prev_posn);
 266                range_header = curl_slist_append(range_header, range);
 267                curl_easy_setopt(slot->curl,
 268                                 CURLOPT_HTTPHEADER, range_header);
 269        }
 270
 271        /* Try to get the request started, abort the request on error */
 272        obj_req->state = ACTIVE;
 273        if (!start_active_slot(slot)) {
 274                obj_req->state = ABORTED;
 275                obj_req->slot = NULL;
 276                close(obj_req->local); obj_req->local = -1;
 277                free(obj_req->url);
 278                return;
 279        }
 280}
 281
 282static void finish_object_request(struct object_request *obj_req)
 283{
 284        struct stat st;
 285
 286        fchmod(obj_req->local, 0444);
 287        close(obj_req->local); obj_req->local = -1;
 288
 289        if (obj_req->http_code == 416) {
 290                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 291        } else if (obj_req->curl_result != CURLE_OK) {
 292                if (stat(obj_req->tmpfile, &st) == 0)
 293                        if (st.st_size == 0)
 294                                unlink(obj_req->tmpfile);
 295                return;
 296        }
 297
 298        inflateEnd(&obj_req->stream);
 299        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 300        if (obj_req->zret != Z_STREAM_END) {
 301                unlink(obj_req->tmpfile);
 302                return;
 303        }
 304        if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
 305                unlink(obj_req->tmpfile);
 306                return;
 307        }
 308        obj_req->rename =
 309                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 310
 311        if (obj_req->rename == 0)
 312                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 313}
 314
 315static void process_object_response(void *callback_data)
 316{
 317        struct object_request *obj_req =
 318                (struct object_request *)callback_data;
 319
 320        obj_req->curl_result = obj_req->slot->curl_result;
 321        obj_req->http_code = obj_req->slot->http_code;
 322        obj_req->slot = NULL;
 323        obj_req->state = COMPLETE;
 324
 325        /* Use alternates if necessary */
 326        if (obj_req->http_code == 404 ||
 327            obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 328                fetch_alternates(alt->base);
 329                if (obj_req->repo->next != NULL) {
 330                        obj_req->repo =
 331                                obj_req->repo->next;
 332                        close(obj_req->local);
 333                        obj_req->local = -1;
 334                        start_object_request(obj_req);
 335                        return;
 336                }
 337        }
 338
 339        finish_object_request(obj_req);
 340}
 341
 342static void release_object_request(struct object_request *obj_req)
 343{
 344        struct object_request *entry = object_queue_head;
 345
 346        if (obj_req->local != -1)
 347                error("fd leakage in release: %d", obj_req->local);
 348        if (obj_req == object_queue_head) {
 349                object_queue_head = obj_req->next;
 350        } else {
 351                while (entry->next != NULL && entry->next != obj_req)
 352                        entry = entry->next;
 353                if (entry->next == obj_req)
 354                        entry->next = entry->next->next;
 355        }
 356
 357        free(obj_req->url);
 358        free(obj_req);
 359}
 360
 361#ifdef USE_CURL_MULTI
 362void fill_active_slots(void)
 363{
 364        struct object_request *obj_req = object_queue_head;
 365        struct active_request_slot *slot = active_queue_head;
 366        int num_transfers;
 367
 368        while (active_requests < max_requests && obj_req != NULL) {
 369                if (obj_req->state == WAITING) {
 370                        if (has_sha1_file(obj_req->sha1))
 371                                obj_req->state = COMPLETE;
 372                        else
 373                                start_object_request(obj_req);
 374                        curl_multi_perform(curlm, &num_transfers);
 375                }
 376                obj_req = obj_req->next;
 377        }
 378
 379        while (slot != NULL) {
 380                if (!slot->in_use && slot->curl != NULL) {
 381                        curl_easy_cleanup(slot->curl);
 382                        slot->curl = NULL;
 383                }
 384                slot = slot->next;
 385        }
 386}
 387#endif
 388
 389void prefetch(unsigned char *sha1)
 390{
 391        struct object_request *newreq;
 392        struct object_request *tail;
 393        char *filename = sha1_file_name(sha1);
 394
 395        newreq = xmalloc(sizeof(*newreq));
 396        memcpy(newreq->sha1, sha1, 20);
 397        newreq->repo = alt;
 398        newreq->url = NULL;
 399        newreq->local = -1;
 400        newreq->state = WAITING;
 401        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 402        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 403                 "%s.temp", filename);
 404        newreq->slot = NULL;
 405        newreq->next = NULL;
 406
 407        if (object_queue_head == NULL) {
 408                object_queue_head = newreq;
 409        } else {
 410                tail = object_queue_head;
 411                while (tail->next != NULL) {
 412                        tail = tail->next;
 413                }
 414                tail->next = newreq;
 415        }
 416
 417#ifdef USE_CURL_MULTI
 418        fill_active_slots();
 419        step_active_slots();
 420#endif
 421}
 422
 423static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 424{
 425        char *hex = sha1_to_hex(sha1);
 426        char *filename;
 427        char *url;
 428        char tmpfile[PATH_MAX];
 429        long prev_posn = 0;
 430        char range[RANGE_HEADER_SIZE];
 431        struct curl_slist *range_header = NULL;
 432
 433        FILE *indexfile;
 434        struct active_request_slot *slot;
 435        struct slot_results results;
 436
 437        if (has_pack_index(sha1))
 438                return 0;
 439
 440        if (get_verbosely)
 441                fprintf(stderr, "Getting index for pack %s\n", hex);
 442
 443        url = xmalloc(strlen(repo->base) + 64);
 444        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 445
 446        filename = sha1_pack_index_name(sha1);
 447        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 448        indexfile = fopen(tmpfile, "a");
 449        if (!indexfile)
 450                return error("Unable to open local file %s for pack index",
 451                             filename);
 452
 453        slot = get_active_slot();
 454        slot->results = &results;
 455        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 456        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 457        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 458        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 459        slot->local = indexfile;
 460
 461        /* If there is data present from a previous transfer attempt,
 462           resume where it left off */
 463        prev_posn = ftell(indexfile);
 464        if (prev_posn>0) {
 465                if (get_verbosely)
 466                        fprintf(stderr,
 467                                "Resuming fetch of index for pack %s at byte %ld\n",
 468                                hex, prev_posn);
 469                sprintf(range, "Range: bytes=%ld-", prev_posn);
 470                range_header = curl_slist_append(range_header, range);
 471                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 472        }
 473
 474        if (start_active_slot(slot)) {
 475                run_active_slot(slot);
 476                if (results.curl_result != CURLE_OK) {
 477                        fclose(indexfile);
 478                        return error("Unable to get pack index %s\n%s", url,
 479                                     curl_errorstr);
 480                }
 481        } else {
 482                fclose(indexfile);
 483                return error("Unable to start request");
 484        }
 485
 486        fclose(indexfile);
 487
 488        return move_temp_to_file(tmpfile, filename);
 489}
 490
 491static int setup_index(struct alt_base *repo, unsigned char *sha1)
 492{
 493        struct packed_git *new_pack;
 494        if (has_pack_file(sha1))
 495                return 0; /* don't list this as something we can get */
 496
 497        if (fetch_index(repo, sha1))
 498                return -1;
 499
 500        new_pack = parse_pack_index(sha1);
 501        new_pack->next = repo->packs;
 502        repo->packs = new_pack;
 503        return 0;
 504}
 505
 506static void process_alternates_response(void *callback_data)
 507{
 508        struct alternates_request *alt_req =
 509                (struct alternates_request *)callback_data;
 510        struct active_request_slot *slot = alt_req->slot;
 511        struct alt_base *tail = alt;
 512        const char *base = alt_req->base;
 513        static const char null_byte = '\0';
 514        char *data;
 515        int i = 0;
 516
 517        if (alt_req->http_specific) {
 518                if (slot->curl_result != CURLE_OK ||
 519                    !alt_req->buffer->posn) {
 520
 521                        /* Try reusing the slot to get non-http alternates */
 522                        alt_req->http_specific = 0;
 523                        sprintf(alt_req->url, "%s/objects/info/alternates",
 524                                base);
 525                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 526                                         alt_req->url);
 527                        active_requests++;
 528                        slot->in_use = 1;
 529                        if (slot->finished != NULL)
 530                                (*slot->finished) = 0;
 531                        if (!start_active_slot(slot)) {
 532                                got_alternates = -1;
 533                                slot->in_use = 0;
 534                                if (slot->finished != NULL)
 535                                        (*slot->finished) = 1;
 536                        }
 537                        return;
 538                }
 539        } else if (slot->curl_result != CURLE_OK) {
 540                if (slot->http_code != 404 &&
 541                    slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
 542                        got_alternates = -1;
 543                        return;
 544                }
 545        }
 546
 547        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 548        alt_req->buffer->posn--;
 549        data = alt_req->buffer->buffer;
 550
 551        while (i < alt_req->buffer->posn) {
 552                int posn = i;
 553                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 554                        posn++;
 555                if (data[posn] == '\n') {
 556                        int okay = 0;
 557                        int serverlen = 0;
 558                        struct alt_base *newalt;
 559                        char *target = NULL;
 560                        char *path;
 561                        if (data[i] == '/') {
 562                                serverlen = strchr(base + 8, '/') - base;
 563                                okay = 1;
 564                        } else if (!memcmp(data + i, "../", 3)) {
 565                                i += 3;
 566                                serverlen = strlen(base);
 567                                while (i + 2 < posn &&
 568                                       !memcmp(data + i, "../", 3)) {
 569                                        do {
 570                                                serverlen--;
 571                                        } while (serverlen &&
 572                                                 base[serverlen - 1] != '/');
 573                                        i += 3;
 574                                }
 575                                /* If the server got removed, give up. */
 576                                okay = strchr(base, ':') - base + 3 <
 577                                        serverlen;
 578                        } else if (alt_req->http_specific) {
 579                                char *colon = strchr(data + i, ':');
 580                                char *slash = strchr(data + i, '/');
 581                                if (colon && slash && colon < data + posn &&
 582                                    slash < data + posn && colon < slash) {
 583                                        okay = 1;
 584                                }
 585                        }
 586                        /* skip 'objects' at end */
 587                        if (okay) {
 588                                target = xmalloc(serverlen + posn - i - 6);
 589                                strlcpy(target, base, serverlen);
 590                                strlcpy(target + serverlen, data + i, posn - i - 6);
 591                                if (get_verbosely)
 592                                        fprintf(stderr,
 593                                                "Also look at %s\n", target);
 594                                newalt = xmalloc(sizeof(*newalt));
 595                                newalt->next = NULL;
 596                                newalt->base = target;
 597                                newalt->got_indices = 0;
 598                                newalt->packs = NULL;
 599                                path = strstr(target, "//");
 600                                if (path) {
 601                                        path = strchr(path+2, '/');
 602                                        if (path)
 603                                                newalt->path_len = strlen(path);
 604                                }
 605
 606                                while (tail->next != NULL)
 607                                        tail = tail->next;
 608                                tail->next = newalt;
 609                        }
 610                }
 611                i = posn + 1;
 612        }
 613
 614        got_alternates = 1;
 615}
 616
 617static void fetch_alternates(const char *base)
 618{
 619        struct buffer buffer;
 620        char *url;
 621        char *data;
 622        struct active_request_slot *slot;
 623        struct alternates_request alt_req;
 624
 625        /* If another request has already started fetching alternates,
 626           wait for them to arrive and return to processing this request's
 627           curl message */
 628#ifdef USE_CURL_MULTI
 629        while (got_alternates == 0) {
 630                step_active_slots();
 631        }
 632#endif
 633
 634        /* Nothing to do if they've already been fetched */
 635        if (got_alternates == 1)
 636                return;
 637
 638        /* Start the fetch */
 639        got_alternates = 0;
 640
 641        data = xmalloc(4096);
 642        buffer.size = 4096;
 643        buffer.posn = 0;
 644        buffer.buffer = data;
 645
 646        if (get_verbosely)
 647                fprintf(stderr, "Getting alternates list for %s\n", base);
 648
 649        url = xmalloc(strlen(base) + 31);
 650        sprintf(url, "%s/objects/info/http-alternates", base);
 651
 652        /* Use a callback to process the result, since another request
 653           may fail and need to have alternates loaded before continuing */
 654        slot = get_active_slot();
 655        slot->callback_func = process_alternates_response;
 656        slot->callback_data = &alt_req;
 657
 658        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 659        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 660        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 661
 662        alt_req.base = base;
 663        alt_req.url = url;
 664        alt_req.buffer = &buffer;
 665        alt_req.http_specific = 1;
 666        alt_req.slot = slot;
 667
 668        if (start_active_slot(slot))
 669                run_active_slot(slot);
 670        else
 671                got_alternates = -1;
 672
 673        free(data);
 674        free(url);
 675}
 676
 677#ifndef NO_EXPAT
 678static void
 679xml_start_tag(void *userData, const char *name, const char **atts)
 680{
 681        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 682        const char *c = strchr(name, ':');
 683        int new_len;
 684
 685        if (c == NULL)
 686                c = name;
 687        else
 688                c++;
 689
 690        new_len = strlen(ctx->name) + strlen(c) + 2;
 691
 692        if (new_len > ctx->len) {
 693                ctx->name = xrealloc(ctx->name, new_len);
 694                ctx->len = new_len;
 695        }
 696        strcat(ctx->name, ".");
 697        strcat(ctx->name, c);
 698
 699        if (ctx->cdata) {
 700                free(ctx->cdata);
 701                ctx->cdata = NULL;
 702        }
 703
 704        ctx->userFunc(ctx, 0);
 705}
 706
 707static void
 708xml_end_tag(void *userData, const char *name)
 709{
 710        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 711        const char *c = strchr(name, ':');
 712        char *ep;
 713
 714        ctx->userFunc(ctx, 1);
 715
 716        if (c == NULL)
 717                c = name;
 718        else
 719                c++;
 720
 721        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 722        *ep = 0;
 723}
 724
 725static void
 726xml_cdata(void *userData, const XML_Char *s, int len)
 727{
 728        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 729        if (ctx->cdata)
 730                free(ctx->cdata);
 731        ctx->cdata = xmalloc(len + 1);
 732        strlcpy(ctx->cdata, s, len + 1);
 733}
 734
 735static int remote_ls(struct alt_base *repo, const char *path, int flags,
 736                     void (*userFunc)(struct remote_ls_ctx *ls),
 737                     void *userData);
 738
 739static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 740{
 741        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 742
 743        if (tag_closed) {
 744                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 745                        if (ls->dentry_flags & IS_DIR) {
 746                                if (ls->flags & PROCESS_DIRS) {
 747                                        ls->userFunc(ls);
 748                                }
 749                                if (strcmp(ls->dentry_name, ls->path) &&
 750                                    ls->flags & RECURSIVE) {
 751                                        ls->rc = remote_ls(ls->repo,
 752                                                           ls->dentry_name,
 753                                                           ls->flags,
 754                                                           ls->userFunc,
 755                                                           ls->userData);
 756                                }
 757                        } else if (ls->flags & PROCESS_FILES) {
 758                                ls->userFunc(ls);
 759                        }
 760                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 761                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 762                                                  ls->repo->path_len + 1);
 763                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 764                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 765                        ls->dentry_flags |= IS_DIR;
 766                }
 767        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 768                if (ls->dentry_name) {
 769                        free(ls->dentry_name);
 770                }
 771                ls->dentry_name = NULL;
 772                ls->dentry_flags = 0;
 773        }
 774}
 775
 776static int remote_ls(struct alt_base *repo, const char *path, int flags,
 777                     void (*userFunc)(struct remote_ls_ctx *ls),
 778                     void *userData)
 779{
 780        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 781        struct active_request_slot *slot;
 782        struct slot_results results;
 783        struct buffer in_buffer;
 784        struct buffer out_buffer;
 785        char *in_data;
 786        char *out_data;
 787        XML_Parser parser = XML_ParserCreate(NULL);
 788        enum XML_Status result;
 789        struct curl_slist *dav_headers = NULL;
 790        struct xml_ctx ctx;
 791        struct remote_ls_ctx ls;
 792
 793        ls.flags = flags;
 794        ls.repo = repo;
 795        ls.path = strdup(path);
 796        ls.dentry_name = NULL;
 797        ls.dentry_flags = 0;
 798        ls.userData = userData;
 799        ls.userFunc = userFunc;
 800        ls.rc = 0;
 801
 802        sprintf(url, "%s%s", repo->base, path);
 803
 804        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 805        out_data = xmalloc(out_buffer.size + 1);
 806        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 807        out_buffer.posn = 0;
 808        out_buffer.buffer = out_data;
 809
 810        in_buffer.size = 4096;
 811        in_data = xmalloc(in_buffer.size);
 812        in_buffer.posn = 0;
 813        in_buffer.buffer = in_data;
 814
 815        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 816        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 817
 818        slot = get_active_slot();
 819        slot->results = &results;
 820        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 821        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 822        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 823        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 824        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 825        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 826        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 827        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 828        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 829
 830        if (start_active_slot(slot)) {
 831                run_active_slot(slot);
 832                if (results.curl_result == CURLE_OK) {
 833                        ctx.name = xcalloc(10, 1);
 834                        ctx.len = 0;
 835                        ctx.cdata = NULL;
 836                        ctx.userFunc = handle_remote_ls_ctx;
 837                        ctx.userData = &ls;
 838                        XML_SetUserData(parser, &ctx);
 839                        XML_SetElementHandler(parser, xml_start_tag,
 840                                              xml_end_tag);
 841                        XML_SetCharacterDataHandler(parser, xml_cdata);
 842                        result = XML_Parse(parser, in_buffer.buffer,
 843                                           in_buffer.posn, 1);
 844                        free(ctx.name);
 845
 846                        if (result != XML_STATUS_OK) {
 847                                ls.rc = error("XML error: %s",
 848                                              XML_ErrorString(
 849                                                      XML_GetErrorCode(parser)));
 850                        }
 851                } else {
 852                        ls.rc = -1;
 853                }
 854        } else {
 855                ls.rc = error("Unable to start PROPFIND request");
 856        }
 857
 858        free(ls.path);
 859        free(url);
 860        free(out_data);
 861        free(in_buffer.buffer);
 862        curl_slist_free_all(dav_headers);
 863
 864        return ls.rc;
 865}
 866
 867static void process_ls_pack(struct remote_ls_ctx *ls)
 868{
 869        unsigned char sha1[20];
 870
 871        if (strlen(ls->dentry_name) == 63 &&
 872            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 873            !strncmp(ls->dentry_name+58, ".pack", 5)) {
 874                get_sha1_hex(ls->dentry_name + 18, sha1);
 875                setup_index(ls->repo, sha1);
 876        }
 877}
 878#endif
 879
 880static int fetch_indices(struct alt_base *repo)
 881{
 882        unsigned char sha1[20];
 883        char *url;
 884        struct buffer buffer;
 885        char *data;
 886        int i = 0;
 887
 888        struct active_request_slot *slot;
 889        struct slot_results results;
 890
 891        if (repo->got_indices)
 892                return 0;
 893
 894        data = xmalloc(4096);
 895        buffer.size = 4096;
 896        buffer.posn = 0;
 897        buffer.buffer = data;
 898
 899        if (get_verbosely)
 900                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 901
 902#ifndef NO_EXPAT
 903        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 904                      process_ls_pack, NULL) == 0)
 905                return 0;
 906#endif
 907
 908        url = xmalloc(strlen(repo->base) + 21);
 909        sprintf(url, "%s/objects/info/packs", repo->base);
 910
 911        slot = get_active_slot();
 912        slot->results = &results;
 913        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 914        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 915        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 916        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 917        if (start_active_slot(slot)) {
 918                run_active_slot(slot);
 919                if (results.curl_result != CURLE_OK) {
 920                        if (results.http_code == 404 ||
 921                            results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 922                                repo->got_indices = 1;
 923                                free(buffer.buffer);
 924                                return 0;
 925                        } else {
 926                                repo->got_indices = 0;
 927                                free(buffer.buffer);
 928                                return error("%s", curl_errorstr);
 929                        }
 930                }
 931        } else {
 932                repo->got_indices = 0;
 933                free(buffer.buffer);
 934                return error("Unable to start request");
 935        }
 936
 937        data = buffer.buffer;
 938        while (i < buffer.posn) {
 939                switch (data[i]) {
 940                case 'P':
 941                        i++;
 942                        if (i + 52 <= buffer.posn &&
 943                            !strncmp(data + i, " pack-", 6) &&
 944                            !strncmp(data + i + 46, ".pack\n", 6)) {
 945                                get_sha1_hex(data + i + 6, sha1);
 946                                setup_index(repo, sha1);
 947                                i += 51;
 948                                break;
 949                        }
 950                default:
 951                        while (i < buffer.posn && data[i] != '\n')
 952                                i++;
 953                }
 954                i++;
 955        }
 956
 957        free(buffer.buffer);
 958        repo->got_indices = 1;
 959        return 0;
 960}
 961
 962static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 963{
 964        char *url;
 965        struct packed_git *target;
 966        struct packed_git **lst;
 967        FILE *packfile;
 968        char *filename;
 969        char tmpfile[PATH_MAX];
 970        int ret;
 971        long prev_posn = 0;
 972        char range[RANGE_HEADER_SIZE];
 973        struct curl_slist *range_header = NULL;
 974
 975        struct active_request_slot *slot;
 976        struct slot_results results;
 977
 978        if (fetch_indices(repo))
 979                return -1;
 980        target = find_sha1_pack(sha1, repo->packs);
 981        if (!target)
 982                return -1;
 983
 984        if (get_verbosely) {
 985                fprintf(stderr, "Getting pack %s\n",
 986                        sha1_to_hex(target->sha1));
 987                fprintf(stderr, " which contains %s\n",
 988                        sha1_to_hex(sha1));
 989        }
 990
 991        url = xmalloc(strlen(repo->base) + 65);
 992        sprintf(url, "%s/objects/pack/pack-%s.pack",
 993                repo->base, sha1_to_hex(target->sha1));
 994
 995        filename = sha1_pack_name(target->sha1);
 996        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 997        packfile = fopen(tmpfile, "a");
 998        if (!packfile)
 999                return error("Unable to open local file %s for pack",
1000                             filename);
1001
1002        slot = get_active_slot();
1003        slot->results = &results;
1004        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1005        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1006        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1007        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1008        slot->local = packfile;
1009
1010        /* If there is data present from a previous transfer attempt,
1011           resume where it left off */
1012        prev_posn = ftell(packfile);
1013        if (prev_posn>0) {
1014                if (get_verbosely)
1015                        fprintf(stderr,
1016                                "Resuming fetch of pack %s at byte %ld\n",
1017                                sha1_to_hex(target->sha1), prev_posn);
1018                sprintf(range, "Range: bytes=%ld-", prev_posn);
1019                range_header = curl_slist_append(range_header, range);
1020                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1021        }
1022
1023        if (start_active_slot(slot)) {
1024                run_active_slot(slot);
1025                if (results.curl_result != CURLE_OK) {
1026                        fclose(packfile);
1027                        return error("Unable to get pack file %s\n%s", url,
1028                                     curl_errorstr);
1029                }
1030        } else {
1031                fclose(packfile);
1032                return error("Unable to start request");
1033        }
1034
1035        fclose(packfile);
1036
1037        ret = move_temp_to_file(tmpfile, filename);
1038        if (ret)
1039                return ret;
1040
1041        lst = &repo->packs;
1042        while (*lst != target)
1043                lst = &((*lst)->next);
1044        *lst = (*lst)->next;
1045
1046        if (verify_pack(target, 0))
1047                return -1;
1048        install_packed_git(target);
1049
1050        return 0;
1051}
1052
1053static void abort_object_request(struct object_request *obj_req)
1054{
1055        if (obj_req->local >= 0) {
1056                close(obj_req->local);
1057                obj_req->local = -1;
1058        }
1059        unlink(obj_req->tmpfile);
1060        if (obj_req->slot) {
1061                release_active_slot(obj_req->slot);
1062                obj_req->slot = NULL;
1063        }
1064        release_object_request(obj_req);
1065}
1066
1067static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1068{
1069        char *hex = sha1_to_hex(sha1);
1070        int ret = 0;
1071        struct object_request *obj_req = object_queue_head;
1072
1073        while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1074                obj_req = obj_req->next;
1075        if (obj_req == NULL)
1076                return error("Couldn't find request for %s in the queue", hex);
1077
1078        if (has_sha1_file(obj_req->sha1)) {
1079                abort_object_request(obj_req);
1080                return 0;
1081        }
1082
1083#ifdef USE_CURL_MULTI
1084        while (obj_req->state == WAITING) {
1085                step_active_slots();
1086        }
1087#else
1088        start_object_request(obj_req);
1089#endif
1090
1091        while (obj_req->state == ACTIVE) {
1092                run_active_slot(obj_req->slot);
1093        }
1094        if (obj_req->local != -1) {
1095                close(obj_req->local); obj_req->local = -1;
1096        }
1097
1098        if (obj_req->state == ABORTED) {
1099                ret = error("Request for %s aborted", hex);
1100        } else if (obj_req->curl_result != CURLE_OK &&
1101                   obj_req->http_code != 416) {
1102                if (obj_req->http_code == 404 ||
1103                    obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1104                        ret = -1; /* Be silent, it is probably in a pack. */
1105                else
1106                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1107                                    obj_req->errorstr, obj_req->curl_result,
1108                                    obj_req->http_code, hex);
1109        } else if (obj_req->zret != Z_STREAM_END) {
1110                corrupt_object_found++;
1111                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1112        } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1113                ret = error("File %s has bad hash", hex);
1114        } else if (obj_req->rename < 0) {
1115                ret = error("unable to write sha1 filename %s",
1116                            obj_req->filename);
1117        }
1118
1119        release_object_request(obj_req);
1120        return ret;
1121}
1122
1123int fetch(unsigned char *sha1)
1124{
1125        struct alt_base *altbase = alt;
1126
1127        if (!fetch_object(altbase, sha1))
1128                return 0;
1129        while (altbase) {
1130                if (!fetch_pack(altbase, sha1))
1131                        return 0;
1132                fetch_alternates(alt->base);
1133                altbase = altbase->next;
1134        }
1135        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1136                     alt->base);
1137}
1138
/*
 * Return 0 when ch may appear unescaped in a ref URL (ASCII letters,
 * digits, '/', '-' and '.'), and 1 when it has to be percent-encoded.
 */
static inline int needs_quote(int ch)
{
        if ('A' <= ch && ch <= 'Z')
                return 0;
        if ('a' <= ch && ch <= 'z')
                return 0;
        if ('0' <= ch && ch <= '9')
                return 0;

        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                return 1;
        }
}
1150
/*
 * Map a nibble value to its upper-case hexadecimal digit: values below 10
 * are offset from '0', all others are offset from 'A'.
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1156
/*
 * Build "<base>refs/<ref>" in a freshly xmalloc'ed buffer, replacing every
 * character of ref for which needs_quote() is true with '%' followed by two
 * upper-case hex digits. The caller owns the returned string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *src;
        char *out, *quoted;
        int total, base_len, c;

        base_len = strlen(base);

        /* First pass: size the result ("refs/" plus the terminating NUL). */
        total = base_len + 6;
        src = ref;
        while ((c = *src) != 0) {
                total += needs_quote(c) ? 3 : 1;
                src++;
        }

        quoted = xmalloc(total);
        memcpy(quoted, base, base_len);
        memcpy(quoted + base_len, "refs/", 5);

        /* Second pass: copy ref, escaping where required. */
        out = quoted + base_len + 5;
        for (src = ref; (c = *src) != 0; src++) {
                if (!needs_quote(c)) {
                        *out++ = c;
                        continue;
                }
                *out++ = '%';
                *out++ = hex((c >> 4) & 0xF);
                *out++ = hex(c & 0xF);
        }
        *out = 0;

        return quoted;
}
1184
1185int fetch_ref(char *ref, unsigned char *sha1)
1186{
1187        char *url;
1188        char hex[42];
1189        struct buffer buffer;
1190        const char *base = alt->base;
1191        struct active_request_slot *slot;
1192        struct slot_results results;
1193        buffer.size = 41;
1194        buffer.posn = 0;
1195        buffer.buffer = hex;
1196        hex[41] = '\0';
1197
1198        url = quote_ref_url(base, ref);
1199        slot = get_active_slot();
1200        slot->results = &results;
1201        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1202        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1203        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1204        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1205        if (start_active_slot(slot)) {
1206                run_active_slot(slot);
1207                if (results.curl_result != CURLE_OK)
1208                        return error("Couldn't get %s for %s\n%s",
1209                                     url, ref, curl_errorstr);
1210        } else {
1211                return error("Unable to start request");
1212        }
1213
1214        hex[40] = '\0';
1215        get_sha1_hex(hex, sha1);
1216        return 0;
1217}
1218
1219int main(int argc, const char **argv)
1220{
1221        int commits;
1222        const char **write_ref = NULL;
1223        char **commit_id;
1224        const char *url;
1225        char *path;
1226        int arg = 1;
1227        int rc = 0;
1228
1229        setup_git_directory();
1230        git_config(git_default_config);
1231
1232        while (arg < argc && argv[arg][0] == '-') {
1233                if (argv[arg][1] == 't') {
1234                        get_tree = 1;
1235                } else if (argv[arg][1] == 'c') {
1236                        get_history = 1;
1237                } else if (argv[arg][1] == 'a') {
1238                        get_all = 1;
1239                        get_tree = 1;
1240                        get_history = 1;
1241                } else if (argv[arg][1] == 'v') {
1242                        get_verbosely = 1;
1243                } else if (argv[arg][1] == 'w') {
1244                        write_ref = &argv[arg + 1];
1245                        arg++;
1246                } else if (!strcmp(argv[arg], "--recover")) {
1247                        get_recover = 1;
1248                } else if (!strcmp(argv[arg], "--stdin")) {
1249                        commits_on_stdin = 1;
1250                }
1251                arg++;
1252        }
1253        if (argc < arg + 2 - commits_on_stdin) {
1254                usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1255                return 1;
1256        }
1257        if (commits_on_stdin) {
1258                commits = pull_targets_stdin(&commit_id, &write_ref);
1259        } else {
1260                commit_id = (char **) &argv[arg++];
1261                commits = 1;
1262        }
1263        url = argv[arg];
1264
1265        http_init();
1266
1267        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1268
1269        alt = xmalloc(sizeof(*alt));
1270        alt->base = url;
1271        alt->got_indices = 0;
1272        alt->packs = NULL;
1273        alt->next = NULL;
1274        path = strstr(url, "//");
1275        if (path) {
1276                path = strchr(path+2, '/');
1277                if (path)
1278                        alt->path_len = strlen(path);
1279        }
1280
1281        if (pull(commits, commit_id, write_ref, url))
1282                rc = 1;
1283
1284        http_cleanup();
1285
1286        curl_slist_free_all(no_pragma_header);
1287
1288        if (commits_on_stdin)
1289                pull_targets_free(commits, commit_id, write_ref);
1290
1291        if (corrupt_object_found) {
1292                fprintf(stderr,
1293"Some loose object were found to be corrupt, but they might be just\n"
1294"a false '404 Not Found' error message sent with incorrect HTTP\n"
1295"status code.  Suggest running git fsck-objects.\n");
1296        }
1297        return rc;
1298}