/* http-fetch.c, as of commit ced78b3 ("clone: the given repository dir should be relative to $PWD") */
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
  10/* Definitions for DAV requests */
  11#define DAV_PROPFIND "PROPFIND"
  12#define DAV_PROPFIND_RESP ".multistatus.response"
  13#define DAV_PROPFIND_NAME ".multistatus.response.href"
  14#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
  15#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
  17/* Definitions for processing XML DAV responses */
  18#ifndef XML_STATUS_OK
  19enum XML_Status {
  20  XML_STATUS_OK = 1,
  21  XML_STATUS_ERROR = 0
  22};
  23#define XML_STATUS_OK    1
  24#define XML_STATUS_ERROR 0
  25#endif
  26
  27/* Flags that control remote_ls processing */
  28#define PROCESS_FILES (1u << 0)
  29#define PROCESS_DIRS  (1u << 1)
  30#define RECURSIVE     (1u << 2)
  31
  32/* Flags that remote_ls passes to callback functions */
  33#define IS_DIR (1u << 0)
  34#endif
  35
  36#define PREV_BUF_SIZE 4096
  37#define RANGE_HEADER_SIZE 30
  38
  39static int commits_on_stdin;
  40
  41static int got_alternates = -1;
  42static int corrupt_object_found;
  43
  44static struct curl_slist *no_pragma_header;
  45
  46struct alt_base
  47{
  48        const char *base;
  49        int path_len;
  50        int got_indices;
  51        struct packed_git *packs;
  52        struct alt_base *next;
  53};
  54
  55static struct alt_base *alt;
  56
  57enum object_request_state {
  58        WAITING,
  59        ABORTED,
  60        ACTIVE,
  61        COMPLETE,
  62};
  63
  64struct object_request
  65{
  66        unsigned char sha1[20];
  67        struct alt_base *repo;
  68        char *url;
  69        char filename[PATH_MAX];
  70        char tmpfile[PATH_MAX];
  71        int local;
  72        enum object_request_state state;
  73        CURLcode curl_result;
  74        char errorstr[CURL_ERROR_SIZE];
  75        long http_code;
  76        unsigned char real_sha1[20];
  77        SHA_CTX c;
  78        z_stream stream;
  79        int zret;
  80        int rename;
  81        struct active_request_slot *slot;
  82        struct object_request *next;
  83};
  84
  85struct alternates_request {
  86        const char *base;
  87        char *url;
  88        struct buffer *buffer;
  89        struct active_request_slot *slot;
  90        int http_specific;
  91};
  92
  93#ifndef NO_EXPAT
  94struct xml_ctx
  95{
  96        char *name;
  97        int len;
  98        char *cdata;
  99        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
 100        void *userData;
 101};
 102
 103struct remote_ls_ctx
 104{
 105        struct alt_base *repo;
 106        char *path;
 107        void (*userFunc)(struct remote_ls_ctx *ls);
 108        void *userData;
 109        int flags;
 110        char *dentry_name;
 111        int dentry_flags;
 112        int rc;
 113        struct remote_ls_ctx *parent;
 114};
 115#endif
 116
 117static struct object_request *object_queue_head;
 118
 119static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 120                               void *data)
 121{
 122        unsigned char expn[4096];
 123        size_t size = eltsize * nmemb;
 124        int posn = 0;
 125        struct object_request *obj_req = (struct object_request *)data;
 126        do {
 127                ssize_t retval = write(obj_req->local,
 128                                       (char *) ptr + posn, size - posn);
 129                if (retval < 0)
 130                        return posn;
 131                posn += retval;
 132        } while (posn < size);
 133
 134        obj_req->stream.avail_in = size;
 135        obj_req->stream.next_in = ptr;
 136        do {
 137                obj_req->stream.next_out = expn;
 138                obj_req->stream.avail_out = sizeof(expn);
 139                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 140                SHA1_Update(&obj_req->c, expn,
 141                            sizeof(expn) - obj_req->stream.avail_out);
 142        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 143        data_received++;
 144        return size;
 145}
 146
 147static int missing__target(int code, int result)
 148{
 149        return  /* file:// URL -- do we ever use one??? */
 150                (result == CURLE_FILE_COULDNT_READ_FILE) ||
 151                /* http:// and https:// URL */
 152                (code == 404 && result == CURLE_HTTP_RETURNED_ERROR) ||
 153                /* ftp:// URL */
 154                (code == 550 && result == CURLE_FTP_COULDNT_RETR_FILE)
 155                ;
 156}
 157
 158#define missing_target(a) missing__target((a)->http_code, (a)->curl_result)
 159
 160static void fetch_alternates(const char *base);
 161
 162static void process_object_response(void *callback_data);
 163
 164static void start_object_request(struct object_request *obj_req)
 165{
 166        char *hex = sha1_to_hex(obj_req->sha1);
 167        char prevfile[PATH_MAX];
 168        char *url;
 169        char *posn;
 170        int prevlocal;
 171        unsigned char prev_buf[PREV_BUF_SIZE];
 172        ssize_t prev_read = 0;
 173        long prev_posn = 0;
 174        char range[RANGE_HEADER_SIZE];
 175        struct curl_slist *range_header = NULL;
 176        struct active_request_slot *slot;
 177
 178        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 179        unlink(prevfile);
 180        rename(obj_req->tmpfile, prevfile);
 181        unlink(obj_req->tmpfile);
 182
 183        if (obj_req->local != -1)
 184                error("fd leakage in start: %d", obj_req->local);
 185        obj_req->local = open(obj_req->tmpfile,
 186                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 187        /* This could have failed due to the "lazy directory creation";
 188         * try to mkdir the last path component.
 189         */
 190        if (obj_req->local < 0 && errno == ENOENT) {
 191                char *dir = strrchr(obj_req->tmpfile, '/');
 192                if (dir) {
 193                        *dir = 0;
 194                        mkdir(obj_req->tmpfile, 0777);
 195                        *dir = '/';
 196                }
 197                obj_req->local = open(obj_req->tmpfile,
 198                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 199        }
 200
 201        if (obj_req->local < 0) {
 202                obj_req->state = ABORTED;
 203                error("Couldn't create temporary file %s for %s: %s",
 204                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 205                return;
 206        }
 207
 208        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 209
 210        inflateInit(&obj_req->stream);
 211
 212        SHA1_Init(&obj_req->c);
 213
 214        url = xmalloc(strlen(obj_req->repo->base) + 50);
 215        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 216        strcpy(url, obj_req->repo->base);
 217        posn = url + strlen(obj_req->repo->base);
 218        strcpy(posn, "objects/");
 219        posn += 8;
 220        memcpy(posn, hex, 2);
 221        posn += 2;
 222        *(posn++) = '/';
 223        strcpy(posn, hex + 2);
 224        strcpy(obj_req->url, url);
 225
 226        /* If a previous temp file is present, process what was already
 227           fetched. */
 228        prevlocal = open(prevfile, O_RDONLY);
 229        if (prevlocal != -1) {
 230                do {
 231                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 232                        if (prev_read>0) {
 233                                if (fwrite_sha1_file(prev_buf,
 234                                                     1,
 235                                                     prev_read,
 236                                                     obj_req) == prev_read) {
 237                                        prev_posn += prev_read;
 238                                } else {
 239                                        prev_read = -1;
 240                                }
 241                        }
 242                } while (prev_read > 0);
 243                close(prevlocal);
 244        }
 245        unlink(prevfile);
 246
 247        /* Reset inflate/SHA1 if there was an error reading the previous temp
 248           file; also rewind to the beginning of the local file. */
 249        if (prev_read == -1) {
 250                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 251                inflateInit(&obj_req->stream);
 252                SHA1_Init(&obj_req->c);
 253                if (prev_posn>0) {
 254                        prev_posn = 0;
 255                        lseek(obj_req->local, SEEK_SET, 0);
 256                        ftruncate(obj_req->local, 0);
 257                }
 258        }
 259
 260        slot = get_active_slot();
 261        slot->callback_func = process_object_response;
 262        slot->callback_data = obj_req;
 263        obj_req->slot = slot;
 264
 265        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 266        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 267        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 268        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 269        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 270
 271        /* If we have successfully processed data from a previous fetch
 272           attempt, only fetch the data we don't already have. */
 273        if (prev_posn>0) {
 274                if (get_verbosely)
 275                        fprintf(stderr,
 276                                "Resuming fetch of object %s at byte %ld\n",
 277                                hex, prev_posn);
 278                sprintf(range, "Range: bytes=%ld-", prev_posn);
 279                range_header = curl_slist_append(range_header, range);
 280                curl_easy_setopt(slot->curl,
 281                                 CURLOPT_HTTPHEADER, range_header);
 282        }
 283
 284        /* Try to get the request started, abort the request on error */
 285        obj_req->state = ACTIVE;
 286        if (!start_active_slot(slot)) {
 287                obj_req->state = ABORTED;
 288                obj_req->slot = NULL;
 289                close(obj_req->local); obj_req->local = -1;
 290                free(obj_req->url);
 291                return;
 292        }
 293}
 294
 295static void finish_object_request(struct object_request *obj_req)
 296{
 297        struct stat st;
 298
 299        fchmod(obj_req->local, 0444);
 300        close(obj_req->local); obj_req->local = -1;
 301
 302        if (obj_req->http_code == 416) {
 303                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 304        } else if (obj_req->curl_result != CURLE_OK) {
 305                if (stat(obj_req->tmpfile, &st) == 0)
 306                        if (st.st_size == 0)
 307                                unlink(obj_req->tmpfile);
 308                return;
 309        }
 310
 311        inflateEnd(&obj_req->stream);
 312        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 313        if (obj_req->zret != Z_STREAM_END) {
 314                unlink(obj_req->tmpfile);
 315                return;
 316        }
 317        if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
 318                unlink(obj_req->tmpfile);
 319                return;
 320        }
 321        obj_req->rename =
 322                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 323
 324        if (obj_req->rename == 0)
 325                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 326}
 327
 328static void process_object_response(void *callback_data)
 329{
 330        struct object_request *obj_req =
 331                (struct object_request *)callback_data;
 332
 333        obj_req->curl_result = obj_req->slot->curl_result;
 334        obj_req->http_code = obj_req->slot->http_code;
 335        obj_req->slot = NULL;
 336        obj_req->state = COMPLETE;
 337
 338        /* Use alternates if necessary */
 339        if (missing_target(obj_req)) {
 340                fetch_alternates(alt->base);
 341                if (obj_req->repo->next != NULL) {
 342                        obj_req->repo =
 343                                obj_req->repo->next;
 344                        close(obj_req->local);
 345                        obj_req->local = -1;
 346                        start_object_request(obj_req);
 347                        return;
 348                }
 349        }
 350
 351        finish_object_request(obj_req);
 352}
 353
 354static void release_object_request(struct object_request *obj_req)
 355{
 356        struct object_request *entry = object_queue_head;
 357
 358        if (obj_req->local != -1)
 359                error("fd leakage in release: %d", obj_req->local);
 360        if (obj_req == object_queue_head) {
 361                object_queue_head = obj_req->next;
 362        } else {
 363                while (entry->next != NULL && entry->next != obj_req)
 364                        entry = entry->next;
 365                if (entry->next == obj_req)
 366                        entry->next = entry->next->next;
 367        }
 368
 369        free(obj_req->url);
 370        free(obj_req);
 371}
 372
 373#ifdef USE_CURL_MULTI
 374void fill_active_slots(void)
 375{
 376        struct object_request *obj_req = object_queue_head;
 377        struct active_request_slot *slot = active_queue_head;
 378        int num_transfers;
 379
 380        while (active_requests < max_requests && obj_req != NULL) {
 381                if (obj_req->state == WAITING) {
 382                        if (has_sha1_file(obj_req->sha1))
 383                                obj_req->state = COMPLETE;
 384                        else
 385                                start_object_request(obj_req);
 386                        curl_multi_perform(curlm, &num_transfers);
 387                }
 388                obj_req = obj_req->next;
 389        }
 390
 391        while (slot != NULL) {
 392                if (!slot->in_use && slot->curl != NULL) {
 393                        curl_easy_cleanup(slot->curl);
 394                        slot->curl = NULL;
 395                }
 396                slot = slot->next;
 397        }
 398}
 399#endif
 400
 401void prefetch(unsigned char *sha1)
 402{
 403        struct object_request *newreq;
 404        struct object_request *tail;
 405        char *filename = sha1_file_name(sha1);
 406
 407        newreq = xmalloc(sizeof(*newreq));
 408        hashcpy(newreq->sha1, sha1);
 409        newreq->repo = alt;
 410        newreq->url = NULL;
 411        newreq->local = -1;
 412        newreq->state = WAITING;
 413        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 414        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 415                 "%s.temp", filename);
 416        newreq->slot = NULL;
 417        newreq->next = NULL;
 418
 419        if (object_queue_head == NULL) {
 420                object_queue_head = newreq;
 421        } else {
 422                tail = object_queue_head;
 423                while (tail->next != NULL) {
 424                        tail = tail->next;
 425                }
 426                tail->next = newreq;
 427        }
 428
 429#ifdef USE_CURL_MULTI
 430        fill_active_slots();
 431        step_active_slots();
 432#endif
 433}
 434
 435static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 436{
 437        char *hex = sha1_to_hex(sha1);
 438        char *filename;
 439        char *url;
 440        char tmpfile[PATH_MAX];
 441        long prev_posn = 0;
 442        char range[RANGE_HEADER_SIZE];
 443        struct curl_slist *range_header = NULL;
 444
 445        FILE *indexfile;
 446        struct active_request_slot *slot;
 447        struct slot_results results;
 448
 449        if (has_pack_index(sha1))
 450                return 0;
 451
 452        if (get_verbosely)
 453                fprintf(stderr, "Getting index for pack %s\n", hex);
 454
 455        url = xmalloc(strlen(repo->base) + 64);
 456        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 457
 458        filename = sha1_pack_index_name(sha1);
 459        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 460        indexfile = fopen(tmpfile, "a");
 461        if (!indexfile)
 462                return error("Unable to open local file %s for pack index",
 463                             filename);
 464
 465        slot = get_active_slot();
 466        slot->results = &results;
 467        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 468        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 469        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 470        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 471        slot->local = indexfile;
 472
 473        /* If there is data present from a previous transfer attempt,
 474           resume where it left off */
 475        prev_posn = ftell(indexfile);
 476        if (prev_posn>0) {
 477                if (get_verbosely)
 478                        fprintf(stderr,
 479                                "Resuming fetch of index for pack %s at byte %ld\n",
 480                                hex, prev_posn);
 481                sprintf(range, "Range: bytes=%ld-", prev_posn);
 482                range_header = curl_slist_append(range_header, range);
 483                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 484        }
 485
 486        if (start_active_slot(slot)) {
 487                run_active_slot(slot);
 488                if (results.curl_result != CURLE_OK) {
 489                        fclose(indexfile);
 490                        return error("Unable to get pack index %s\n%s", url,
 491                                     curl_errorstr);
 492                }
 493        } else {
 494                fclose(indexfile);
 495                return error("Unable to start request");
 496        }
 497
 498        fclose(indexfile);
 499
 500        return move_temp_to_file(tmpfile, filename);
 501}
 502
 503static int setup_index(struct alt_base *repo, unsigned char *sha1)
 504{
 505        struct packed_git *new_pack;
 506        if (has_pack_file(sha1))
 507                return 0; /* don't list this as something we can get */
 508
 509        if (fetch_index(repo, sha1))
 510                return -1;
 511
 512        new_pack = parse_pack_index(sha1);
 513        new_pack->next = repo->packs;
 514        repo->packs = new_pack;
 515        return 0;
 516}
 517
 518static void process_alternates_response(void *callback_data)
 519{
 520        struct alternates_request *alt_req =
 521                (struct alternates_request *)callback_data;
 522        struct active_request_slot *slot = alt_req->slot;
 523        struct alt_base *tail = alt;
 524        const char *base = alt_req->base;
 525        static const char null_byte = '\0';
 526        char *data;
 527        int i = 0;
 528
 529        if (alt_req->http_specific) {
 530                if (slot->curl_result != CURLE_OK ||
 531                    !alt_req->buffer->posn) {
 532
 533                        /* Try reusing the slot to get non-http alternates */
 534                        alt_req->http_specific = 0;
 535                        sprintf(alt_req->url, "%s/objects/info/alternates",
 536                                base);
 537                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 538                                         alt_req->url);
 539                        active_requests++;
 540                        slot->in_use = 1;
 541                        if (slot->finished != NULL)
 542                                (*slot->finished) = 0;
 543                        if (!start_active_slot(slot)) {
 544                                got_alternates = -1;
 545                                slot->in_use = 0;
 546                                if (slot->finished != NULL)
 547                                        (*slot->finished) = 1;
 548                        }
 549                        return;
 550                }
 551        } else if (slot->curl_result != CURLE_OK) {
 552                if (!missing_target(slot)) {
 553                        got_alternates = -1;
 554                        return;
 555                }
 556        }
 557
 558        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 559        alt_req->buffer->posn--;
 560        data = alt_req->buffer->buffer;
 561
 562        while (i < alt_req->buffer->posn) {
 563                int posn = i;
 564                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 565                        posn++;
 566                if (data[posn] == '\n') {
 567                        int okay = 0;
 568                        int serverlen = 0;
 569                        struct alt_base *newalt;
 570                        char *target = NULL;
 571                        char *path;
 572                        if (data[i] == '/') {
 573                                /* This counts
 574                                 * http://git.host/pub/scm/linux.git/
 575                                 * -----------here^
 576                                 * so memcpy(dst, base, serverlen) will
 577                                 * copy up to "...git.host".
 578                                 */
 579                                const char *colon_ss = strstr(base,"://");
 580                                if (colon_ss) {
 581                                        serverlen = (strchr(colon_ss + 3, '/')
 582                                                     - base);
 583                                        okay = 1;
 584                                }
 585                        } else if (!memcmp(data + i, "../", 3)) {
 586                                /* Relative URL; chop the corresponding
 587                                 * number of subpath from base (and ../
 588                                 * from data), and concatenate the result.
 589                                 *
 590                                 * The code first drops ../ from data, and
 591                                 * then drops one ../ from data and one path
 592                                 * from base.  IOW, one extra ../ is dropped
 593                                 * from data than path is dropped from base.
 594                                 *
 595                                 * This is not wrong.  The alternate in
 596                                 *     http://git.host/pub/scm/linux.git/
 597                                 * to borrow from
 598                                 *     http://git.host/pub/scm/linus.git/
 599                                 * is ../../linus.git/objects/.  You need
 600                                 * two ../../ to borrow from your direct
 601                                 * neighbour.
 602                                 */
 603                                i += 3;
 604                                serverlen = strlen(base);
 605                                while (i + 2 < posn &&
 606                                       !memcmp(data + i, "../", 3)) {
 607                                        do {
 608                                                serverlen--;
 609                                        } while (serverlen &&
 610                                                 base[serverlen - 1] != '/');
 611                                        i += 3;
 612                                }
 613                                /* If the server got removed, give up. */
 614                                okay = strchr(base, ':') - base + 3 <
 615                                        serverlen;
 616                        } else if (alt_req->http_specific) {
 617                                char *colon = strchr(data + i, ':');
 618                                char *slash = strchr(data + i, '/');
 619                                if (colon && slash && colon < data + posn &&
 620                                    slash < data + posn && colon < slash) {
 621                                        okay = 1;
 622                                }
 623                        }
 624                        /* skip "objects\n" at end */
 625                        if (okay) {
 626                                target = xmalloc(serverlen + posn - i - 6);
 627                                memcpy(target, base, serverlen);
 628                                memcpy(target + serverlen, data + i,
 629                                       posn - i - 7);
 630                                target[serverlen + posn - i - 7] = 0;
 631                                if (get_verbosely)
 632                                        fprintf(stderr,
 633                                                "Also look at %s\n", target);
 634                                newalt = xmalloc(sizeof(*newalt));
 635                                newalt->next = NULL;
 636                                newalt->base = target;
 637                                newalt->got_indices = 0;
 638                                newalt->packs = NULL;
 639                                path = strstr(target, "//");
 640                                if (path) {
 641                                        path = strchr(path+2, '/');
 642                                        if (path)
 643                                                newalt->path_len = strlen(path);
 644                                }
 645
 646                                while (tail->next != NULL)
 647                                        tail = tail->next;
 648                                tail->next = newalt;
 649                        }
 650                }
 651                i = posn + 1;
 652        }
 653
 654        got_alternates = 1;
 655}
 656
 657static void fetch_alternates(const char *base)
 658{
 659        struct buffer buffer;
 660        char *url;
 661        char *data;
 662        struct active_request_slot *slot;
 663        struct alternates_request alt_req;
 664
 665        /* If another request has already started fetching alternates,
 666           wait for them to arrive and return to processing this request's
 667           curl message */
 668#ifdef USE_CURL_MULTI
 669        while (got_alternates == 0) {
 670                step_active_slots();
 671        }
 672#endif
 673
 674        /* Nothing to do if they've already been fetched */
 675        if (got_alternates == 1)
 676                return;
 677
 678        /* Start the fetch */
 679        got_alternates = 0;
 680
 681        data = xmalloc(4096);
 682        buffer.size = 4096;
 683        buffer.posn = 0;
 684        buffer.buffer = data;
 685
 686        if (get_verbosely)
 687                fprintf(stderr, "Getting alternates list for %s\n", base);
 688
 689        url = xmalloc(strlen(base) + 31);
 690        sprintf(url, "%s/objects/info/http-alternates", base);
 691
 692        /* Use a callback to process the result, since another request
 693           may fail and need to have alternates loaded before continuing */
 694        slot = get_active_slot();
 695        slot->callback_func = process_alternates_response;
 696        slot->callback_data = &alt_req;
 697
 698        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 699        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 700        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 701
 702        alt_req.base = base;
 703        alt_req.url = url;
 704        alt_req.buffer = &buffer;
 705        alt_req.http_specific = 1;
 706        alt_req.slot = slot;
 707
 708        if (start_active_slot(slot))
 709                run_active_slot(slot);
 710        else
 711                got_alternates = -1;
 712
 713        free(data);
 714        free(url);
 715}
 716
 717#ifndef NO_EXPAT
 718static void
 719xml_start_tag(void *userData, const char *name, const char **atts)
 720{
 721        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 722        const char *c = strchr(name, ':');
 723        int new_len;
 724
 725        if (c == NULL)
 726                c = name;
 727        else
 728                c++;
 729
 730        new_len = strlen(ctx->name) + strlen(c) + 2;
 731
 732        if (new_len > ctx->len) {
 733                ctx->name = xrealloc(ctx->name, new_len);
 734                ctx->len = new_len;
 735        }
 736        strcat(ctx->name, ".");
 737        strcat(ctx->name, c);
 738
 739        free(ctx->cdata);
 740        ctx->cdata = NULL;
 741
 742        ctx->userFunc(ctx, 0);
 743}
 744
 745static void
 746xml_end_tag(void *userData, const char *name)
 747{
 748        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 749        const char *c = strchr(name, ':');
 750        char *ep;
 751
 752        ctx->userFunc(ctx, 1);
 753
 754        if (c == NULL)
 755                c = name;
 756        else
 757                c++;
 758
 759        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 760        *ep = 0;
 761}
 762
 763static void
 764xml_cdata(void *userData, const XML_Char *s, int len)
 765{
 766        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 767        free(ctx->cdata);
 768        ctx->cdata = xmalloc(len + 1);
 769        strlcpy(ctx->cdata, s, len + 1);
 770}
 771
 772static int remote_ls(struct alt_base *repo, const char *path, int flags,
 773                     void (*userFunc)(struct remote_ls_ctx *ls),
 774                     void *userData);
 775
 776static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 777{
 778        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 779
 780        if (tag_closed) {
 781                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 782                        if (ls->dentry_flags & IS_DIR) {
 783                                if (ls->flags & PROCESS_DIRS) {
 784                                        ls->userFunc(ls);
 785                                }
 786                                if (strcmp(ls->dentry_name, ls->path) &&
 787                                    ls->flags & RECURSIVE) {
 788                                        ls->rc = remote_ls(ls->repo,
 789                                                           ls->dentry_name,
 790                                                           ls->flags,
 791                                                           ls->userFunc,
 792                                                           ls->userData);
 793                                }
 794                        } else if (ls->flags & PROCESS_FILES) {
 795                                ls->userFunc(ls);
 796                        }
 797                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 798                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 799                                                  ls->repo->path_len + 1);
 800                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 801                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 802                        ls->dentry_flags |= IS_DIR;
 803                }
 804        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 805                free(ls->dentry_name);
 806                ls->dentry_name = NULL;
 807                ls->dentry_flags = 0;
 808        }
 809}
 810
 811static int remote_ls(struct alt_base *repo, const char *path, int flags,
 812                     void (*userFunc)(struct remote_ls_ctx *ls),
 813                     void *userData)
 814{
 815        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 816        struct active_request_slot *slot;
 817        struct slot_results results;
 818        struct buffer in_buffer;
 819        struct buffer out_buffer;
 820        char *in_data;
 821        char *out_data;
 822        XML_Parser parser = XML_ParserCreate(NULL);
 823        enum XML_Status result;
 824        struct curl_slist *dav_headers = NULL;
 825        struct xml_ctx ctx;
 826        struct remote_ls_ctx ls;
 827
 828        ls.flags = flags;
 829        ls.repo = repo;
 830        ls.path = xstrdup(path);
 831        ls.dentry_name = NULL;
 832        ls.dentry_flags = 0;
 833        ls.userData = userData;
 834        ls.userFunc = userFunc;
 835        ls.rc = 0;
 836
 837        sprintf(url, "%s%s", repo->base, path);
 838
 839        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 840        out_data = xmalloc(out_buffer.size + 1);
 841        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 842        out_buffer.posn = 0;
 843        out_buffer.buffer = out_data;
 844
 845        in_buffer.size = 4096;
 846        in_data = xmalloc(in_buffer.size);
 847        in_buffer.posn = 0;
 848        in_buffer.buffer = in_data;
 849
 850        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 851        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 852
 853        slot = get_active_slot();
 854        slot->results = &results;
 855        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 856        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 857        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 858        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 859        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 860        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 861        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 862        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 863        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 864
 865        if (start_active_slot(slot)) {
 866                run_active_slot(slot);
 867                if (results.curl_result == CURLE_OK) {
 868                        ctx.name = xcalloc(10, 1);
 869                        ctx.len = 0;
 870                        ctx.cdata = NULL;
 871                        ctx.userFunc = handle_remote_ls_ctx;
 872                        ctx.userData = &ls;
 873                        XML_SetUserData(parser, &ctx);
 874                        XML_SetElementHandler(parser, xml_start_tag,
 875                                              xml_end_tag);
 876                        XML_SetCharacterDataHandler(parser, xml_cdata);
 877                        result = XML_Parse(parser, in_buffer.buffer,
 878                                           in_buffer.posn, 1);
 879                        free(ctx.name);
 880
 881                        if (result != XML_STATUS_OK) {
 882                                ls.rc = error("XML error: %s",
 883                                              XML_ErrorString(
 884                                                      XML_GetErrorCode(parser)));
 885                        }
 886                } else {
 887                        ls.rc = -1;
 888                }
 889        } else {
 890                ls.rc = error("Unable to start PROPFIND request");
 891        }
 892
 893        free(ls.path);
 894        free(url);
 895        free(out_data);
 896        free(in_buffer.buffer);
 897        curl_slist_free_all(dav_headers);
 898
 899        return ls.rc;
 900}
 901
 902static void process_ls_pack(struct remote_ls_ctx *ls)
 903{
 904        unsigned char sha1[20];
 905
 906        if (strlen(ls->dentry_name) == 63 &&
 907            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 908            has_extension(ls->dentry_name, ".pack")) {
 909                get_sha1_hex(ls->dentry_name + 18, sha1);
 910                setup_index(ls->repo, sha1);
 911        }
 912}
 913#endif
 914
 915static int fetch_indices(struct alt_base *repo)
 916{
 917        unsigned char sha1[20];
 918        char *url;
 919        struct buffer buffer;
 920        char *data;
 921        int i = 0;
 922
 923        struct active_request_slot *slot;
 924        struct slot_results results;
 925
 926        if (repo->got_indices)
 927                return 0;
 928
 929        data = xmalloc(4096);
 930        buffer.size = 4096;
 931        buffer.posn = 0;
 932        buffer.buffer = data;
 933
 934        if (get_verbosely)
 935                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 936
 937#ifndef NO_EXPAT
 938        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 939                      process_ls_pack, NULL) == 0)
 940                return 0;
 941#endif
 942
 943        url = xmalloc(strlen(repo->base) + 21);
 944        sprintf(url, "%s/objects/info/packs", repo->base);
 945
 946        slot = get_active_slot();
 947        slot->results = &results;
 948        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 949        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 950        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 951        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 952        if (start_active_slot(slot)) {
 953                run_active_slot(slot);
 954                if (results.curl_result != CURLE_OK) {
 955                        if (missing_target(&results)) {
 956                                repo->got_indices = 1;
 957                                free(buffer.buffer);
 958                                return 0;
 959                        } else {
 960                                repo->got_indices = 0;
 961                                free(buffer.buffer);
 962                                return error("%s", curl_errorstr);
 963                        }
 964                }
 965        } else {
 966                repo->got_indices = 0;
 967                free(buffer.buffer);
 968                return error("Unable to start request");
 969        }
 970
 971        data = buffer.buffer;
 972        while (i < buffer.posn) {
 973                switch (data[i]) {
 974                case 'P':
 975                        i++;
 976                        if (i + 52 <= buffer.posn &&
 977                            !strncmp(data + i, " pack-", 6) &&
 978                            !strncmp(data + i + 46, ".pack\n", 6)) {
 979                                get_sha1_hex(data + i + 6, sha1);
 980                                setup_index(repo, sha1);
 981                                i += 51;
 982                                break;
 983                        }
 984                default:
 985                        while (i < buffer.posn && data[i] != '\n')
 986                                i++;
 987                }
 988                i++;
 989        }
 990
 991        free(buffer.buffer);
 992        repo->got_indices = 1;
 993        return 0;
 994}
 995
 996static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 997{
 998        char *url;
 999        struct packed_git *target;
1000        struct packed_git **lst;
1001        FILE *packfile;
1002        char *filename;
1003        char tmpfile[PATH_MAX];
1004        int ret;
1005        long prev_posn = 0;
1006        char range[RANGE_HEADER_SIZE];
1007        struct curl_slist *range_header = NULL;
1008
1009        struct active_request_slot *slot;
1010        struct slot_results results;
1011
1012        if (fetch_indices(repo))
1013                return -1;
1014        target = find_sha1_pack(sha1, repo->packs);
1015        if (!target)
1016                return -1;
1017
1018        if (get_verbosely) {
1019                fprintf(stderr, "Getting pack %s\n",
1020                        sha1_to_hex(target->sha1));
1021                fprintf(stderr, " which contains %s\n",
1022                        sha1_to_hex(sha1));
1023        }
1024
1025        url = xmalloc(strlen(repo->base) + 65);
1026        sprintf(url, "%s/objects/pack/pack-%s.pack",
1027                repo->base, sha1_to_hex(target->sha1));
1028
1029        filename = sha1_pack_name(target->sha1);
1030        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1031        packfile = fopen(tmpfile, "a");
1032        if (!packfile)
1033                return error("Unable to open local file %s for pack",
1034                             filename);
1035
1036        slot = get_active_slot();
1037        slot->results = &results;
1038        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1039        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1040        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1041        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1042        slot->local = packfile;
1043
1044        /* If there is data present from a previous transfer attempt,
1045           resume where it left off */
1046        prev_posn = ftell(packfile);
1047        if (prev_posn>0) {
1048                if (get_verbosely)
1049                        fprintf(stderr,
1050                                "Resuming fetch of pack %s at byte %ld\n",
1051                                sha1_to_hex(target->sha1), prev_posn);
1052                sprintf(range, "Range: bytes=%ld-", prev_posn);
1053                range_header = curl_slist_append(range_header, range);
1054                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1055        }
1056
1057        if (start_active_slot(slot)) {
1058                run_active_slot(slot);
1059                if (results.curl_result != CURLE_OK) {
1060                        fclose(packfile);
1061                        return error("Unable to get pack file %s\n%s", url,
1062                                     curl_errorstr);
1063                }
1064        } else {
1065                fclose(packfile);
1066                return error("Unable to start request");
1067        }
1068
1069        fclose(packfile);
1070
1071        ret = move_temp_to_file(tmpfile, filename);
1072        if (ret)
1073                return ret;
1074
1075        lst = &repo->packs;
1076        while (*lst != target)
1077                lst = &((*lst)->next);
1078        *lst = (*lst)->next;
1079
1080        if (verify_pack(target, 0))
1081                return -1;
1082        install_packed_git(target);
1083
1084        return 0;
1085}
1086
1087static void abort_object_request(struct object_request *obj_req)
1088{
1089        if (obj_req->local >= 0) {
1090                close(obj_req->local);
1091                obj_req->local = -1;
1092        }
1093        unlink(obj_req->tmpfile);
1094        if (obj_req->slot) {
1095                release_active_slot(obj_req->slot);
1096                obj_req->slot = NULL;
1097        }
1098        release_object_request(obj_req);
1099}
1100
1101static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1102{
1103        char *hex = sha1_to_hex(sha1);
1104        int ret = 0;
1105        struct object_request *obj_req = object_queue_head;
1106
1107        while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1108                obj_req = obj_req->next;
1109        if (obj_req == NULL)
1110                return error("Couldn't find request for %s in the queue", hex);
1111
1112        if (has_sha1_file(obj_req->sha1)) {
1113                abort_object_request(obj_req);
1114                return 0;
1115        }
1116
1117#ifdef USE_CURL_MULTI
1118        while (obj_req->state == WAITING) {
1119                step_active_slots();
1120        }
1121#else
1122        start_object_request(obj_req);
1123#endif
1124
1125        while (obj_req->state == ACTIVE) {
1126                run_active_slot(obj_req->slot);
1127        }
1128        if (obj_req->local != -1) {
1129                close(obj_req->local); obj_req->local = -1;
1130        }
1131
1132        if (obj_req->state == ABORTED) {
1133                ret = error("Request for %s aborted", hex);
1134        } else if (obj_req->curl_result != CURLE_OK &&
1135                   obj_req->http_code != 416) {
1136                if (missing_target(obj_req))
1137                        ret = -1; /* Be silent, it is probably in a pack. */
1138                else
1139                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1140                                    obj_req->errorstr, obj_req->curl_result,
1141                                    obj_req->http_code, hex);
1142        } else if (obj_req->zret != Z_STREAM_END) {
1143                corrupt_object_found++;
1144                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1145        } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1146                ret = error("File %s has bad hash", hex);
1147        } else if (obj_req->rename < 0) {
1148                ret = error("unable to write sha1 filename %s",
1149                            obj_req->filename);
1150        }
1151
1152        release_object_request(obj_req);
1153        return ret;
1154}
1155
1156int fetch(unsigned char *sha1)
1157{
1158        struct alt_base *altbase = alt;
1159
1160        if (!fetch_object(altbase, sha1))
1161                return 0;
1162        while (altbase) {
1163                if (!fetch_pack(altbase, sha1))
1164                        return 0;
1165                fetch_alternates(alt->base);
1166                altbase = altbase->next;
1167        }
1168        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1169                     alt->base);
1170}
1171
/*
 * Return 0 when `ch` may appear unescaped in a ref URL (ASCII letters,
 * digits, '/', '-' and '.'), 1 when it has to be percent-encoded.
 */
static inline int needs_quote(int ch)
{
        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                break;
        }
        if ('A' <= ch && ch <= 'Z')
                return 0;
        if ('a' <= ch && ch <= 'z')
                return 0;
        if ('0' <= ch && ch <= '9')
                return 0;
        return 1;
}
1183
/*
 * Map a value to an upper-case hexadecimal digit character: values below
 * 10 map onto '0' + v, all others onto 'A' + v - 10.
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1189
/*
 * Build "<base>refs/<ref>" in a freshly allocated buffer, replacing every
 * character of `ref` for which needs_quote() is true by '%' followed by
 * two upper-case hex digits.  The caller owns the returned string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *src;
        char *out, *qref;
        int total, baselen, c;

        baselen = strlen(base);

        /* First pass: size the result ("refs/" + NUL = 6 extra bytes). */
        total = baselen + 6;
        src = ref;
        while ((c = *src) != 0) {
                total += needs_quote(c) ? 3 : 1;
                src++;
        }

        qref = xmalloc(total);
        memcpy(qref, base, baselen);
        memcpy(qref + baselen, "refs/", 5);

        /* Second pass: copy the ref, escaping where required. */
        out = qref + baselen + 5;
        src = ref;
        while ((c = *src) != 0) {
                if (!needs_quote(c)) {
                        *out++ = c;
                } else {
                        *out++ = '%';
                        *out++ = hex((c >> 4) & 0xF);
                        *out++ = hex(c & 0xF);
                }
                src++;
        }
        *out = 0;

        return qref;
}
1217
1218int fetch_ref(char *ref, unsigned char *sha1)
1219{
1220        char *url;
1221        char hex[42];
1222        struct buffer buffer;
1223        const char *base = alt->base;
1224        struct active_request_slot *slot;
1225        struct slot_results results;
1226        buffer.size = 41;
1227        buffer.posn = 0;
1228        buffer.buffer = hex;
1229        hex[41] = '\0';
1230
1231        url = quote_ref_url(base, ref);
1232        slot = get_active_slot();
1233        slot->results = &results;
1234        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1235        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1236        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1237        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1238        if (start_active_slot(slot)) {
1239                run_active_slot(slot);
1240                if (results.curl_result != CURLE_OK)
1241                        return error("Couldn't get %s for %s\n%s",
1242                                     url, ref, curl_errorstr);
1243        } else {
1244                return error("Unable to start request");
1245        }
1246
1247        hex[40] = '\0';
1248        get_sha1_hex(hex, sha1);
1249        return 0;
1250}
1251
1252int main(int argc, const char **argv)
1253{
1254        int commits;
1255        const char **write_ref = NULL;
1256        char **commit_id;
1257        const char *url;
1258        char *path;
1259        int arg = 1;
1260        int rc = 0;
1261
1262        setup_ident();
1263        setup_git_directory();
1264        git_config(git_default_config);
1265
1266        while (arg < argc && argv[arg][0] == '-') {
1267                if (argv[arg][1] == 't') {
1268                        get_tree = 1;
1269                } else if (argv[arg][1] == 'c') {
1270                        get_history = 1;
1271                } else if (argv[arg][1] == 'a') {
1272                        get_all = 1;
1273                        get_tree = 1;
1274                        get_history = 1;
1275                } else if (argv[arg][1] == 'v') {
1276                        get_verbosely = 1;
1277                } else if (argv[arg][1] == 'w') {
1278                        write_ref = &argv[arg + 1];
1279                        arg++;
1280                } else if (!strcmp(argv[arg], "--recover")) {
1281                        get_recover = 1;
1282                } else if (!strcmp(argv[arg], "--stdin")) {
1283                        commits_on_stdin = 1;
1284                }
1285                arg++;
1286        }
1287        if (argc < arg + 2 - commits_on_stdin) {
1288                usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1289                return 1;
1290        }
1291        if (commits_on_stdin) {
1292                commits = pull_targets_stdin(&commit_id, &write_ref);
1293        } else {
1294                commit_id = (char **) &argv[arg++];
1295                commits = 1;
1296        }
1297        url = argv[arg];
1298
1299        http_init();
1300
1301        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1302
1303        alt = xmalloc(sizeof(*alt));
1304        alt->base = url;
1305        alt->got_indices = 0;
1306        alt->packs = NULL;
1307        alt->next = NULL;
1308        path = strstr(url, "//");
1309        if (path) {
1310                path = strchr(path+2, '/');
1311                if (path)
1312                        alt->path_len = strlen(path);
1313        }
1314
1315        if (pull(commits, commit_id, write_ref, url))
1316                rc = 1;
1317
1318        http_cleanup();
1319
1320        curl_slist_free_all(no_pragma_header);
1321
1322        if (commits_on_stdin)
1323                pull_targets_free(commits, commit_id, write_ref);
1324
1325        if (corrupt_object_found) {
1326                fprintf(stderr,
1327"Some loose object were found to be corrupt, but they might be just\n"
1328"a false '404 Not Found' error message sent with incorrect HTTP\n"
1329"status code.  Suggest running git fsck-objects.\n");
1330        }
1331        return rc;
1332}