/* http-fetch.c, as of commit be4a015 ("http-fetch.c: consolidate code to detect missing fetch target") */
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#ifndef NO_EXPAT
   8#include <expat.h>
   9
  10/* Definitions for DAV requests */
  11#define DAV_PROPFIND "PROPFIND"
  12#define DAV_PROPFIND_RESP ".multistatus.response"
  13#define DAV_PROPFIND_NAME ".multistatus.response.href"
  14#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
  15#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
  16
  17/* Definitions for processing XML DAV responses */
  18#ifndef XML_STATUS_OK
  19enum XML_Status {
  20  XML_STATUS_OK = 1,
  21  XML_STATUS_ERROR = 0
  22};
  23#define XML_STATUS_OK    1
  24#define XML_STATUS_ERROR 0
  25#endif
  26
  27/* Flags that control remote_ls processing */
  28#define PROCESS_FILES (1u << 0)
  29#define PROCESS_DIRS  (1u << 1)
  30#define RECURSIVE     (1u << 2)
  31
  32/* Flags that remote_ls passes to callback functions */
  33#define IS_DIR (1u << 0)
  34#endif
  35
  36#define PREV_BUF_SIZE 4096
  37#define RANGE_HEADER_SIZE 30
  38
  39static int commits_on_stdin;
  40
  41static int got_alternates = -1;
  42static int corrupt_object_found;
  43
  44static struct curl_slist *no_pragma_header;
  45
  46struct alt_base
  47{
  48        const char *base;
  49        int path_len;
  50        int got_indices;
  51        struct packed_git *packs;
  52        struct alt_base *next;
  53};
  54
  55static struct alt_base *alt;
  56
  57enum object_request_state {
  58        WAITING,
  59        ABORTED,
  60        ACTIVE,
  61        COMPLETE,
  62};
  63
  64struct object_request
  65{
  66        unsigned char sha1[20];
  67        struct alt_base *repo;
  68        char *url;
  69        char filename[PATH_MAX];
  70        char tmpfile[PATH_MAX];
  71        int local;
  72        enum object_request_state state;
  73        CURLcode curl_result;
  74        char errorstr[CURL_ERROR_SIZE];
  75        long http_code;
  76        unsigned char real_sha1[20];
  77        SHA_CTX c;
  78        z_stream stream;
  79        int zret;
  80        int rename;
  81        struct active_request_slot *slot;
  82        struct object_request *next;
  83};
  84
  85struct alternates_request {
  86        const char *base;
  87        char *url;
  88        struct buffer *buffer;
  89        struct active_request_slot *slot;
  90        int http_specific;
  91};
  92
  93#ifndef NO_EXPAT
  94struct xml_ctx
  95{
  96        char *name;
  97        int len;
  98        char *cdata;
  99        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
 100        void *userData;
 101};
 102
 103struct remote_ls_ctx
 104{
 105        struct alt_base *repo;
 106        char *path;
 107        void (*userFunc)(struct remote_ls_ctx *ls);
 108        void *userData;
 109        int flags;
 110        char *dentry_name;
 111        int dentry_flags;
 112        int rc;
 113        struct remote_ls_ctx *parent;
 114};
 115#endif
 116
 117static struct object_request *object_queue_head;
 118
 119static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
 120                               void *data)
 121{
 122        unsigned char expn[4096];
 123        size_t size = eltsize * nmemb;
 124        int posn = 0;
 125        struct object_request *obj_req = (struct object_request *)data;
 126        do {
 127                ssize_t retval = write(obj_req->local,
 128                                       (char *) ptr + posn, size - posn);
 129                if (retval < 0)
 130                        return posn;
 131                posn += retval;
 132        } while (posn < size);
 133
 134        obj_req->stream.avail_in = size;
 135        obj_req->stream.next_in = ptr;
 136        do {
 137                obj_req->stream.next_out = expn;
 138                obj_req->stream.avail_out = sizeof(expn);
 139                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
 140                SHA1_Update(&obj_req->c, expn,
 141                            sizeof(expn) - obj_req->stream.avail_out);
 142        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
 143        data_received++;
 144        return size;
 145}
 146
 147static int missing__target(int code, int result)
 148{
 149        return  /* file:// URL -- do we ever use one??? */
 150                (result == CURLE_FILE_COULDNT_READ_FILE) ||
 151                /* http:// and https:// URL */
 152                (code == 404 && result == CURLE_HTTP_RETURNED_ERROR)
 153                ;
 154}
 155
 156#define missing_target(a) missing__target((a)->http_code, (a)->curl_result)
 157
 158static void fetch_alternates(const char *base);
 159
 160static void process_object_response(void *callback_data);
 161
 162static void start_object_request(struct object_request *obj_req)
 163{
 164        char *hex = sha1_to_hex(obj_req->sha1);
 165        char prevfile[PATH_MAX];
 166        char *url;
 167        char *posn;
 168        int prevlocal;
 169        unsigned char prev_buf[PREV_BUF_SIZE];
 170        ssize_t prev_read = 0;
 171        long prev_posn = 0;
 172        char range[RANGE_HEADER_SIZE];
 173        struct curl_slist *range_header = NULL;
 174        struct active_request_slot *slot;
 175
 176        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 177        unlink(prevfile);
 178        rename(obj_req->tmpfile, prevfile);
 179        unlink(obj_req->tmpfile);
 180
 181        if (obj_req->local != -1)
 182                error("fd leakage in start: %d", obj_req->local);
 183        obj_req->local = open(obj_req->tmpfile,
 184                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 185        /* This could have failed due to the "lazy directory creation";
 186         * try to mkdir the last path component.
 187         */
 188        if (obj_req->local < 0 && errno == ENOENT) {
 189                char *dir = strrchr(obj_req->tmpfile, '/');
 190                if (dir) {
 191                        *dir = 0;
 192                        mkdir(obj_req->tmpfile, 0777);
 193                        *dir = '/';
 194                }
 195                obj_req->local = open(obj_req->tmpfile,
 196                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 197        }
 198
 199        if (obj_req->local < 0) {
 200                obj_req->state = ABORTED;
 201                error("Couldn't create temporary file %s for %s: %s",
 202                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 203                return;
 204        }
 205
 206        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 207
 208        inflateInit(&obj_req->stream);
 209
 210        SHA1_Init(&obj_req->c);
 211
 212        url = xmalloc(strlen(obj_req->repo->base) + 50);
 213        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 214        strcpy(url, obj_req->repo->base);
 215        posn = url + strlen(obj_req->repo->base);
 216        strcpy(posn, "objects/");
 217        posn += 8;
 218        memcpy(posn, hex, 2);
 219        posn += 2;
 220        *(posn++) = '/';
 221        strcpy(posn, hex + 2);
 222        strcpy(obj_req->url, url);
 223
 224        /* If a previous temp file is present, process what was already
 225           fetched. */
 226        prevlocal = open(prevfile, O_RDONLY);
 227        if (prevlocal != -1) {
 228                do {
 229                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 230                        if (prev_read>0) {
 231                                if (fwrite_sha1_file(prev_buf,
 232                                                     1,
 233                                                     prev_read,
 234                                                     obj_req) == prev_read) {
 235                                        prev_posn += prev_read;
 236                                } else {
 237                                        prev_read = -1;
 238                                }
 239                        }
 240                } while (prev_read > 0);
 241                close(prevlocal);
 242        }
 243        unlink(prevfile);
 244
 245        /* Reset inflate/SHA1 if there was an error reading the previous temp
 246           file; also rewind to the beginning of the local file. */
 247        if (prev_read == -1) {
 248                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 249                inflateInit(&obj_req->stream);
 250                SHA1_Init(&obj_req->c);
 251                if (prev_posn>0) {
 252                        prev_posn = 0;
 253                        lseek(obj_req->local, SEEK_SET, 0);
 254                        ftruncate(obj_req->local, 0);
 255                }
 256        }
 257
 258        slot = get_active_slot();
 259        slot->callback_func = process_object_response;
 260        slot->callback_data = obj_req;
 261        obj_req->slot = slot;
 262
 263        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 264        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 265        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 266        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 267        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 268
 269        /* If we have successfully processed data from a previous fetch
 270           attempt, only fetch the data we don't already have. */
 271        if (prev_posn>0) {
 272                if (get_verbosely)
 273                        fprintf(stderr,
 274                                "Resuming fetch of object %s at byte %ld\n",
 275                                hex, prev_posn);
 276                sprintf(range, "Range: bytes=%ld-", prev_posn);
 277                range_header = curl_slist_append(range_header, range);
 278                curl_easy_setopt(slot->curl,
 279                                 CURLOPT_HTTPHEADER, range_header);
 280        }
 281
 282        /* Try to get the request started, abort the request on error */
 283        obj_req->state = ACTIVE;
 284        if (!start_active_slot(slot)) {
 285                obj_req->state = ABORTED;
 286                obj_req->slot = NULL;
 287                close(obj_req->local); obj_req->local = -1;
 288                free(obj_req->url);
 289                return;
 290        }
 291}
 292
 293static void finish_object_request(struct object_request *obj_req)
 294{
 295        struct stat st;
 296
 297        fchmod(obj_req->local, 0444);
 298        close(obj_req->local); obj_req->local = -1;
 299
 300        if (obj_req->http_code == 416) {
 301                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 302        } else if (obj_req->curl_result != CURLE_OK) {
 303                if (stat(obj_req->tmpfile, &st) == 0)
 304                        if (st.st_size == 0)
 305                                unlink(obj_req->tmpfile);
 306                return;
 307        }
 308
 309        inflateEnd(&obj_req->stream);
 310        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 311        if (obj_req->zret != Z_STREAM_END) {
 312                unlink(obj_req->tmpfile);
 313                return;
 314        }
 315        if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
 316                unlink(obj_req->tmpfile);
 317                return;
 318        }
 319        obj_req->rename =
 320                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 321
 322        if (obj_req->rename == 0)
 323                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 324}
 325
 326static void process_object_response(void *callback_data)
 327{
 328        struct object_request *obj_req =
 329                (struct object_request *)callback_data;
 330
 331        obj_req->curl_result = obj_req->slot->curl_result;
 332        obj_req->http_code = obj_req->slot->http_code;
 333        obj_req->slot = NULL;
 334        obj_req->state = COMPLETE;
 335
 336        /* Use alternates if necessary */
 337        if (missing_target(obj_req)) {
 338                fetch_alternates(alt->base);
 339                if (obj_req->repo->next != NULL) {
 340                        obj_req->repo =
 341                                obj_req->repo->next;
 342                        close(obj_req->local);
 343                        obj_req->local = -1;
 344                        start_object_request(obj_req);
 345                        return;
 346                }
 347        }
 348
 349        finish_object_request(obj_req);
 350}
 351
 352static void release_object_request(struct object_request *obj_req)
 353{
 354        struct object_request *entry = object_queue_head;
 355
 356        if (obj_req->local != -1)
 357                error("fd leakage in release: %d", obj_req->local);
 358        if (obj_req == object_queue_head) {
 359                object_queue_head = obj_req->next;
 360        } else {
 361                while (entry->next != NULL && entry->next != obj_req)
 362                        entry = entry->next;
 363                if (entry->next == obj_req)
 364                        entry->next = entry->next->next;
 365        }
 366
 367        free(obj_req->url);
 368        free(obj_req);
 369}
 370
 371#ifdef USE_CURL_MULTI
 372void fill_active_slots(void)
 373{
 374        struct object_request *obj_req = object_queue_head;
 375        struct active_request_slot *slot = active_queue_head;
 376        int num_transfers;
 377
 378        while (active_requests < max_requests && obj_req != NULL) {
 379                if (obj_req->state == WAITING) {
 380                        if (has_sha1_file(obj_req->sha1))
 381                                obj_req->state = COMPLETE;
 382                        else
 383                                start_object_request(obj_req);
 384                        curl_multi_perform(curlm, &num_transfers);
 385                }
 386                obj_req = obj_req->next;
 387        }
 388
 389        while (slot != NULL) {
 390                if (!slot->in_use && slot->curl != NULL) {
 391                        curl_easy_cleanup(slot->curl);
 392                        slot->curl = NULL;
 393                }
 394                slot = slot->next;
 395        }
 396}
 397#endif
 398
 399void prefetch(unsigned char *sha1)
 400{
 401        struct object_request *newreq;
 402        struct object_request *tail;
 403        char *filename = sha1_file_name(sha1);
 404
 405        newreq = xmalloc(sizeof(*newreq));
 406        hashcpy(newreq->sha1, sha1);
 407        newreq->repo = alt;
 408        newreq->url = NULL;
 409        newreq->local = -1;
 410        newreq->state = WAITING;
 411        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 412        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 413                 "%s.temp", filename);
 414        newreq->slot = NULL;
 415        newreq->next = NULL;
 416
 417        if (object_queue_head == NULL) {
 418                object_queue_head = newreq;
 419        } else {
 420                tail = object_queue_head;
 421                while (tail->next != NULL) {
 422                        tail = tail->next;
 423                }
 424                tail->next = newreq;
 425        }
 426
 427#ifdef USE_CURL_MULTI
 428        fill_active_slots();
 429        step_active_slots();
 430#endif
 431}
 432
 433static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 434{
 435        char *hex = sha1_to_hex(sha1);
 436        char *filename;
 437        char *url;
 438        char tmpfile[PATH_MAX];
 439        long prev_posn = 0;
 440        char range[RANGE_HEADER_SIZE];
 441        struct curl_slist *range_header = NULL;
 442
 443        FILE *indexfile;
 444        struct active_request_slot *slot;
 445        struct slot_results results;
 446
 447        if (has_pack_index(sha1))
 448                return 0;
 449
 450        if (get_verbosely)
 451                fprintf(stderr, "Getting index for pack %s\n", hex);
 452
 453        url = xmalloc(strlen(repo->base) + 64);
 454        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 455
 456        filename = sha1_pack_index_name(sha1);
 457        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 458        indexfile = fopen(tmpfile, "a");
 459        if (!indexfile)
 460                return error("Unable to open local file %s for pack index",
 461                             filename);
 462
 463        slot = get_active_slot();
 464        slot->results = &results;
 465        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 466        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 467        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 468        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 469        slot->local = indexfile;
 470
 471        /* If there is data present from a previous transfer attempt,
 472           resume where it left off */
 473        prev_posn = ftell(indexfile);
 474        if (prev_posn>0) {
 475                if (get_verbosely)
 476                        fprintf(stderr,
 477                                "Resuming fetch of index for pack %s at byte %ld\n",
 478                                hex, prev_posn);
 479                sprintf(range, "Range: bytes=%ld-", prev_posn);
 480                range_header = curl_slist_append(range_header, range);
 481                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 482        }
 483
 484        if (start_active_slot(slot)) {
 485                run_active_slot(slot);
 486                if (results.curl_result != CURLE_OK) {
 487                        fclose(indexfile);
 488                        return error("Unable to get pack index %s\n%s", url,
 489                                     curl_errorstr);
 490                }
 491        } else {
 492                fclose(indexfile);
 493                return error("Unable to start request");
 494        }
 495
 496        fclose(indexfile);
 497
 498        return move_temp_to_file(tmpfile, filename);
 499}
 500
 501static int setup_index(struct alt_base *repo, unsigned char *sha1)
 502{
 503        struct packed_git *new_pack;
 504        if (has_pack_file(sha1))
 505                return 0; /* don't list this as something we can get */
 506
 507        if (fetch_index(repo, sha1))
 508                return -1;
 509
 510        new_pack = parse_pack_index(sha1);
 511        new_pack->next = repo->packs;
 512        repo->packs = new_pack;
 513        return 0;
 514}
 515
 516static void process_alternates_response(void *callback_data)
 517{
 518        struct alternates_request *alt_req =
 519                (struct alternates_request *)callback_data;
 520        struct active_request_slot *slot = alt_req->slot;
 521        struct alt_base *tail = alt;
 522        const char *base = alt_req->base;
 523        static const char null_byte = '\0';
 524        char *data;
 525        int i = 0;
 526
 527        if (alt_req->http_specific) {
 528                if (slot->curl_result != CURLE_OK ||
 529                    !alt_req->buffer->posn) {
 530
 531                        /* Try reusing the slot to get non-http alternates */
 532                        alt_req->http_specific = 0;
 533                        sprintf(alt_req->url, "%s/objects/info/alternates",
 534                                base);
 535                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 536                                         alt_req->url);
 537                        active_requests++;
 538                        slot->in_use = 1;
 539                        if (slot->finished != NULL)
 540                                (*slot->finished) = 0;
 541                        if (!start_active_slot(slot)) {
 542                                got_alternates = -1;
 543                                slot->in_use = 0;
 544                                if (slot->finished != NULL)
 545                                        (*slot->finished) = 1;
 546                        }
 547                        return;
 548                }
 549        } else if (slot->curl_result != CURLE_OK) {
 550                if (!missing_target(slot)) {
 551                        got_alternates = -1;
 552                        return;
 553                }
 554        }
 555
 556        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 557        alt_req->buffer->posn--;
 558        data = alt_req->buffer->buffer;
 559
 560        while (i < alt_req->buffer->posn) {
 561                int posn = i;
 562                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 563                        posn++;
 564                if (data[posn] == '\n') {
 565                        int okay = 0;
 566                        int serverlen = 0;
 567                        struct alt_base *newalt;
 568                        char *target = NULL;
 569                        char *path;
 570                        if (data[i] == '/') {
 571                                /* This counts
 572                                 * http://git.host/pub/scm/linux.git/
 573                                 * -----------here^
 574                                 * so memcpy(dst, base, serverlen) will
 575                                 * copy up to "...git.host".
 576                                 */
 577                                const char *colon_ss = strstr(base,"://");
 578                                if (colon_ss) {
 579                                        serverlen = (strchr(colon_ss + 3, '/')
 580                                                     - base);
 581                                        okay = 1;
 582                                }
 583                        } else if (!memcmp(data + i, "../", 3)) {
 584                                /* Relative URL; chop the corresponding
 585                                 * number of subpath from base (and ../
 586                                 * from data), and concatenate the result.
 587                                 *
 588                                 * The code first drops ../ from data, and
 589                                 * then drops one ../ from data and one path
 590                                 * from base.  IOW, one extra ../ is dropped
 591                                 * from data than path is dropped from base.
 592                                 *
 593                                 * This is not wrong.  The alternate in
 594                                 *     http://git.host/pub/scm/linux.git/
 595                                 * to borrow from
 596                                 *     http://git.host/pub/scm/linus.git/
 597                                 * is ../../linus.git/objects/.  You need
 598                                 * two ../../ to borrow from your direct
 599                                 * neighbour.
 600                                 */
 601                                i += 3;
 602                                serverlen = strlen(base);
 603                                while (i + 2 < posn &&
 604                                       !memcmp(data + i, "../", 3)) {
 605                                        do {
 606                                                serverlen--;
 607                                        } while (serverlen &&
 608                                                 base[serverlen - 1] != '/');
 609                                        i += 3;
 610                                }
 611                                /* If the server got removed, give up. */
 612                                okay = strchr(base, ':') - base + 3 <
 613                                        serverlen;
 614                        } else if (alt_req->http_specific) {
 615                                char *colon = strchr(data + i, ':');
 616                                char *slash = strchr(data + i, '/');
 617                                if (colon && slash && colon < data + posn &&
 618                                    slash < data + posn && colon < slash) {
 619                                        okay = 1;
 620                                }
 621                        }
 622                        /* skip "objects\n" at end */
 623                        if (okay) {
 624                                target = xmalloc(serverlen + posn - i - 6);
 625                                memcpy(target, base, serverlen);
 626                                memcpy(target + serverlen, data + i,
 627                                       posn - i - 7);
 628                                target[serverlen + posn - i - 7] = 0;
 629                                if (get_verbosely)
 630                                        fprintf(stderr,
 631                                                "Also look at %s\n", target);
 632                                newalt = xmalloc(sizeof(*newalt));
 633                                newalt->next = NULL;
 634                                newalt->base = target;
 635                                newalt->got_indices = 0;
 636                                newalt->packs = NULL;
 637                                path = strstr(target, "//");
 638                                if (path) {
 639                                        path = strchr(path+2, '/');
 640                                        if (path)
 641                                                newalt->path_len = strlen(path);
 642                                }
 643
 644                                while (tail->next != NULL)
 645                                        tail = tail->next;
 646                                tail->next = newalt;
 647                        }
 648                }
 649                i = posn + 1;
 650        }
 651
 652        got_alternates = 1;
 653}
 654
 655static void fetch_alternates(const char *base)
 656{
 657        struct buffer buffer;
 658        char *url;
 659        char *data;
 660        struct active_request_slot *slot;
 661        struct alternates_request alt_req;
 662
 663        /* If another request has already started fetching alternates,
 664           wait for them to arrive and return to processing this request's
 665           curl message */
 666#ifdef USE_CURL_MULTI
 667        while (got_alternates == 0) {
 668                step_active_slots();
 669        }
 670#endif
 671
 672        /* Nothing to do if they've already been fetched */
 673        if (got_alternates == 1)
 674                return;
 675
 676        /* Start the fetch */
 677        got_alternates = 0;
 678
 679        data = xmalloc(4096);
 680        buffer.size = 4096;
 681        buffer.posn = 0;
 682        buffer.buffer = data;
 683
 684        if (get_verbosely)
 685                fprintf(stderr, "Getting alternates list for %s\n", base);
 686
 687        url = xmalloc(strlen(base) + 31);
 688        sprintf(url, "%s/objects/info/http-alternates", base);
 689
 690        /* Use a callback to process the result, since another request
 691           may fail and need to have alternates loaded before continuing */
 692        slot = get_active_slot();
 693        slot->callback_func = process_alternates_response;
 694        slot->callback_data = &alt_req;
 695
 696        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 697        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 698        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 699
 700        alt_req.base = base;
 701        alt_req.url = url;
 702        alt_req.buffer = &buffer;
 703        alt_req.http_specific = 1;
 704        alt_req.slot = slot;
 705
 706        if (start_active_slot(slot))
 707                run_active_slot(slot);
 708        else
 709                got_alternates = -1;
 710
 711        free(data);
 712        free(url);
 713}
 714
 715#ifndef NO_EXPAT
 716static void
 717xml_start_tag(void *userData, const char *name, const char **atts)
 718{
 719        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 720        const char *c = strchr(name, ':');
 721        int new_len;
 722
 723        if (c == NULL)
 724                c = name;
 725        else
 726                c++;
 727
 728        new_len = strlen(ctx->name) + strlen(c) + 2;
 729
 730        if (new_len > ctx->len) {
 731                ctx->name = xrealloc(ctx->name, new_len);
 732                ctx->len = new_len;
 733        }
 734        strcat(ctx->name, ".");
 735        strcat(ctx->name, c);
 736
 737        free(ctx->cdata);
 738        ctx->cdata = NULL;
 739
 740        ctx->userFunc(ctx, 0);
 741}
 742
 743static void
 744xml_end_tag(void *userData, const char *name)
 745{
 746        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 747        const char *c = strchr(name, ':');
 748        char *ep;
 749
 750        ctx->userFunc(ctx, 1);
 751
 752        if (c == NULL)
 753                c = name;
 754        else
 755                c++;
 756
 757        ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
 758        *ep = 0;
 759}
 760
 761static void
 762xml_cdata(void *userData, const XML_Char *s, int len)
 763{
 764        struct xml_ctx *ctx = (struct xml_ctx *)userData;
 765        free(ctx->cdata);
 766        ctx->cdata = xmalloc(len + 1);
 767        strlcpy(ctx->cdata, s, len + 1);
 768}
 769
 770static int remote_ls(struct alt_base *repo, const char *path, int flags,
 771                     void (*userFunc)(struct remote_ls_ctx *ls),
 772                     void *userData);
 773
 774static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
 775{
 776        struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
 777
 778        if (tag_closed) {
 779                if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
 780                        if (ls->dentry_flags & IS_DIR) {
 781                                if (ls->flags & PROCESS_DIRS) {
 782                                        ls->userFunc(ls);
 783                                }
 784                                if (strcmp(ls->dentry_name, ls->path) &&
 785                                    ls->flags & RECURSIVE) {
 786                                        ls->rc = remote_ls(ls->repo,
 787                                                           ls->dentry_name,
 788                                                           ls->flags,
 789                                                           ls->userFunc,
 790                                                           ls->userData);
 791                                }
 792                        } else if (ls->flags & PROCESS_FILES) {
 793                                ls->userFunc(ls);
 794                        }
 795                } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
 796                        ls->dentry_name = xmalloc(strlen(ctx->cdata) -
 797                                                  ls->repo->path_len + 1);
 798                        strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
 799                } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
 800                        ls->dentry_flags |= IS_DIR;
 801                }
 802        } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
 803                free(ls->dentry_name);
 804                ls->dentry_name = NULL;
 805                ls->dentry_flags = 0;
 806        }
 807}
 808
 809static int remote_ls(struct alt_base *repo, const char *path, int flags,
 810                     void (*userFunc)(struct remote_ls_ctx *ls),
 811                     void *userData)
 812{
 813        char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
 814        struct active_request_slot *slot;
 815        struct slot_results results;
 816        struct buffer in_buffer;
 817        struct buffer out_buffer;
 818        char *in_data;
 819        char *out_data;
 820        XML_Parser parser = XML_ParserCreate(NULL);
 821        enum XML_Status result;
 822        struct curl_slist *dav_headers = NULL;
 823        struct xml_ctx ctx;
 824        struct remote_ls_ctx ls;
 825
 826        ls.flags = flags;
 827        ls.repo = repo;
 828        ls.path = xstrdup(path);
 829        ls.dentry_name = NULL;
 830        ls.dentry_flags = 0;
 831        ls.userData = userData;
 832        ls.userFunc = userFunc;
 833        ls.rc = 0;
 834
 835        sprintf(url, "%s%s", repo->base, path);
 836
 837        out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
 838        out_data = xmalloc(out_buffer.size + 1);
 839        snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
 840        out_buffer.posn = 0;
 841        out_buffer.buffer = out_data;
 842
 843        in_buffer.size = 4096;
 844        in_data = xmalloc(in_buffer.size);
 845        in_buffer.posn = 0;
 846        in_buffer.buffer = in_data;
 847
 848        dav_headers = curl_slist_append(dav_headers, "Depth: 1");
 849        dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
 850
 851        slot = get_active_slot();
 852        slot->results = &results;
 853        curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
 854        curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
 855        curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
 856        curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
 857        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 858        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 859        curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
 860        curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
 861        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
 862
 863        if (start_active_slot(slot)) {
 864                run_active_slot(slot);
 865                if (results.curl_result == CURLE_OK) {
 866                        ctx.name = xcalloc(10, 1);
 867                        ctx.len = 0;
 868                        ctx.cdata = NULL;
 869                        ctx.userFunc = handle_remote_ls_ctx;
 870                        ctx.userData = &ls;
 871                        XML_SetUserData(parser, &ctx);
 872                        XML_SetElementHandler(parser, xml_start_tag,
 873                                              xml_end_tag);
 874                        XML_SetCharacterDataHandler(parser, xml_cdata);
 875                        result = XML_Parse(parser, in_buffer.buffer,
 876                                           in_buffer.posn, 1);
 877                        free(ctx.name);
 878
 879                        if (result != XML_STATUS_OK) {
 880                                ls.rc = error("XML error: %s",
 881                                              XML_ErrorString(
 882                                                      XML_GetErrorCode(parser)));
 883                        }
 884                } else {
 885                        ls.rc = -1;
 886                }
 887        } else {
 888                ls.rc = error("Unable to start PROPFIND request");
 889        }
 890
 891        free(ls.path);
 892        free(url);
 893        free(out_data);
 894        free(in_buffer.buffer);
 895        curl_slist_free_all(dav_headers);
 896
 897        return ls.rc;
 898}
 899
 900static void process_ls_pack(struct remote_ls_ctx *ls)
 901{
 902        unsigned char sha1[20];
 903
 904        if (strlen(ls->dentry_name) == 63 &&
 905            !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
 906            has_extension(ls->dentry_name, ".pack")) {
 907                get_sha1_hex(ls->dentry_name + 18, sha1);
 908                setup_index(ls->repo, sha1);
 909        }
 910}
 911#endif
 912
 913static int fetch_indices(struct alt_base *repo)
 914{
 915        unsigned char sha1[20];
 916        char *url;
 917        struct buffer buffer;
 918        char *data;
 919        int i = 0;
 920
 921        struct active_request_slot *slot;
 922        struct slot_results results;
 923
 924        if (repo->got_indices)
 925                return 0;
 926
 927        data = xmalloc(4096);
 928        buffer.size = 4096;
 929        buffer.posn = 0;
 930        buffer.buffer = data;
 931
 932        if (get_verbosely)
 933                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 934
 935#ifndef NO_EXPAT
 936        if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
 937                      process_ls_pack, NULL) == 0)
 938                return 0;
 939#endif
 940
 941        url = xmalloc(strlen(repo->base) + 21);
 942        sprintf(url, "%s/objects/info/packs", repo->base);
 943
 944        slot = get_active_slot();
 945        slot->results = &results;
 946        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 947        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 948        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 949        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 950        if (start_active_slot(slot)) {
 951                run_active_slot(slot);
 952                if (results.curl_result != CURLE_OK) {
 953                        if (missing_target(&results)) {
 954                                repo->got_indices = 1;
 955                                free(buffer.buffer);
 956                                return 0;
 957                        } else {
 958                                repo->got_indices = 0;
 959                                free(buffer.buffer);
 960                                return error("%s", curl_errorstr);
 961                        }
 962                }
 963        } else {
 964                repo->got_indices = 0;
 965                free(buffer.buffer);
 966                return error("Unable to start request");
 967        }
 968
 969        data = buffer.buffer;
 970        while (i < buffer.posn) {
 971                switch (data[i]) {
 972                case 'P':
 973                        i++;
 974                        if (i + 52 <= buffer.posn &&
 975                            !strncmp(data + i, " pack-", 6) &&
 976                            !strncmp(data + i + 46, ".pack\n", 6)) {
 977                                get_sha1_hex(data + i + 6, sha1);
 978                                setup_index(repo, sha1);
 979                                i += 51;
 980                                break;
 981                        }
 982                default:
 983                        while (i < buffer.posn && data[i] != '\n')
 984                                i++;
 985                }
 986                i++;
 987        }
 988
 989        free(buffer.buffer);
 990        repo->got_indices = 1;
 991        return 0;
 992}
 993
 994static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 995{
 996        char *url;
 997        struct packed_git *target;
 998        struct packed_git **lst;
 999        FILE *packfile;
1000        char *filename;
1001        char tmpfile[PATH_MAX];
1002        int ret;
1003        long prev_posn = 0;
1004        char range[RANGE_HEADER_SIZE];
1005        struct curl_slist *range_header = NULL;
1006
1007        struct active_request_slot *slot;
1008        struct slot_results results;
1009
1010        if (fetch_indices(repo))
1011                return -1;
1012        target = find_sha1_pack(sha1, repo->packs);
1013        if (!target)
1014                return -1;
1015
1016        if (get_verbosely) {
1017                fprintf(stderr, "Getting pack %s\n",
1018                        sha1_to_hex(target->sha1));
1019                fprintf(stderr, " which contains %s\n",
1020                        sha1_to_hex(sha1));
1021        }
1022
1023        url = xmalloc(strlen(repo->base) + 65);
1024        sprintf(url, "%s/objects/pack/pack-%s.pack",
1025                repo->base, sha1_to_hex(target->sha1));
1026
1027        filename = sha1_pack_name(target->sha1);
1028        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1029        packfile = fopen(tmpfile, "a");
1030        if (!packfile)
1031                return error("Unable to open local file %s for pack",
1032                             filename);
1033
1034        slot = get_active_slot();
1035        slot->results = &results;
1036        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1037        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1038        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1039        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1040        slot->local = packfile;
1041
1042        /* If there is data present from a previous transfer attempt,
1043           resume where it left off */
1044        prev_posn = ftell(packfile);
1045        if (prev_posn>0) {
1046                if (get_verbosely)
1047                        fprintf(stderr,
1048                                "Resuming fetch of pack %s at byte %ld\n",
1049                                sha1_to_hex(target->sha1), prev_posn);
1050                sprintf(range, "Range: bytes=%ld-", prev_posn);
1051                range_header = curl_slist_append(range_header, range);
1052                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1053        }
1054
1055        if (start_active_slot(slot)) {
1056                run_active_slot(slot);
1057                if (results.curl_result != CURLE_OK) {
1058                        fclose(packfile);
1059                        return error("Unable to get pack file %s\n%s", url,
1060                                     curl_errorstr);
1061                }
1062        } else {
1063                fclose(packfile);
1064                return error("Unable to start request");
1065        }
1066
1067        fclose(packfile);
1068
1069        ret = move_temp_to_file(tmpfile, filename);
1070        if (ret)
1071                return ret;
1072
1073        lst = &repo->packs;
1074        while (*lst != target)
1075                lst = &((*lst)->next);
1076        *lst = (*lst)->next;
1077
1078        if (verify_pack(target, 0))
1079                return -1;
1080        install_packed_git(target);
1081
1082        return 0;
1083}
1084
1085static void abort_object_request(struct object_request *obj_req)
1086{
1087        if (obj_req->local >= 0) {
1088                close(obj_req->local);
1089                obj_req->local = -1;
1090        }
1091        unlink(obj_req->tmpfile);
1092        if (obj_req->slot) {
1093                release_active_slot(obj_req->slot);
1094                obj_req->slot = NULL;
1095        }
1096        release_object_request(obj_req);
1097}
1098
1099static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1100{
1101        char *hex = sha1_to_hex(sha1);
1102        int ret = 0;
1103        struct object_request *obj_req = object_queue_head;
1104
1105        while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1106                obj_req = obj_req->next;
1107        if (obj_req == NULL)
1108                return error("Couldn't find request for %s in the queue", hex);
1109
1110        if (has_sha1_file(obj_req->sha1)) {
1111                abort_object_request(obj_req);
1112                return 0;
1113        }
1114
1115#ifdef USE_CURL_MULTI
1116        while (obj_req->state == WAITING) {
1117                step_active_slots();
1118        }
1119#else
1120        start_object_request(obj_req);
1121#endif
1122
1123        while (obj_req->state == ACTIVE) {
1124                run_active_slot(obj_req->slot);
1125        }
1126        if (obj_req->local != -1) {
1127                close(obj_req->local); obj_req->local = -1;
1128        }
1129
1130        if (obj_req->state == ABORTED) {
1131                ret = error("Request for %s aborted", hex);
1132        } else if (obj_req->curl_result != CURLE_OK &&
1133                   obj_req->http_code != 416) {
1134                if (missing_target(obj_req))
1135                        ret = -1; /* Be silent, it is probably in a pack. */
1136                else
1137                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1138                                    obj_req->errorstr, obj_req->curl_result,
1139                                    obj_req->http_code, hex);
1140        } else if (obj_req->zret != Z_STREAM_END) {
1141                corrupt_object_found++;
1142                ret = error("File %s (%s) corrupt", hex, obj_req->url);
1143        } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1144                ret = error("File %s has bad hash", hex);
1145        } else if (obj_req->rename < 0) {
1146                ret = error("unable to write sha1 filename %s",
1147                            obj_req->filename);
1148        }
1149
1150        release_object_request(obj_req);
1151        return ret;
1152}
1153
1154int fetch(unsigned char *sha1)
1155{
1156        struct alt_base *altbase = alt;
1157
1158        if (!fetch_object(altbase, sha1))
1159                return 0;
1160        while (altbase) {
1161                if (!fetch_pack(altbase, sha1))
1162                        return 0;
1163                fetch_alternates(alt->base);
1164                altbase = altbase->next;
1165        }
1166        return error("Unable to find %s under %s", sha1_to_hex(sha1),
1167                     alt->base);
1168}
1169
/*
 * Return 0 if ch may appear unescaped in a ref URL (ASCII letters,
 * digits, '/', '-' and '.'), 1 if it has to be percent-encoded.
 */
static inline int needs_quote(int ch)
{
        if (ch >= 'A' && ch <= 'Z')
                return 0;
        if (ch >= 'a' && ch <= 'z')
                return 0;
        if (ch >= '0' && ch <= '9')
                return 0;

        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                return 1;
        }
}
1181
/*
 * Map v to a hexadecimal digit character: '0'..'9' for values below 10,
 * and uppercase letters counting up from 'A' for 10 and above.
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1187
/*
 * Build the URL "<base>refs/<ref>" in newly allocated memory, with every
 * byte of ref for which needs_quote() is true replaced by "%XX"
 * (uppercase hex).  base is copied verbatim.
 *
 * The caller owns the returned string and must free() it.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *cp;
        char *dp, *qref;
        int len, baselen;
        unsigned char ch;

        baselen = strlen(base);
        len = baselen + 6; /* "refs/" + NUL */
        for (cp = ref; (ch = (unsigned char)*cp) != 0; cp++, len++)
                if (needs_quote(ch))
                        len += 2; /* extra two hex plus replacement % */
        qref = xmalloc(len);
        memcpy(qref, base, baselen);
        memcpy(qref + baselen, "refs/", 5);
        for (cp = ref, dp = qref + baselen + 5;
             (ch = (unsigned char)*cp) != 0; cp++) {
                if (needs_quote(ch)) {
                        /*
                         * ch is unsigned, so the shift below is well
                         * defined even for bytes >= 0x80 on platforms
                         * where plain char is signed.
                         */
                        *dp++ = '%';
                        *dp++ = hex((ch >> 4) & 0xF);
                        *dp++ = hex(ch & 0xF);
                } else {
                        *dp++ = ch;
                }
        }
        *dp = 0;

        return qref;
}
1215
1216int fetch_ref(char *ref, unsigned char *sha1)
1217{
1218        char *url;
1219        char hex[42];
1220        struct buffer buffer;
1221        const char *base = alt->base;
1222        struct active_request_slot *slot;
1223        struct slot_results results;
1224        buffer.size = 41;
1225        buffer.posn = 0;
1226        buffer.buffer = hex;
1227        hex[41] = '\0';
1228
1229        url = quote_ref_url(base, ref);
1230        slot = get_active_slot();
1231        slot->results = &results;
1232        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1233        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1234        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1235        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1236        if (start_active_slot(slot)) {
1237                run_active_slot(slot);
1238                if (results.curl_result != CURLE_OK)
1239                        return error("Couldn't get %s for %s\n%s",
1240                                     url, ref, curl_errorstr);
1241        } else {
1242                return error("Unable to start request");
1243        }
1244
1245        hex[40] = '\0';
1246        get_sha1_hex(hex, sha1);
1247        return 0;
1248}
1249
1250int main(int argc, const char **argv)
1251{
1252        int commits;
1253        const char **write_ref = NULL;
1254        char **commit_id;
1255        const char *url;
1256        char *path;
1257        int arg = 1;
1258        int rc = 0;
1259
1260        setup_ident();
1261        setup_git_directory();
1262        git_config(git_default_config);
1263
1264        while (arg < argc && argv[arg][0] == '-') {
1265                if (argv[arg][1] == 't') {
1266                        get_tree = 1;
1267                } else if (argv[arg][1] == 'c') {
1268                        get_history = 1;
1269                } else if (argv[arg][1] == 'a') {
1270                        get_all = 1;
1271                        get_tree = 1;
1272                        get_history = 1;
1273                } else if (argv[arg][1] == 'v') {
1274                        get_verbosely = 1;
1275                } else if (argv[arg][1] == 'w') {
1276                        write_ref = &argv[arg + 1];
1277                        arg++;
1278                } else if (!strcmp(argv[arg], "--recover")) {
1279                        get_recover = 1;
1280                } else if (!strcmp(argv[arg], "--stdin")) {
1281                        commits_on_stdin = 1;
1282                }
1283                arg++;
1284        }
1285        if (argc < arg + 2 - commits_on_stdin) {
1286                usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1287                return 1;
1288        }
1289        if (commits_on_stdin) {
1290                commits = pull_targets_stdin(&commit_id, &write_ref);
1291        } else {
1292                commit_id = (char **) &argv[arg++];
1293                commits = 1;
1294        }
1295        url = argv[arg];
1296
1297        http_init();
1298
1299        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1300
1301        alt = xmalloc(sizeof(*alt));
1302        alt->base = url;
1303        alt->got_indices = 0;
1304        alt->packs = NULL;
1305        alt->next = NULL;
1306        path = strstr(url, "//");
1307        if (path) {
1308                path = strchr(path+2, '/');
1309                if (path)
1310                        alt->path_len = strlen(path);
1311        }
1312
1313        if (pull(commits, commit_id, write_ref, url))
1314                rc = 1;
1315
1316        http_cleanup();
1317
1318        curl_slist_free_all(no_pragma_header);
1319
1320        if (commits_on_stdin)
1321                pull_targets_free(commits, commit_id, write_ref);
1322
1323        if (corrupt_object_found) {
1324                fprintf(stderr,
1325"Some loose object were found to be corrupt, but they might be just\n"
1326"a false '404 Not Found' error message sent with incorrect HTTP\n"
1327"status code.  Suggest running git fsck-objects.\n");
1328        }
1329        return rc;
1330}