connect.c on commit clone: use git protocol for cloning shallow repo locally (0d7d285)
   1#include "git-compat-util.h"
   2#include "cache.h"
   3#include "pkt-line.h"
   4#include "quote.h"
   5#include "refs.h"
   6#include "run-command.h"
   7#include "remote.h"
   8#include "connect.h"
   9#include "url.h"
  10#include "string-list.h"
  11#include "sha1-array.h"
  12
  13static char *server_capabilities;
  14static const char *parse_feature_value(const char *, const char *, int *);
  15
  16static int check_ref(const char *name, int len, unsigned int flags)
  17{
  18        if (!flags)
  19                return 1;
  20
  21        if (len < 5 || memcmp(name, "refs/", 5))
  22                return 0;
  23
  24        /* Skip the "refs/" part */
  25        name += 5;
  26        len -= 5;
  27
  28        /* REF_NORMAL means that we don't want the magic fake tag refs */
  29        if ((flags & REF_NORMAL) && check_refname_format(name, 0))
  30                return 0;
  31
  32        /* REF_HEADS means that we want regular branch heads */
  33        if ((flags & REF_HEADS) && !memcmp(name, "heads/", 6))
  34                return 1;
  35
  36        /* REF_TAGS means that we want tags */
  37        if ((flags & REF_TAGS) && !memcmp(name, "tags/", 5))
  38                return 1;
  39
  40        /* All type bits clear means that we are ok with anything */
  41        return !(flags & ~REF_NORMAL);
  42}
  43
  44int check_ref_type(const struct ref *ref, int flags)
  45{
  46        return check_ref(ref->name, strlen(ref->name), flags);
  47}
  48
  49static void die_initial_contact(int got_at_least_one_head)
  50{
  51        if (got_at_least_one_head)
  52                die("The remote end hung up upon initial contact");
  53        else
  54                die("Could not read from remote repository.\n\n"
  55                    "Please make sure you have the correct access rights\n"
  56                    "and the repository exists.");
  57}
  58
  59static void parse_one_symref_info(struct string_list *symref, const char *val, int len)
  60{
  61        char *sym, *target;
  62        struct string_list_item *item;
  63
  64        if (!len)
  65                return; /* just "symref" */
  66        /* e.g. "symref=HEAD:refs/heads/master" */
  67        sym = xmalloc(len + 1);
  68        memcpy(sym, val, len);
  69        sym[len] = '\0';
  70        target = strchr(sym, ':');
  71        if (!target)
  72                /* just "symref=something" */
  73                goto reject;
  74        *(target++) = '\0';
  75        if (check_refname_format(sym, REFNAME_ALLOW_ONELEVEL) ||
  76            check_refname_format(target, REFNAME_ALLOW_ONELEVEL))
  77                /* "symref=bogus:pair */
  78                goto reject;
  79        item = string_list_append(symref, sym);
  80        item->util = target;
  81        return;
  82reject:
  83        free(sym);
  84        return;
  85}
  86
  87static void annotate_refs_with_symref_info(struct ref *ref)
  88{
  89        struct string_list symref = STRING_LIST_INIT_DUP;
  90        const char *feature_list = server_capabilities;
  91
  92        while (feature_list) {
  93                int len;
  94                const char *val;
  95
  96                val = parse_feature_value(feature_list, "symref", &len);
  97                if (!val)
  98                        break;
  99                parse_one_symref_info(&symref, val, len);
 100                feature_list = val + 1;
 101        }
 102        sort_string_list(&symref);
 103
 104        for (; ref; ref = ref->next) {
 105                struct string_list_item *item;
 106                item = string_list_lookup(&symref, ref->name);
 107                if (!item)
 108                        continue;
 109                ref->symref = xstrdup((char *)item->util);
 110        }
 111        string_list_clear(&symref, 0);
 112}
 113
 114/*
 115 * Read all the refs from the other end
 116 */
 117struct ref **get_remote_heads(int in, char *src_buf, size_t src_len,
 118                              struct ref **list, unsigned int flags,
 119                              struct sha1_array *extra_have,
 120                              struct sha1_array *shallow_points)
 121{
 122        struct ref **orig_list = list;
 123        int got_at_least_one_head = 0;
 124
 125        *list = NULL;
 126        for (;;) {
 127                struct ref *ref;
 128                unsigned char old_sha1[20];
 129                char *name;
 130                int len, name_len;
 131                char *buffer = packet_buffer;
 132
 133                len = packet_read(in, &src_buf, &src_len,
 134                                  packet_buffer, sizeof(packet_buffer),
 135                                  PACKET_READ_GENTLE_ON_EOF |
 136                                  PACKET_READ_CHOMP_NEWLINE);
 137                if (len < 0)
 138                        die_initial_contact(got_at_least_one_head);
 139
 140                if (!len)
 141                        break;
 142
 143                if (len > 4 && !prefixcmp(buffer, "ERR "))
 144                        die("remote error: %s", buffer + 4);
 145
 146                if (len == 48 && !prefixcmp(buffer, "shallow ")) {
 147                        if (get_sha1_hex(buffer + 8, old_sha1))
 148                                die("protocol error: expected shallow sha-1, got '%s'", buffer + 8);
 149                        if (!shallow_points)
 150                                die("repository on the other end cannot be shallow");
 151                        sha1_array_append(shallow_points, old_sha1);
 152                        continue;
 153                }
 154
 155                if (len < 42 || get_sha1_hex(buffer, old_sha1) || buffer[40] != ' ')
 156                        die("protocol error: expected sha/ref, got '%s'", buffer);
 157                name = buffer + 41;
 158
 159                name_len = strlen(name);
 160                if (len != name_len + 41) {
 161                        free(server_capabilities);
 162                        server_capabilities = xstrdup(name + name_len + 1);
 163                }
 164
 165                if (extra_have &&
 166                    name_len == 5 && !memcmp(".have", name, 5)) {
 167                        sha1_array_append(extra_have, old_sha1);
 168                        continue;
 169                }
 170
 171                if (!check_ref(name, name_len, flags))
 172                        continue;
 173                ref = alloc_ref(buffer + 41);
 174                hashcpy(ref->old_sha1, old_sha1);
 175                *list = ref;
 176                list = &ref->next;
 177                got_at_least_one_head = 1;
 178        }
 179
 180        annotate_refs_with_symref_info(*orig_list);
 181
 182        return list;
 183}
 184
 185static const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp)
 186{
 187        int len;
 188
 189        if (!feature_list)
 190                return NULL;
 191
 192        len = strlen(feature);
 193        while (*feature_list) {
 194                const char *found = strstr(feature_list, feature);
 195                if (!found)
 196                        return NULL;
 197                if (feature_list == found || isspace(found[-1])) {
 198                        const char *value = found + len;
 199                        /* feature with no value (e.g., "thin-pack") */
 200                        if (!*value || isspace(*value)) {
 201                                if (lenp)
 202                                        *lenp = 0;
 203                                return value;
 204                        }
 205                        /* feature with a value (e.g., "agent=git/1.2.3") */
 206                        else if (*value == '=') {
 207                                value++;
 208                                if (lenp)
 209                                        *lenp = strcspn(value, " \t\n");
 210                                return value;
 211                        }
 212                        /*
 213                         * otherwise we matched a substring of another feature;
 214                         * keep looking
 215                         */
 216                }
 217                feature_list = found + 1;
 218        }
 219        return NULL;
 220}
 221
 222int parse_feature_request(const char *feature_list, const char *feature)
 223{
 224        return !!parse_feature_value(feature_list, feature, NULL);
 225}
 226
 227const char *server_feature_value(const char *feature, int *len)
 228{
 229        return parse_feature_value(server_capabilities, feature, len);
 230}
 231
 232int server_supports(const char *feature)
 233{
 234        return !!server_feature_value(feature, NULL);
 235}
 236
 237enum protocol {
 238        PROTO_LOCAL = 1,
 239        PROTO_SSH,
 240        PROTO_GIT
 241};
 242
 243static enum protocol get_protocol(const char *name)
 244{
 245        if (!strcmp(name, "ssh"))
 246                return PROTO_SSH;
 247        if (!strcmp(name, "git"))
 248                return PROTO_GIT;
 249        if (!strcmp(name, "git+ssh"))
 250                return PROTO_SSH;
 251        if (!strcmp(name, "ssh+git"))
 252                return PROTO_SSH;
 253        if (!strcmp(name, "file"))
 254                return PROTO_LOCAL;
 255        die("I don't handle protocol '%s'", name);
 256}
 257
 258#define STR_(s) # s
 259#define STR(s)  STR_(s)
 260
 261static void get_host_and_port(char **host, const char **port)
 262{
 263        char *colon, *end;
 264
 265        if (*host[0] == '[') {
 266                end = strchr(*host + 1, ']');
 267                if (end) {
 268                        *end = 0;
 269                        end++;
 270                        (*host)++;
 271                } else
 272                        end = *host;
 273        } else
 274                end = *host;
 275        colon = strchr(end, ':');
 276
 277        if (colon) {
 278                *colon = 0;
 279                *port = colon + 1;
 280        }
 281}
 282
 283static void enable_keepalive(int sockfd)
 284{
 285        int ka = 1;
 286
 287        if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &ka, sizeof(ka)) < 0)
 288                fprintf(stderr, "unable to set SO_KEEPALIVE on socket: %s\n",
 289                        strerror(errno));
 290}
 291
 292#ifndef NO_IPV6
 293
 294static const char *ai_name(const struct addrinfo *ai)
 295{
 296        static char addr[NI_MAXHOST];
 297        if (getnameinfo(ai->ai_addr, ai->ai_addrlen, addr, sizeof(addr), NULL, 0,
 298                        NI_NUMERICHOST) != 0)
 299                strcpy(addr, "(unknown)");
 300
 301        return addr;
 302}
 303
 304/*
 305 * Returns a connected socket() fd, or else die()s.
 306 */
 307static int git_tcp_connect_sock(char *host, int flags)
 308{
 309        struct strbuf error_message = STRBUF_INIT;
 310        int sockfd = -1;
 311        const char *port = STR(DEFAULT_GIT_PORT);
 312        struct addrinfo hints, *ai0, *ai;
 313        int gai;
 314        int cnt = 0;
 315
 316        get_host_and_port(&host, &port);
 317        if (!*port)
 318                port = "<none>";
 319
 320        memset(&hints, 0, sizeof(hints));
 321        hints.ai_socktype = SOCK_STREAM;
 322        hints.ai_protocol = IPPROTO_TCP;
 323
 324        if (flags & CONNECT_VERBOSE)
 325                fprintf(stderr, "Looking up %s ... ", host);
 326
 327        gai = getaddrinfo(host, port, &hints, &ai);
 328        if (gai)
 329                die("Unable to look up %s (port %s) (%s)", host, port, gai_strerror(gai));
 330
 331        if (flags & CONNECT_VERBOSE)
 332                fprintf(stderr, "done.\nConnecting to %s (port %s) ... ", host, port);
 333
 334        for (ai0 = ai; ai; ai = ai->ai_next, cnt++) {
 335                sockfd = socket(ai->ai_family,
 336                                ai->ai_socktype, ai->ai_protocol);
 337                if ((sockfd < 0) ||
 338                    (connect(sockfd, ai->ai_addr, ai->ai_addrlen) < 0)) {
 339                        strbuf_addf(&error_message, "%s[%d: %s]: errno=%s\n",
 340                                    host, cnt, ai_name(ai), strerror(errno));
 341                        if (0 <= sockfd)
 342                                close(sockfd);
 343                        sockfd = -1;
 344                        continue;
 345                }
 346                if (flags & CONNECT_VERBOSE)
 347                        fprintf(stderr, "%s ", ai_name(ai));
 348                break;
 349        }
 350
 351        freeaddrinfo(ai0);
 352
 353        if (sockfd < 0)
 354                die("unable to connect to %s:\n%s", host, error_message.buf);
 355
 356        enable_keepalive(sockfd);
 357
 358        if (flags & CONNECT_VERBOSE)
 359                fprintf(stderr, "done.\n");
 360
 361        strbuf_release(&error_message);
 362
 363        return sockfd;
 364}
 365
 366#else /* NO_IPV6 */
 367
 368/*
 369 * Returns a connected socket() fd, or else die()s.
 370 */
 371static int git_tcp_connect_sock(char *host, int flags)
 372{
 373        struct strbuf error_message = STRBUF_INIT;
 374        int sockfd = -1;
 375        const char *port = STR(DEFAULT_GIT_PORT);
 376        char *ep;
 377        struct hostent *he;
 378        struct sockaddr_in sa;
 379        char **ap;
 380        unsigned int nport;
 381        int cnt;
 382
 383        get_host_and_port(&host, &port);
 384
 385        if (flags & CONNECT_VERBOSE)
 386                fprintf(stderr, "Looking up %s ... ", host);
 387
 388        he = gethostbyname(host);
 389        if (!he)
 390                die("Unable to look up %s (%s)", host, hstrerror(h_errno));
 391        nport = strtoul(port, &ep, 10);
 392        if ( ep == port || *ep ) {
 393                /* Not numeric */
 394                struct servent *se = getservbyname(port,"tcp");
 395                if ( !se )
 396                        die("Unknown port %s", port);
 397                nport = se->s_port;
 398        }
 399
 400        if (flags & CONNECT_VERBOSE)
 401                fprintf(stderr, "done.\nConnecting to %s (port %s) ... ", host, port);
 402
 403        for (cnt = 0, ap = he->h_addr_list; *ap; ap++, cnt++) {
 404                memset(&sa, 0, sizeof sa);
 405                sa.sin_family = he->h_addrtype;
 406                sa.sin_port = htons(nport);
 407                memcpy(&sa.sin_addr, *ap, he->h_length);
 408
 409                sockfd = socket(he->h_addrtype, SOCK_STREAM, 0);
 410                if ((sockfd < 0) ||
 411                    connect(sockfd, (struct sockaddr *)&sa, sizeof sa) < 0) {
 412                        strbuf_addf(&error_message, "%s[%d: %s]: errno=%s\n",
 413                                host,
 414                                cnt,
 415                                inet_ntoa(*(struct in_addr *)&sa.sin_addr),
 416                                strerror(errno));
 417                        if (0 <= sockfd)
 418                                close(sockfd);
 419                        sockfd = -1;
 420                        continue;
 421                }
 422                if (flags & CONNECT_VERBOSE)
 423                        fprintf(stderr, "%s ",
 424                                inet_ntoa(*(struct in_addr *)&sa.sin_addr));
 425                break;
 426        }
 427
 428        if (sockfd < 0)
 429                die("unable to connect to %s:\n%s", host, error_message.buf);
 430
 431        enable_keepalive(sockfd);
 432
 433        if (flags & CONNECT_VERBOSE)
 434                fprintf(stderr, "done.\n");
 435
 436        return sockfd;
 437}
 438
 439#endif /* NO_IPV6 */
 440
 441
 442static void git_tcp_connect(int fd[2], char *host, int flags)
 443{
 444        int sockfd = git_tcp_connect_sock(host, flags);
 445
 446        fd[0] = sockfd;
 447        fd[1] = dup(sockfd);
 448}
 449
 450
 451static char *git_proxy_command;
 452
 453static int git_proxy_command_options(const char *var, const char *value,
 454                void *cb)
 455{
 456        if (!strcmp(var, "core.gitproxy")) {
 457                const char *for_pos;
 458                int matchlen = -1;
 459                int hostlen;
 460                const char *rhost_name = cb;
 461                int rhost_len = strlen(rhost_name);
 462
 463                if (git_proxy_command)
 464                        return 0;
 465                if (!value)
 466                        return config_error_nonbool(var);
 467                /* [core]
 468                 * ;# matches www.kernel.org as well
 469                 * gitproxy = netcatter-1 for kernel.org
 470                 * gitproxy = netcatter-2 for sample.xz
 471                 * gitproxy = netcatter-default
 472                 */
 473                for_pos = strstr(value, " for ");
 474                if (!for_pos)
 475                        /* matches everybody */
 476                        matchlen = strlen(value);
 477                else {
 478                        hostlen = strlen(for_pos + 5);
 479                        if (rhost_len < hostlen)
 480                                matchlen = -1;
 481                        else if (!strncmp(for_pos + 5,
 482                                          rhost_name + rhost_len - hostlen,
 483                                          hostlen) &&
 484                                 ((rhost_len == hostlen) ||
 485                                  rhost_name[rhost_len - hostlen -1] == '.'))
 486                                matchlen = for_pos - value;
 487                        else
 488                                matchlen = -1;
 489                }
 490                if (0 <= matchlen) {
 491                        /* core.gitproxy = none for kernel.org */
 492                        if (matchlen == 4 &&
 493                            !memcmp(value, "none", 4))
 494                                matchlen = 0;
 495                        git_proxy_command = xmemdupz(value, matchlen);
 496                }
 497                return 0;
 498        }
 499
 500        return git_default_config(var, value, cb);
 501}
 502
 503static int git_use_proxy(const char *host)
 504{
 505        git_proxy_command = getenv("GIT_PROXY_COMMAND");
 506        git_config(git_proxy_command_options, (void*)host);
 507        return (git_proxy_command && *git_proxy_command);
 508}
 509
 510static struct child_process *git_proxy_connect(int fd[2], char *host)
 511{
 512        const char *port = STR(DEFAULT_GIT_PORT);
 513        const char **argv;
 514        struct child_process *proxy;
 515
 516        get_host_and_port(&host, &port);
 517
 518        argv = xmalloc(sizeof(*argv) * 4);
 519        argv[0] = git_proxy_command;
 520        argv[1] = host;
 521        argv[2] = port;
 522        argv[3] = NULL;
 523        proxy = xcalloc(1, sizeof(*proxy));
 524        proxy->argv = argv;
 525        proxy->in = -1;
 526        proxy->out = -1;
 527        if (start_command(proxy))
 528                die("cannot start proxy %s", argv[0]);
 529        fd[0] = proxy->out; /* read from proxy stdout */
 530        fd[1] = proxy->in;  /* write to proxy stdin */
 531        return proxy;
 532}
 533
 534#define MAX_CMD_LEN 1024
 535
 536static char *get_port(char *host)
 537{
 538        char *end;
 539        char *p = strchr(host, ':');
 540
 541        if (p) {
 542                long port = strtol(p + 1, &end, 10);
 543                if (end != p + 1 && *end == '\0' && 0 <= port && port < 65536) {
 544                        *p = '\0';
 545                        return p+1;
 546                }
 547        }
 548
 549        return NULL;
 550}
 551
 552static struct child_process no_fork;
 553
 554/*
 555 * This returns a dummy child_process if the transport protocol does not
 556 * need fork(2), or a struct child_process object if it does.  Once done,
 557 * finish the connection with finish_connect() with the value returned from
 558 * this function (it is safe to call finish_connect() with NULL to support
 559 * the former case).
 560 *
 561 * If it returns, the connect is successful; it just dies on errors (this
 562 * will hopefully be changed in a libification effort, to return NULL when
 563 * the connection failed).
 564 */
 565struct child_process *git_connect(int fd[2], const char *url_orig,
 566                                  const char *prog, int flags)
 567{
 568        char *url;
 569        char *host, *path;
 570        char *end;
 571        int c;
 572        struct child_process *conn = &no_fork;
 573        enum protocol protocol = PROTO_LOCAL;
 574        int free_path = 0;
 575        char *port = NULL;
 576        const char **arg;
 577        struct strbuf cmd;
 578
 579        /* Without this we cannot rely on waitpid() to tell
 580         * what happened to our children.
 581         */
 582        signal(SIGCHLD, SIG_DFL);
 583
            /*
             * Work on a private, writable copy of the URL: it is cut into
             * NUL-terminated pieces in place below.
             */
 584        if (is_url(url_orig))
 585                url = url_decode(url_orig);
 586        else
 587                url = xstrdup(url_orig);
 588
            /*
             * An explicit "scheme://" names the protocol and makes '/' the
             * host/path separator.  Without one, ':' is the separator and
             * the protocol stays PROTO_LOCAL unless the scp-like check
             * further down switches it to PROTO_SSH.
             */
 589        host = strstr(url, "://");
 590        if (host) {
 591                *host = '\0';
 592                protocol = get_protocol(url);
 593                host += 3;
 594                c = '/';
 595        } else {
 596                host = url;
 597                c = ':';
 598        }
 599
 600        /*
 601         * Don't do destructive transforms with git:// as that
 602         * protocol code does '[]' unwrapping of its own.
 603         */
 604        if (host[0] == '[') {
 605                end = strchr(host + 1, ']');
 606                if (end) {
 607                        if (protocol != PROTO_GIT) {
 608                                *end = 0;
 609                                host++;
 610                        }
 611                        end++;
 612                } else
 613                        end = host;
 614        } else
 615                end = host;
 616
            /* "end" is where the search for the host/path separator starts */
 617        path = strchr(end, c);
 618        if (path && !has_dos_drive_prefix(end)) {
 619                if (c == ':') {
                            /*
                             * No scheme was given.  Treat this as "host:path"
                             * over ssh when a '[' was stripped above
                             * (host != url) or when the ':' comes before the
                             * first '/'.
                             */
 620                        if (host != url || path < strchrnul(host, '/')) {
 621                                protocol = PROTO_SSH;
 622                                *path++ = '\0';
 623                        } else /* '/' in the host part, assume local path */
 624                                path = end;
 625                }
 626        } else
 627                path = end;
 628
 629        if (!path || !*path)
 630                die("No path specified. See 'man git-pull' for valid url syntax");
 631
 632        /*
 633         * null-terminate hostname and point path to ~ for URL's like this:
 634         *    ssh://host.xz/~user/repo
 635         */
 636        if (protocol != PROTO_LOCAL && host != url) {
 637                char *ptr = path;
                    /*
                     * For "/~user/..." the path starts after the '/', so the
                     * '/' itself can be overwritten.  Otherwise the path is
                     * copied first, because the write to *ptr below would
                     * clobber its first byte.
                     */
 638                if (path[1] == '~')
 639                        path++;
 640                else {
 641                        path = xstrdup(ptr);
 642                        free_path = 1;
 643                }
 644
 645                *ptr = '\0';
 646        }
 647
 648        /*
 649         * Add support for ssh port: ssh://host.xy:<port>/...
 650         */
 651        if (protocol == PROTO_SSH && host != url)
 652                port = get_port(end);
 653
 654        if (protocol == PROTO_GIT) {
 655                /* These underlying connection commands die() if they
 656                 * cannot connect.
 657                 */
                    /*
                     * Keep an unmodified copy of the host for the "host="
                     * header: the connect helpers below split the original
                     * buffer in place at the port separator.
                     */
 658                char *target_host = xstrdup(host);
 659                if (git_use_proxy(host))
 660                        conn = git_proxy_connect(fd, host);
 661                else
 662                        git_tcp_connect(fd, host, flags);
 663                /*
 664                 * Separate original protocol components prog and path
 665                 * from extended host header with a NUL byte.
 666                 *
 667                 * Note: Do not add any other headers here!  Doing so
 668                 * will cause older git-daemon servers to crash.
 669                 */
 670                packet_write(fd[1],
 671                             "%s %s%chost=%s%c",
 672                             prog, path, 0,
 673                             target_host, 0);
 674                free(target_host);
 675                free(url);
 676                if (free_path)
 677                        free(path);
 678                return conn;
 679        }
 680
            /* every other protocol runs a child process */
 681        conn = xcalloc(1, sizeof(*conn));
 682
            /* the command is "<prog> <path>", with the path passed through sq_quote_buf() */
 683        strbuf_init(&cmd, MAX_CMD_LEN);
 684        strbuf_addstr(&cmd, prog);
 685        strbuf_addch(&cmd, ' ');
 686        sq_quote_buf(&cmd, path);
 687        if (cmd.len >= MAX_CMD_LEN)
 688                die("command line too long");
 689
 690        conn->in = conn->out = -1;
            /* at most 7 slots: ssh, -batch, -P/-p, port, host, command, NULL */
 691        conn->argv = arg = xcalloc(7, sizeof(*arg));
 692        if (protocol == PROTO_SSH) {
 693                const char *ssh = getenv("GIT_SSH");
 694                int putty = ssh && strcasestr(ssh, "plink");
 695                if (!ssh) ssh = "ssh";
 696
 697                *arg++ = ssh;
 698                if (putty && !strcasestr(ssh, "tortoiseplink"))
 699                        *arg++ = "-batch";
 700                if (port) {
 701                        /* P is for PuTTY, p is for OpenSSH */
 702                        *arg++ = putty ? "-P" : "-p";
 703                        *arg++ = port;
 704                }
 705                *arg++ = host;
 706        }
 707        else {
 708                /* remove repo-local variables from the environment */
 709                conn->env = local_repo_env;
 710                conn->use_shell = 1;
 711        }
 712        *arg++ = cmd.buf;
 713        *arg = NULL;
 714
 715        if (start_command(conn))
 716                die("unable to fork");
 717
 718        fd[0] = conn->out; /* read from child's stdout */
 719        fd[1] = conn->in;  /* write to child's stdin */
            /*
             * NOTE(review): conn->argv still holds pointers into cmd.buf and
             * url after they are freed here; finish_connect() only frees
             * the argv array itself.  Presumably nothing reads those
             * entries once start_command() has returned -- confirm.
             */
 720        strbuf_release(&cmd);
 721        free(url);
 722        if (free_path)
 723                free(path);
 724        return conn;
 725}
 726
 727int git_connection_is_socket(struct child_process *conn)
 728{
 729        return conn == &no_fork;
 730}
 731
 732int finish_connect(struct child_process *conn)
 733{
 734        int code;
 735        if (!conn || git_connection_is_socket(conn))
 736                return 0;
 737
 738        code = finish_command(conn);
 739        free(conn->argv);
 740        free(conn);
 741        return code;
 742}