fetch: Speed up fetch of large numbers of refs
[gitweb.git] / builtin-fetch.c
index 7568163af24df630c215e05b6082ed764150a315..5c7465cfeb5c1bb0649fed6475618ba975955e99 100644 (file)
@@ -10,6 +10,7 @@
 #include "transport.h"
 #include "run-command.h"
 #include "parse-options.h"
+#include "sigchain.h"
 
 static const char * const builtin_fetch_usage[] = {
        "git fetch [options] [<repository> <refspec>...]",
@@ -58,7 +59,7 @@ static void unlock_pack(void)
 static void unlock_pack_on_signal(int signo)
 {
        unlock_pack();
-       signal(SIGINT, SIG_DFL);
+       sigchain_pop(signo);
        raise(signo);
 }
 
@@ -166,6 +167,9 @@ static struct ref *get_ref_map(struct transport *transport,
        return ref_map;
 }
 
+#define STORE_REF_ERROR_OTHER 1
+#define STORE_REF_ERROR_DF_CONFLICT 2
+
 static int s_update_ref(const char *action,
                        struct ref *ref,
                        int check_old)
@@ -180,9 +184,11 @@ static int s_update_ref(const char *action,
        lock = lock_any_ref_for_update(ref->name,
                                       check_old ? ref->old_sha1 : NULL, 0);
        if (!lock)
-               return 2;
+               return errno == ENOTDIR ? STORE_REF_ERROR_DF_CONFLICT :
+                                         STORE_REF_ERROR_OTHER;
        if (write_ref_sha1(lock, ref->new_sha1, msg) < 0)
-               return 2;
+               return errno == ENOTDIR ? STORE_REF_ERROR_DF_CONFLICT :
+                                         STORE_REF_ERROR_OTHER;
        return 0;
 }
 
@@ -196,11 +202,7 @@ static int update_local_ref(struct ref *ref,
        struct commit *current = NULL, *updated;
        enum object_type type;
        struct branch *current_branch = branch_get(NULL);
-       const char *pretty_ref = ref->name + (
-               !prefixcmp(ref->name, "refs/heads/") ? 11 :
-               !prefixcmp(ref->name, "refs/tags/") ? 10 :
-               !prefixcmp(ref->name, "refs/remotes/") ? 13 :
-               0);
+       const char *pretty_ref = prettify_refname(ref->name);
 
        *display = 0;
        type = sha1_object_info(ref->new_sha1, NULL);
@@ -292,7 +294,7 @@ static int update_local_ref(struct ref *ref,
        }
 }
 
-static int store_updated_refs(const char *url, const char *remote_name,
+static int store_updated_refs(const char *raw_url, const char *remote_name,
                struct ref *ref_map)
 {
        FILE *fp;
@@ -301,11 +303,13 @@ static int store_updated_refs(const char *url, const char *remote_name,
        char note[1024];
        const char *what, *kind;
        struct ref *rm;
-       char *filename = git_path("FETCH_HEAD");
+       char *url, *filename = git_path("FETCH_HEAD");
 
        fp = fopen(filename, "a");
        if (!fp)
                return error("cannot open %s: %s\n", filename, strerror(errno));
+
+       url = transport_anonymize_url(raw_url);
        for (rm = ref_map; rm; rm = rm->next) {
                struct ref *ref = NULL;
 
@@ -356,12 +360,18 @@ static int store_updated_refs(const char *url, const char *remote_name,
                                                    kind);
                        note_len += sprintf(note + note_len, "'%s' of ", what);
                }
-               note_len += sprintf(note + note_len, "%.*s", url_len, url);
-               fprintf(fp, "%s\t%s\t%s\n",
+               note[note_len] = '\0';
+               fprintf(fp, "%s\t%s\t%s",
                        sha1_to_hex(commit ? commit->object.sha1 :
                                    rm->old_sha1),
                        rm->merge ? "" : "not-for-merge",
                        note);
+               for (i = 0; i < url_len; ++i)
+                       if ('\n' == url[i])
+                               fputs("\\n", fp);
+                       else
+                               fputc(url[i], fp);
+               fputc('\n', fp);
 
                if (ref)
                        rc |= update_local_ref(ref, what, note);
@@ -379,8 +389,9 @@ static int store_updated_refs(const char *url, const char *remote_name,
                                fprintf(stderr, " %s\n", note);
                }
        }
+       free(url);
        fclose(fp);
-       if (rc & 2)
+       if (rc & STORE_REF_ERROR_DF_CONFLICT)
                error("some local refs could not be updated; try running\n"
                      " 'git remote prune %s' to remove any old, conflicting "
                      "branches", remote_name);
@@ -389,14 +400,14 @@ static int store_updated_refs(const char *url, const char *remote_name,
 
 /*
  * We would want to bypass the object transfer altogether if
- * everything we are going to fetch already exists and connected
+ * everything we are going to fetch already exists and is connected
  * locally.
  *
- * The refs we are going to fetch are in to_fetch (nr_heads in
- * total).  If running
+ * The refs we are going to fetch are in ref_map.  If running
  *
- *  $ git rev-list --objects to_fetch[0] to_fetch[1] ... --not --all
+ *  $ git rev-list --objects --stdin --not --all
  *
+ * (feeding all the refs in ref_map on its standard input)
  * does not error out, that means everything reachable from the
  * refs we are going to fetch exists and is connected to some of
  * our existing refs.
@@ -405,8 +416,9 @@ static int quickfetch(struct ref *ref_map)
 {
        struct child_process revlist;
        struct ref *ref;
-       char **argv;
-       int i, err;
+       int err;
+       const char *argv[] = {"rev-list",
+               "--quiet", "--objects", "--stdin", "--not", "--all", NULL};
 
        /*
         * If we are deepening a shallow clone we already have these
@@ -418,34 +430,46 @@ static int quickfetch(struct ref *ref_map)
        if (depth)
                return -1;
 
-       for (i = 0, ref = ref_map; ref; ref = ref->next)
-               i++;
-       if (!i)
+       if (!ref_map)
                return 0;
 
-       argv = xmalloc(sizeof(*argv) * (i + 6));
-       i = 0;
-       argv[i++] = xstrdup("rev-list");
-       argv[i++] = xstrdup("--quiet");
-       argv[i++] = xstrdup("--objects");
-       for (ref = ref_map; ref; ref = ref->next)
-               argv[i++] = xstrdup(sha1_to_hex(ref->old_sha1));
-       argv[i++] = xstrdup("--not");
-       argv[i++] = xstrdup("--all");
-       argv[i++] = NULL;
-
        memset(&revlist, 0, sizeof(revlist));
-       revlist.argv = (const char**)argv;
+       revlist.argv = argv;
        revlist.git_cmd = 1;
-       revlist.no_stdin = 1;
        revlist.no_stdout = 1;
        revlist.no_stderr = 1;
-       err = run_command(&revlist);
+       revlist.in = -1;
+
+       err = start_command(&revlist);
+       if (err) {
+               error("could not run rev-list");
+               return err;
+       }
+
+       /*
+        * If rev-list --stdin encounters an unknown commit, it terminates,
+        * which will cause SIGPIPE in the write loop below.
+        */
+       sigchain_push(SIGPIPE, SIG_IGN);
+
+       for (ref = ref_map; ref; ref = ref->next) {
+               if (write_in_full(revlist.in, sha1_to_hex(ref->old_sha1), 40) < 0 ||
+                   write_str_in_full(revlist.in, "\n") < 0) {
+                       if (errno != EPIPE && errno != EINVAL)
+                               error("failed write to rev-list: %s", strerror(errno));
+                       err = -1;
+                       break;
+               }
+       }
+
+       if (close(revlist.in)) {
+               error("failed to close rev-list's stdin: %s", strerror(errno));
+               err = -1;
+       }
+
+       sigchain_pop(SIGPIPE);
 
-       for (i = 0; argv[i]; i++)
-               free(argv[i]);
-       free(argv);
-       return err;
+       return finish_command(&revlist) || err;
 }
 
 static int fetch_refs(struct transport *transport, struct ref *ref_map)
@@ -465,7 +489,8 @@ static int add_existing(const char *refname, const unsigned char *sha1,
                        int flag, void *cbdata)
 {
        struct string_list *list = (struct string_list *)cbdata;
-       string_list_insert(refname, list);
+       struct string_list_item *item = string_list_insert(refname, list);
+       item->util = (void *)sha1;
        return 0;
 }
 
@@ -480,57 +505,98 @@ static int will_fetch(struct ref **head, const unsigned char *sha1)
        return 0;
 }
 
+struct tag_data {
+       struct ref **head;
+       struct ref ***tail;
+};
+
+static int add_to_tail(struct string_list_item *item, void *cb_data)
+{
+       struct tag_data *data = (struct tag_data *)cb_data;
+       struct ref *rm = NULL;
+
+       /* A NULL util marks an item we have already decided to ignore */
+       if (!item->util)
+               return 0;
+
+       rm = alloc_ref(item->string);
+       rm->peer_ref = alloc_ref(item->string);
+       hashcpy(rm->old_sha1, item->util);
+
+       **data->tail = rm;
+       *data->tail = &rm->next;
+
+       return 0;
+}
+
 static void find_non_local_tags(struct transport *transport,
                        struct ref **head,
                        struct ref ***tail)
 {
        struct string_list existing_refs = { NULL, 0, 0, 0 };
-       struct string_list new_refs = { NULL, 0, 0, 1 };
-       char *ref_name;
-       int ref_name_len;
-       const unsigned char *ref_sha1;
-       const struct ref *tag_ref;
-       struct ref *rm = NULL;
+       struct string_list remote_refs = { NULL, 0, 0, 0 };
+       struct tag_data data = {head, tail};
        const struct ref *ref;
+       struct string_list_item *item = NULL;
 
        for_each_ref(add_existing, &existing_refs);
        for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) {
                if (prefixcmp(ref->name, "refs/tags"))
                        continue;
 
-               ref_name = xstrdup(ref->name);
-               ref_name_len = strlen(ref_name);
-               ref_sha1 = ref->old_sha1;
-
-               if (!strcmp(ref_name + ref_name_len - 3, "^{}")) {
-                       ref_name[ref_name_len - 3] = 0;
-                       tag_ref = transport_get_remote_refs(transport);
-                       while (tag_ref) {
-                               if (!strcmp(tag_ref->name, ref_name)) {
-                                       ref_sha1 = tag_ref->old_sha1;
-                                       break;
-                               }
-                               tag_ref = tag_ref->next;
-                       }
+               /*
+                * The peeled ref always follows the matching base
+                * ref, so if we see a peeled ref that we don't want
+                * to fetch then we can mark the ref entry in the list
+                * as one to ignore by setting util to NULL.
+                */
+               if (!strcmp(ref->name + strlen(ref->name) - 3, "^{}")) {
+                       if (item && !has_sha1_file(ref->old_sha1) &&
+                           !will_fetch(head, ref->old_sha1) &&
+                           !has_sha1_file(item->util) &&
+                           !will_fetch(head, item->util))
+                               item->util = NULL;
+                       item = NULL;
+                       continue;
                }
 
-               if (!string_list_has_string(&existing_refs, ref_name) &&
-                   !string_list_has_string(&new_refs, ref_name) &&
-                   (has_sha1_file(ref->old_sha1) ||
-                    will_fetch(head, ref->old_sha1))) {
-                       string_list_insert(ref_name, &new_refs);
+               /*
+                * If item is non-NULL here, then we previously saw a
+                * ref not followed by a peeled reference, so we need
+                * to check if it is a lightweight tag that we want to
+                * fetch.
+                */
+               if (item && !has_sha1_file(item->util) &&
+                   !will_fetch(head, item->util))
+                       item->util = NULL;
 
-                       rm = alloc_ref(ref_name);
-                       rm->peer_ref = alloc_ref(ref_name);
-                       hashcpy(rm->old_sha1, ref_sha1);
+               item = NULL;
 
-                       **tail = rm;
-                       *tail = &rm->next;
-               }
-               free(ref_name);
+               /* skip duplicates and refs that we already have */
+               if (string_list_has_string(&remote_refs, ref->name) ||
+                   string_list_has_string(&existing_refs, ref->name))
+                       continue;
+
+               item = string_list_insert(ref->name, &remote_refs);
+               item->util = (void *)ref->old_sha1;
        }
        string_list_clear(&existing_refs, 0);
-       string_list_clear(&new_refs, 0);
+
+       /*
+        * We may have a final lightweight tag that needs to be
+        * checked to see if it needs fetching.
+        */
+       if (item && !has_sha1_file(item->util) &&
+           !will_fetch(head, item->util))
+               item->util = NULL;
+
+       /*
+        * For every tag in the remote_refs string list, call add_to_tail,
+        * which appends each entry not marked as ignored to the refs to fetch.
+        */
+       for_each_string_list(add_to_tail, &remote_refs, &data);
+
+       string_list_clear(&remote_refs, 0);
 }
 
 static void check_not_current_branch(struct ref *ref_map)
@@ -543,15 +609,21 @@ static void check_not_current_branch(struct ref *ref_map)
        for (; ref_map; ref_map = ref_map->next)
                if (ref_map->peer_ref && !strcmp(current_branch->refname,
                                        ref_map->peer_ref->name))
-                       die("Refusing to fetch into current branch");
+                       die("Refusing to fetch into current branch %s "
+                           "of non-bare repository", current_branch->refname);
 }
 
 static int do_fetch(struct transport *transport,
                    struct refspec *refs, int ref_count)
 {
+       struct string_list existing_refs = { NULL, 0, 0, 0 };
+       struct string_list_item *peer_item = NULL;
        struct ref *ref_map;
        struct ref *rm;
        int autotags = (transport->remote->fetch_tags == 1);
+
+       for_each_ref(add_existing, &existing_refs);
+
        if (transport->remote->fetch_tags == 2 && tags != TAGS_UNSET)
                tags = TAGS_SET;
        if (transport->remote->fetch_tags == -1)
@@ -574,8 +646,13 @@ static int do_fetch(struct transport *transport,
                check_not_current_branch(ref_map);
 
        for (rm = ref_map; rm; rm = rm->next) {
-               if (rm->peer_ref)
-                       read_ref(rm->peer_ref->name, rm->peer_ref->old_sha1);
+               if (rm->peer_ref) {
+                       peer_item = string_list_lookup(rm->peer_ref->name,
+                                                      &existing_refs);
+                       if (peer_item)
+                               hashcpy(rm->peer_ref->old_sha1,
+                                       peer_item->util);
+               }
        }
 
        if (tags == TAGS_DEFAULT && autotags)
@@ -607,7 +684,7 @@ static void set_option(const char *name, const char *value)
 {
        int r = transport_set_option(transport, name, value);
        if (r < 0)
-               die("Option \"%s\" value \"%s\" is not valid for %s\n",
+               die("Option \"%s\" value \"%s\" is not valid for %s",
                        name, value, transport->url);
        if (r > 0)
                warning("Option \"%s\" is ignored for %s\n",
@@ -627,7 +704,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
        for (i = 1; i < argc; i++)
                strbuf_addf(&default_rla, " %s", argv[i]);
 
-       argc = parse_options(argc, argv,
+       argc = parse_options(argc, argv, prefix,
                             builtin_fetch_options, builtin_fetch_usage, 0);
 
        if (argc == 0)
@@ -635,6 +712,9 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
        else
                remote = remote_get(argv[0]);
 
+       if (!remote)
+               die("Where do you want to fetch from today?");
+
        transport = transport_get(remote, remote->url[0]);
        if (verbosity >= 2)
                transport->verbose = 1;
@@ -647,9 +727,6 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
        if (depth)
                set_option(TRANS_OPT_DEPTH, depth);
 
-       if (!transport->url)
-               die("Where do you want to fetch from today?");
-
        if (argc > 1) {
                int j = 0;
                refs = xcalloc(argc + 1, sizeof(const char *));
@@ -672,7 +749,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
                ref_nr = j;
        }
 
-       signal(SIGINT, unlock_pack_on_signal);
+       sigchain_push_common(unlock_pack_on_signal);
        atexit(unlock_pack);
        exit_code = do_fetch(transport,
                        parse_fetch_refspec(ref_nr, refs), ref_nr);