clone: open a shortcut for connectivity check
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>
Sun, 26 May 2013 01:16:17 +0000 (08:16 +0700)
committerJunio C Hamano <gitster@pobox.com>
Tue, 28 May 2013 15:07:20 +0000 (08:07 -0700)
In order to make sure the cloned repository is good, we run "rev-list
--objects --not --all $new_refs" on the repository. This is expensive
on large repositories. This patch attempts to mitigate the impact in
this special case.

In the "good" clone case, we only have one pack. If all of the
following are met, we can be sure that all objects reachable from the
new refs exist, which is the intention of running "rev-list ...":

- all refs point to an object in the pack
- there are no dangling pointers in any object in the pack
- no objects in the pack point to objects outside the pack

The second and third checks can be done with the help of index-pack as
a slight variation of --strict check (which introduces a new condition
for the shortcut: pack transfer must be used and the number of objects
large enough to call index-pack). The first is checked in
check_everything_connected after we get an "ok" from index-pack.

"index-pack + new checks" is still faster than the current "index-pack
+ rev-list", which is the whole point of this patch. If any of the
conditions fail, we fall back to the good old but expensive "rev-list
..". In that case it's even more expensive because we have to pay for
the new checks in index-pack. But that should only happen when the
other side is either buggy or malicious.

Cloning linux-2.6 over file://

before after
real 3m25.693s 2m53.050s
user 5m2.037s 4m42.396s
sys 0m13.750s 0m16.574s

A more realistic test with ssh:// over wireless

before after
real 11m26.629s 10m4.213s
user 5m43.196s 5m19.444s
sys 0m35.812s 0m37.630s

This shortcut is not applied to shallow clones, partly because shallow
clones should have no more objects than a usual fetch and the cost of
rev-list is acceptable, partly to avoid dealing with corner cases when
grafting is involved.

This shortcut does not apply to unpack-objects code path either
because the number of objects must be small in order to trigger that
code path.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-index-pack.txt
builtin/clone.c
builtin/index-pack.c
connected.c
connected.h
fetch-pack.c
fetch-pack.h
transport.c
transport.h
index bde8eec30db19accd80d960518b45fbc29f37bf8..7a4e0555205be5d8bb26e5a183f1552cc057637a 100644 (file)
@@ -74,6 +74,9 @@ OPTIONS
 --strict::
        Die, if the pack contains broken objects or links.
 
+--check-self-contained-and-connected::
+       Die if the pack contains broken links. For internal use only.
+
 --threads=<n>::
        Specifies the number of threads to spawn when resolving
        deltas. This requires that index-pack be compiled with
index dad426598962b2f1a00158ac7eeb05b7958d68ed..069e81e26d1393f6ca581503410814476b115387 100644 (file)
@@ -542,13 +542,15 @@ static void update_remote_refs(const struct ref *refs,
                               const struct ref *mapped_refs,
                               const struct ref *remote_head_points_at,
                               const char *branch_top,
-                              const char *msg)
+                              const char *msg,
+                              struct transport *transport)
 {
        const struct ref *rm = mapped_refs;
 
        if (0 <= option_verbosity)
                printf(_("Checking connectivity... "));
-       if (check_everything_connected(iterate_ref_map, 0, &rm))
+       if (check_everything_connected_with_transport(iterate_ref_map,
+                                                     0, &rm, transport))
                die(_("remote did not send all necessary objects"));
        if (0 <= option_verbosity)
                printf(_("done\n"));
@@ -893,6 +895,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
                if (option_upload_pack)
                        transport_set_option(transport, TRANS_OPT_UPLOADPACK,
                                             option_upload_pack);
+
+               if (transport->smart_options && !option_depth)
+                       transport->smart_options->check_self_contained_and_connected = 1;
        }
 
        refs = transport_get_remote_refs(transport);
@@ -954,7 +959,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
                transport_fetch_refs(transport, mapped_refs);
 
        update_remote_refs(refs, mapped_refs, remote_head_points_at,
-                          branch_top.buf, reflog_msg.buf);
+                          branch_top.buf, reflog_msg.buf, transport);
 
        update_head(our_head_points_at, remote_head, reflog_msg.buf);
 
index f52a04f64532558e166a7fa0372996ec7f990792..9c1cfac4427ef8d8fbca0834e1c8b644ceddf998 100644 (file)
@@ -77,8 +77,10 @@ static int nr_threads;
 
 static int from_stdin;
 static int strict;
+static int do_fsck_object;
 static int verbose;
 static int show_stat;
+static int check_self_contained_and_connected;
 
 static struct progress *progress;
 
@@ -187,13 +189,13 @@ static int mark_link(struct object *obj, int type, void *data)
 
 /* The content of each linked object must have been checked
    or it must be already present in the object database */
-static void check_object(struct object *obj)
+static unsigned check_object(struct object *obj)
 {
        if (!obj)
-               return;
+               return 0;
 
        if (!(obj->flags & FLAG_LINK))
-               return;
+               return 0;
 
        if (!(obj->flags & FLAG_CHECKED)) {
                unsigned long size;
@@ -201,17 +203,20 @@ static void check_object(struct object *obj)
                if (type != obj->type || type <= 0)
                        die(_("object of unexpected type"));
                obj->flags |= FLAG_CHECKED;
-               return;
+               return 1;
        }
+
+       return 0;
 }
 
-static void check_objects(void)
+static unsigned check_objects(void)
 {
-       unsigned i, max;
+       unsigned i, max, foreign_nr = 0;
 
        max = get_max_object_index();
        for (i = 0; i < max; i++)
-               check_object(get_indexed_object(i));
+               foreign_nr += check_object(get_indexed_object(i));
+       return foreign_nr;
 }
 
 
@@ -756,7 +761,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
                        obj = parse_object_buffer(sha1, type, size, buf, &eaten);
                        if (!obj)
                                die(_("invalid %s"), typename(type));
-                       if (fsck_object(obj, 1, fsck_error_function))
+                       if (do_fsck_object &&
+                           fsck_object(obj, 1, fsck_error_function))
                                die(_("Error in object"));
                        if (fsck_walk(obj, mark_link, NULL))
                                die(_("Not all child objects of %s are reachable"), sha1_to_hex(obj->sha1));
@@ -1490,6 +1496,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
        struct pack_idx_entry **idx_objects;
        struct pack_idx_option opts;
        unsigned char pack_sha1[20];
+       unsigned foreign_nr = 1;        /* zero is a "good" value, assume bad */
 
        if (argc == 2 && !strcmp(argv[1], "-h"))
                usage(index_pack_usage);
@@ -1511,6 +1518,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
                                fix_thin_pack = 1;
                        } else if (!strcmp(arg, "--strict")) {
                                strict = 1;
+                               do_fsck_object = 1;
+                       } else if (!strcmp(arg, "--check-self-contained-and-connected")) {
+                               strict = 1;
+                               check_self_contained_and_connected = 1;
                        } else if (!strcmp(arg, "--verify")) {
                                verify = 1;
                        } else if (!strcmp(arg, "--verify-stat")) {
@@ -1624,7 +1635,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
        conclude_pack(fix_thin_pack, curr_pack, pack_sha1);
        free(deltas);
        if (strict)
-               check_objects();
+               foreign_nr = check_objects();
 
        if (show_stat)
                show_pack_info(stat_only);
@@ -1650,5 +1661,11 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
        if (index_name == NULL)
                free((void *) curr_index);
 
+       /*
+        * Let the caller know this pack is not self contained
+        */
+       if (check_self_contained_and_connected && foreign_nr)
+               return 1;
+
        return 0;
 }
index 1e89c1cd1d63f160bf7c95de41be61ff2d888e70..fae8d64c12e44cf6bcf4bab210abc4bc4b9c8ed9 100644 (file)
@@ -2,7 +2,12 @@
 #include "run-command.h"
 #include "sigchain.h"
 #include "connected.h"
+#include "transport.h"
 
+int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
+{
+       return check_everything_connected_with_transport(fn, quiet, cb_data, NULL);
+}
 /*
  * If we feed all the commits we want to verify to this command
  *
  *
  * Returns 0 if everything is connected, non-zero otherwise.
  */
-int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
+int check_everything_connected_with_transport(sha1_iterate_fn fn,
+                                             int quiet,
+                                             void *cb_data,
+                                             struct transport *transport)
 {
        struct child_process rev_list;
        const char *argv[] = {"rev-list", "--objects",
@@ -22,10 +30,23 @@ int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
        char commit[41];
        unsigned char sha1[20];
        int err = 0;
+       struct packed_git *new_pack = NULL;
 
        if (fn(cb_data, sha1))
                return err;
 
+       if (transport && transport->smart_options &&
+           transport->smart_options->self_contained_and_connected &&
+           transport->pack_lockfile &&
+           !suffixcmp(transport->pack_lockfile, ".keep")) {
+               struct strbuf idx_file = STRBUF_INIT;
+               strbuf_addstr(&idx_file, transport->pack_lockfile);
+               strbuf_setlen(&idx_file, idx_file.len - 5); /* ".keep" */
+               strbuf_addstr(&idx_file, ".idx");
+               new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+               strbuf_release(&idx_file);
+       }
+
        if (quiet)
                argv[5] = "--quiet";
 
@@ -42,6 +63,17 @@ int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
 
        commit[40] = '\n';
        do {
+               /*
+                * If index-pack already checked that:
+                * - there are no dangling pointers in the new pack
+                * - the pack is self contained
+                * Then if the updated ref is in the new pack, then we
+                * are sure the ref is good and not sending it to
+                * rev-list for verification.
+                */
+               if (new_pack && find_pack_entry_one(sha1, new_pack))
+                       continue;
+
                memcpy(commit, sha1_to_hex(sha1), 40);
                if (write_in_full(rev_list.in, commit, 41) < 0) {
                        if (errno != EPIPE && errno != EINVAL)
index 7e4585a6cbccdb35f4a4481fae1eaf417539e97d..0b060b7429ff2035ac3db108f80308c4a8a8e785 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef CONNECTED_H
 #define CONNECTED_H
 
+struct transport;
+
 /*
  * Take callback data, and return next object name in the buffer.
  * When called after returning the name for the last object, return -1
@@ -16,5 +18,8 @@ typedef int (*sha1_iterate_fn)(void *, unsigned char [20]);
  * Return 0 if Ok, non zero otherwise (i.e. some missing objects)
  */
 extern int check_everything_connected(sha1_iterate_fn, int quiet, void *cb_data);
+extern int check_everything_connected_with_transport(sha1_iterate_fn, int quiet,
+                                                    void *cb_data,
+                                                    struct transport *transport);
 
 #endif /* CONNECTED_H */
index 6b5467c6dec9645f53d83cdad2467a158db622c0..abe5ffbba55037c34b1bf38a86aa0ec8bd7f3447 100644 (file)
@@ -691,6 +691,7 @@ static int get_pack(struct fetch_pack_args *args,
        const char **av;
        int do_keep = args->keep_pack;
        struct child_process cmd;
+       int ret;
 
        memset(&demux, 0, sizeof(demux));
        if (use_sideband) {
@@ -747,11 +748,14 @@ static int get_pack(struct fetch_pack_args *args,
                                strcpy(keep_arg + s, "localhost");
                        *av++ = keep_arg;
                }
+               if (args->check_self_contained_and_connected)
+                       *av++ = "--check-self-contained-and-connected";
        }
        else {
                *av++ = "unpack-objects";
                if (args->quiet || args->no_progress)
                        *av++ = "-q";
+               args->check_self_contained_and_connected = 0;
        }
        if (*hdr_arg)
                *av++ = hdr_arg;
@@ -772,7 +776,12 @@ static int get_pack(struct fetch_pack_args *args,
                close(cmd.out);
        }
 
-       if (finish_command(&cmd))
+       ret = finish_command(&cmd);
+       if (!ret || (args->check_self_contained_and_connected && ret == 1))
+               args->self_contained_and_connected =
+                       args->check_self_contained_and_connected &&
+                       ret == 0;
+       else
                die("%s failed", argv[0]);
        if (use_sideband && finish_async(&demux))
                die("error in sideband demultiplexer");
index dc5266c970655a9fe4f971a0132c14e1cf731acc..40f08bab24df917986de73c8d7f4d9d36fe658da 100644 (file)
@@ -16,7 +16,9 @@ struct fetch_pack_args {
                verbose:1,
                no_progress:1,
                include_tag:1,
-               stateless_rpc:1;
+               stateless_rpc:1,
+               check_self_contained_and_connected:1,
+               self_contained_and_connected:1;
 };
 
 /*
index ba5d8afb1b04ba9d331c721fd9f730184fff2f23..359a671c8ca9232944cd6e92a2ac0a9eda7b429d 100644 (file)
@@ -534,6 +534,8 @@ static int fetch_refs_via_pack(struct transport *transport,
        args.quiet = (transport->verbose < 0);
        args.no_progress = !transport->progress;
        args.depth = data->options.depth;
+       args.check_self_contained_and_connected =
+               data->options.check_self_contained_and_connected;
 
        if (!data->got_remote_heads) {
                connect_setup(transport, 0, 0);
@@ -551,6 +553,8 @@ static int fetch_refs_via_pack(struct transport *transport,
                refs = NULL;
        data->conn = NULL;
        data->got_remote_heads = 0;
+       data->options.self_contained_and_connected =
+               args.self_contained_and_connected;
 
        free_refs(refs_tmp);
 
index fcb1d25d96a750c171c4341a9c5f08992915fb6b..4edebc535583f655cc120d169677ea221af554b3 100644 (file)
@@ -8,6 +8,8 @@ struct git_transport_options {
        unsigned thin : 1;
        unsigned keep : 1;
        unsigned followtags : 1;
+       unsigned check_self_contained_and_connected : 1;
+       unsigned self_contained_and_connected : 1;
        int depth;
        const char *uploadpack;
        const char *receivepack;