Merge branch 'jt/non-blob-lazy-fetch'
authorJunio C Hamano <gitster@pobox.com>
Fri, 19 Oct 2018 04:34:07 +0000 (13:34 +0900)
committerJunio C Hamano <gitster@pobox.com>
Fri, 19 Oct 2018 04:34:07 +0000 (13:34 +0900)
A partial clone that is configured to lazily fetch missing objects
will on-demand issue a "git fetch" request to the originating
repository to fill not-yet-obtained objects. The request has been
optimized for requesting a tree object (and not the leaf blob
objects contained in it) by telling the originating repository that
no blobs are needed.

* jt/non-blob-lazy-fetch:
fetch-pack: exclude blobs when lazy-fetching trees
fetch-pack: avoid object flags if no_dependents

fetch-pack.c
fetch-pack.h
t/t0410-partial-clone.sh
index 53914563b50475f9d7c6681544610e173b9165dd..49ab2666b9bdc889b6ddb17b43893b5c4fc6a308 100644 (file)
@@ -253,8 +253,10 @@ static int find_common(struct fetch_negotiator *negotiator,
        if (args->stateless_rpc && multi_ack == 1)
                die(_("--stateless-rpc requires multi_ack_detailed"));
 
-       mark_tips(negotiator, args->negotiation_tips);
-       for_each_cached_alternate(negotiator, insert_one_alternate_object);
+       if (!args->no_dependents) {
+               mark_tips(negotiator, args->negotiation_tips);
+               for_each_cached_alternate(negotiator, insert_one_alternate_object);
+       }
 
        fetching = 0;
        for ( ; refs ; refs = refs->next) {
@@ -271,8 +273,12 @@ static int find_common(struct fetch_negotiator *negotiator,
                 * We use lookup_object here because we are only
                 * interested in the case we *know* the object is
                 * reachable and we have already scanned it.
+                *
+                * Do this only if args->no_dependents is false (if it is true,
+                * we cannot trust the object flags).
                 */
-               if (((o = lookup_object(the_repository, remote->hash)) != NULL) &&
+               if (!args->no_dependents &&
+                   ((o = lookup_object(the_repository, remote->hash)) != NULL) &&
                                (o->flags & COMPLETE)) {
                        continue;
                }
@@ -707,31 +713,29 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
 
        oidset_clear(&loose_oid_set);
 
-       if (!args->no_dependents) {
-               if (!args->deepen) {
-                       for_each_ref(mark_complete_oid, NULL);
-                       for_each_cached_alternate(NULL, mark_alternate_complete);
-                       commit_list_sort_by_date(&complete);
-                       if (cutoff)
-                               mark_recent_complete_commits(args, cutoff);
-               }
+       if (!args->deepen) {
+               for_each_ref(mark_complete_oid, NULL);
+               for_each_cached_alternate(NULL, mark_alternate_complete);
+               commit_list_sort_by_date(&complete);
+               if (cutoff)
+                       mark_recent_complete_commits(args, cutoff);
+       }
 
-               /*
-                * Mark all complete remote refs as common refs.
-                * Don't mark them common yet; the server has to be told so first.
-                */
-               for (ref = *refs; ref; ref = ref->next) {
-                       struct object *o = deref_tag(the_repository,
-                                                    lookup_object(the_repository,
-                                                    ref->old_oid.hash),
-                                                    NULL, 0);
+       /*
+        * Mark all complete remote refs as common refs.
+        * Don't mark them common yet; the server has to be told so first.
+        */
+       for (ref = *refs; ref; ref = ref->next) {
+               struct object *o = deref_tag(the_repository,
+                                            lookup_object(the_repository,
+                                            ref->old_oid.hash),
+                                            NULL, 0);
 
-                       if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE))
-                               continue;
+               if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE))
+                       continue;
 
-                       negotiator->known_common(negotiator,
-                                                (struct commit *)o);
-               }
+               negotiator->known_common(negotiator,
+                                        (struct commit *)o);
        }
 
        save_commit_buffer = old_save_commit_buffer;
@@ -987,11 +991,15 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
        if (!server_supports("deepen-relative") && args->deepen_relative)
                die(_("Server does not support --deepen"));
 
-       mark_complete_and_common_ref(&negotiator, args, &ref);
-       filter_refs(args, &ref, sought, nr_sought);
-       if (everything_local(args, &ref)) {
-               packet_flush(fd[1]);
-               goto all_done;
+       if (!args->no_dependents) {
+               mark_complete_and_common_ref(&negotiator, args, &ref);
+               filter_refs(args, &ref, sought, nr_sought);
+               if (everything_local(args, &ref)) {
+                       packet_flush(fd[1]);
+                       goto all_done;
+               }
+       } else {
+               filter_refs(args, &ref, sought, nr_sought);
        }
        if (find_common(&negotiator, args, fd, &oid, ref) < 0)
                if (!args->keep_pack)
@@ -1037,7 +1045,7 @@ static void add_shallow_requests(struct strbuf *req_buf,
        }
 }
 
-static void add_wants(const struct ref *wants, struct strbuf *req_buf)
+static void add_wants(int no_dependents, const struct ref *wants, struct strbuf *req_buf)
 {
        int use_ref_in_want = server_supports_feature("fetch", "ref-in-want", 0);
 
@@ -1054,8 +1062,12 @@ static void add_wants(const struct ref *wants, struct strbuf *req_buf)
                 * We use lookup_object here because we are only
                 * interested in the case we *know* the object is
                 * reachable and we have already scanned it.
+                *
+                * Do this only if args->no_dependents is false (if it is true,
+                * we cannot trust the object flags).
                 */
-               if (((o = lookup_object(the_repository, remote->hash)) != NULL) &&
+               if (!no_dependents &&
+                   ((o = lookup_object(the_repository, remote->hash)) != NULL) &&
                    (o->flags & COMPLETE)) {
                        continue;
                }
@@ -1152,7 +1164,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
        }
 
        /* add wants */
-       add_wants(wants, &req_buf);
+       add_wants(args->no_dependents, wants, &req_buf);
 
        if (args->no_dependents) {
                packet_buf_write(&req_buf, "done");
@@ -1343,16 +1355,21 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
                                args->deepen = 1;
 
                        /* Filter 'ref' by 'sought' and those that aren't local */
-                       mark_complete_and_common_ref(&negotiator, args, &ref);
-                       filter_refs(args, &ref, sought, nr_sought);
-                       if (everything_local(args, &ref))
-                               state = FETCH_DONE;
-                       else
+                       if (!args->no_dependents) {
+                               mark_complete_and_common_ref(&negotiator, args, &ref);
+                               filter_refs(args, &ref, sought, nr_sought);
+                               if (everything_local(args, &ref))
+                                       state = FETCH_DONE;
+                               else
+                                       state = FETCH_SEND_REQUEST;
+
+                               mark_tips(&negotiator, args->negotiation_tips);
+                               for_each_cached_alternate(&negotiator,
+                                                         insert_one_alternate_object);
+                       } else {
+                               filter_refs(args, &ref, sought, nr_sought);
                                state = FETCH_SEND_REQUEST;
-
-                       mark_tips(&negotiator, args->negotiation_tips);
-                       for_each_cached_alternate(&negotiator,
-                                                 insert_one_alternate_object);
+                       }
                        break;
                case FETCH_SEND_REQUEST:
                        if (send_fetch_request(&negotiator, fd[1], args, ref,
@@ -1595,6 +1612,20 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
        if (nr_sought)
                nr_sought = remove_duplicates_in_refs(sought, nr_sought);
 
+       if (args->no_dependents && !args->filter_options.choice) {
+               /*
+                * The protocol does not support requesting that only the
+                * wanted objects be sent, so approximate this by setting a
+                * "blob:none" filter if no filter is already set. This works
+                * for all object types: note that wanted blobs will still be
+                * sent because they are directly specified as a "want".
+                *
+                * NEEDSWORK: Add an option in the protocol to request that
+                * only the wanted objects be sent, and implement it.
+                */
+               parse_list_objects_filter(&args->filter_options, "blob:none");
+       }
+
        if (!ref) {
                packet_flush(fd[1]);
                die(_("no matching remote head"));
index 5b6e868802b53ca5fa59eb864199274ac538c242..43ec344d95b40c701b2fc703b4b2b1e3ff72a2d2 100644 (file)
@@ -43,6 +43,13 @@ struct fetch_pack_args {
        unsigned from_promisor:1;
 
        /*
+        * Attempt to fetch only the wanted objects, and not any objects
+        * referred to by them. Due to protocol limitations, extraneous
+        * objects may still be included. (When fetching non-blob
+        * objects, only blobs are excluded; when fetching a blob, the
+        * blob itself will still be sent. The client does not need to
+        * know whether a wanted object is a blob or not.)
+        *
         * If 1, fetch_pack() will also not modify any object flags.
         * This allows fetch_pack() to safely be called by any function,
         * regardless of which object flags it uses (if any).
index cfd0655ea19b0caa8d55fd41b5b76abde4de1c8b..c521d7d6c61f3e3c76d5c4d51440ae114004a362 100755 (executable)
@@ -182,6 +182,47 @@ test_expect_success 'fetching of missing objects works with ref-in-want enabled'
        grep "git< fetch=.*ref-in-want" trace
 '
 
+test_expect_success 'fetching of missing blobs works' '
+       rm -rf server repo &&
+       test_create_repo server &&
+       test_commit -C server foo &&
+       git -C server repack -a -d --write-bitmap-index &&
+
+       git clone "file://$(pwd)/server" repo &&
+       git hash-object repo/foo.t >blobhash &&
+       rm -rf repo/.git/objects/* &&
+
+       git -C server config uploadpack.allowanysha1inwant 1 &&
+       git -C server config uploadpack.allowfilter 1 &&
+       git -C repo config core.repositoryformatversion 1 &&
+       git -C repo config extensions.partialclone "origin" &&
+
+       git -C repo cat-file -p $(cat blobhash)
+'
+
+test_expect_success 'fetching of missing trees does not fetch blobs' '
+       rm -rf server repo &&
+       test_create_repo server &&
+       test_commit -C server foo &&
+       git -C server repack -a -d --write-bitmap-index &&
+
+       git clone "file://$(pwd)/server" repo &&
+       git -C repo rev-parse foo^{tree} >treehash &&
+       git hash-object repo/foo.t >blobhash &&
+       rm -rf repo/.git/objects/* &&
+
+       git -C server config uploadpack.allowanysha1inwant 1 &&
+       git -C server config uploadpack.allowfilter 1 &&
+       git -C repo config core.repositoryformatversion 1 &&
+       git -C repo config extensions.partialclone "origin" &&
+       git -C repo cat-file -p $(cat treehash) &&
+
+       # Ensure that the tree, but not the blob, is fetched
+       git -C repo rev-list --objects --missing=print $(cat treehash) >objects &&
+       grep "^$(cat treehash)" objects &&
+       grep "^[?]$(cat blobhash)" objects
+'
+
 test_expect_success 'rev-list stops traversal at missing and promised commit' '
        rm -rf repo &&
        test_create_repo repo &&