Merge branch 'jt/non-blob-lazy-fetch'
author Junio C Hamano <gitster@pobox.com>
Fri, 19 Oct 2018 04:34:07 +0000 (13:34 +0900)
committer Junio C Hamano <gitster@pobox.com>
Fri, 19 Oct 2018 04:34:07 +0000 (13:34 +0900)
A partial clone that is configured to lazily fetch missing objects
will issue a "git fetch" request on demand to the originating
repository to fill in not-yet-obtained objects. The request has been
optimized for requesting a tree object (and not the leaf blob
objects contained in it) by telling the originating repository that
no blobs are needed.

* jt/non-blob-lazy-fetch:
fetch-pack: exclude blobs when lazy-fetching trees
fetch-pack: avoid object flags if no_dependents

1  2 
fetch-pack.c
t/t0410-partial-clone.sh
diff --combined fetch-pack.c
index 53914563b50475f9d7c6681544610e173b9165dd,79007f996c899272b6ae9044a9029776181f3438..49ab2666b9bdc889b6ddb17b43893b5c4fc6a308
@@@ -253,8 -253,10 +253,10 @@@ static int find_common(struct fetch_neg
        if (args->stateless_rpc && multi_ack == 1)
                die(_("--stateless-rpc requires multi_ack_detailed"));
  
-       mark_tips(negotiator, args->negotiation_tips);
-       for_each_cached_alternate(negotiator, insert_one_alternate_object);
+       if (!args->no_dependents) {
+               mark_tips(negotiator, args->negotiation_tips);
+               for_each_cached_alternate(negotiator, insert_one_alternate_object);
+       }
  
        fetching = 0;
        for ( ; refs ; refs = refs->next) {
                 * We use lookup_object here because we are only
                 * interested in the case we *know* the object is
                 * reachable and we have already scanned it.
+                *
+                * Do this only if args->no_dependents is false (if it is true,
+                * we cannot trust the object flags).
                 */
-               if (((o = lookup_object(the_repository, remote->hash)) != NULL) &&
+               if (!args->no_dependents &&
+                   ((o = lookup_object(the_repository, remote->hash)) != NULL) &&
                                (o->flags & COMPLETE)) {
                        continue;
                }
@@@ -526,14 -532,21 +532,14 @@@ static void add_refs_to_oidset(struct o
                oidset_insert(oids, &refs->old_oid);
  }
  
 -static int tip_oids_contain(struct oidset *tip_oids,
 -                          struct ref *unmatched, struct ref *newlist,
 -                          const struct object_id *id)
 +static int is_unmatched_ref(const struct ref *ref)
  {
 -      /*
 -       * Note that this only looks at the ref lists the first time it's
 -       * called. This works out in filter_refs() because even though it may
 -       * add to "newlist" between calls, the additions will always be for
 -       * oids that are already in the set.
 -       */
 -      if (!tip_oids->map.map.tablesize) {
 -              add_refs_to_oidset(tip_oids, unmatched);
 -              add_refs_to_oidset(tip_oids, newlist);
 -      }
 -      return oidset_contains(tip_oids, id);
 +      struct object_id oid;
 +      const char *p;
 +      return  ref->match_status == REF_NOT_MATCHED &&
 +              !parse_oid_hex(ref->name, &oid, &p) &&
 +              *p == '\0' &&
 +              oideq(&oid, &ref->old_oid);
  }
  
  static void filter_refs(struct fetch_pack_args *args,
        struct ref *ref, *next;
        struct oidset tip_oids = OIDSET_INIT;
        int i;
 +      int strict = !(allow_unadvertised_object_request &
 +                     (ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1));
  
        i = 0;
        for (ref = *refs; ref; ref = next) {
                }
        }
  
 +      if (strict) {
 +              for (i = 0; i < nr_sought; i++) {
 +                      ref = sought[i];
 +                      if (!is_unmatched_ref(ref))
 +                              continue;
 +
 +                      add_refs_to_oidset(&tip_oids, unmatched);
 +                      add_refs_to_oidset(&tip_oids, newlist);
 +                      break;
 +              }
 +      }
 +
        /* Append unmatched requests to the list */
        for (i = 0; i < nr_sought; i++) {
 -              struct object_id oid;
 -              const char *p;
 -
                ref = sought[i];
 -              if (ref->match_status != REF_NOT_MATCHED)
 -                      continue;
 -              if (parse_oid_hex(ref->name, &oid, &p) ||
 -                  *p != '\0' ||
 -                  oidcmp(&oid, &ref->old_oid))
 +              if (!is_unmatched_ref(ref))
                        continue;
  
 -              if ((allow_unadvertised_object_request &
 -                   (ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1)) ||
 -                  tip_oids_contain(&tip_oids, unmatched, newlist,
 -                                   &ref->old_oid)) {
 +              if (!strict || oidset_contains(&tip_oids, &ref->old_oid)) {
                        ref->match_status = REF_MATCHED;
                        *newtail = copy_ref(ref);
                        newtail = &(*newtail)->next;
@@@ -707,31 -716,29 +713,29 @@@ static void mark_complete_and_common_re
  
        oidset_clear(&loose_oid_set);
  
-       if (!args->no_dependents) {
-               if (!args->deepen) {
-                       for_each_ref(mark_complete_oid, NULL);
-                       for_each_cached_alternate(NULL, mark_alternate_complete);
-                       commit_list_sort_by_date(&complete);
-                       if (cutoff)
-                               mark_recent_complete_commits(args, cutoff);
-               }
+       if (!args->deepen) {
+               for_each_ref(mark_complete_oid, NULL);
+               for_each_cached_alternate(NULL, mark_alternate_complete);
+               commit_list_sort_by_date(&complete);
+               if (cutoff)
+                       mark_recent_complete_commits(args, cutoff);
+       }
  
-               /*
-                * Mark all complete remote refs as common refs.
-                * Don't mark them common yet; the server has to be told so first.
-                */
-               for (ref = *refs; ref; ref = ref->next) {
-                       struct object *o = deref_tag(the_repository,
-                                                    lookup_object(the_repository,
-                                                    ref->old_oid.hash),
-                                                    NULL, 0);
+       /*
+        * Mark all complete remote refs as common refs.
+        * Don't mark them common yet; the server has to be told so first.
+        */
+       for (ref = *refs; ref; ref = ref->next) {
+               struct object *o = deref_tag(the_repository,
+                                            lookup_object(the_repository,
+                                            ref->old_oid.hash),
+                                            NULL, 0);
  
-                       if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE))
-                               continue;
+               if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE))
+                       continue;
  
-                       negotiator->known_common(negotiator,
-                                                (struct commit *)o);
-               }
+               negotiator->known_common(negotiator,
+                                        (struct commit *)o);
        }
  
        save_commit_buffer = old_save_commit_buffer;
@@@ -987,11 -994,15 +991,15 @@@ static struct ref *do_fetch_pack(struc
        if (!server_supports("deepen-relative") && args->deepen_relative)
                die(_("Server does not support --deepen"));
  
-       mark_complete_and_common_ref(&negotiator, args, &ref);
-       filter_refs(args, &ref, sought, nr_sought);
-       if (everything_local(args, &ref)) {
-               packet_flush(fd[1]);
-               goto all_done;
+       if (!args->no_dependents) {
+               mark_complete_and_common_ref(&negotiator, args, &ref);
+               filter_refs(args, &ref, sought, nr_sought);
+               if (everything_local(args, &ref)) {
+                       packet_flush(fd[1]);
+                       goto all_done;
+               }
+       } else {
+               filter_refs(args, &ref, sought, nr_sought);
        }
        if (find_common(&negotiator, args, fd, &oid, ref) < 0)
                if (!args->keep_pack)
@@@ -1037,7 -1048,7 +1045,7 @@@ static void add_shallow_requests(struc
        }
  }
  
- static void add_wants(const struct ref *wants, struct strbuf *req_buf)
+ static void add_wants(int no_dependents, const struct ref *wants, struct strbuf *req_buf)
  {
        int use_ref_in_want = server_supports_feature("fetch", "ref-in-want", 0);
  
                 * We use lookup_object here because we are only
                 * interested in the case we *know* the object is
                 * reachable and we have already scanned it.
+                *
+                * Do this only if args->no_dependents is false (if it is true,
+                * we cannot trust the object flags).
                 */
-               if (((o = lookup_object(the_repository, remote->hash)) != NULL) &&
+               if (!no_dependents &&
+                   ((o = lookup_object(the_repository, remote->hash)) != NULL) &&
                    (o->flags & COMPLETE)) {
                        continue;
                }
@@@ -1152,7 -1167,7 +1164,7 @@@ static int send_fetch_request(struct fe
        }
  
        /* add wants */
-       add_wants(wants, &req_buf);
+       add_wants(args->no_dependents, wants, &req_buf);
  
        if (args->no_dependents) {
                packet_buf_write(&req_buf, "done");
@@@ -1343,16 -1358,21 +1355,21 @@@ static struct ref *do_fetch_pack_v2(str
                                args->deepen = 1;
  
                        /* Filter 'ref' by 'sought' and those that aren't local */
-                       mark_complete_and_common_ref(&negotiator, args, &ref);
-                       filter_refs(args, &ref, sought, nr_sought);
-                       if (everything_local(args, &ref))
-                               state = FETCH_DONE;
-                       else
+                       if (!args->no_dependents) {
+                               mark_complete_and_common_ref(&negotiator, args, &ref);
+                               filter_refs(args, &ref, sought, nr_sought);
+                               if (everything_local(args, &ref))
+                                       state = FETCH_DONE;
+                               else
+                                       state = FETCH_SEND_REQUEST;
+                               mark_tips(&negotiator, args->negotiation_tips);
+                               for_each_cached_alternate(&negotiator,
+                                                         insert_one_alternate_object);
+                       } else {
+                               filter_refs(args, &ref, sought, nr_sought);
                                state = FETCH_SEND_REQUEST;
-                       mark_tips(&negotiator, args->negotiation_tips);
-                       for_each_cached_alternate(&negotiator,
-                                                 insert_one_alternate_object);
+                       }
                        break;
                case FETCH_SEND_REQUEST:
                        if (send_fetch_request(&negotiator, fd[1], args, ref,
@@@ -1595,6 -1615,20 +1612,20 @@@ struct ref *fetch_pack(struct fetch_pac
        if (nr_sought)
                nr_sought = remove_duplicates_in_refs(sought, nr_sought);
  
+       if (args->no_dependents && !args->filter_options.choice) {
+               /*
+                * The protocol does not support requesting that only the
+                * wanted objects be sent, so approximate this by setting a
+                * "blob:none" filter if no filter is already set. This works
+                * for all object types: note that wanted blobs will still be
+                * sent because they are directly specified as a "want".
+                *
+                * NEEDSWORK: Add an option in the protocol to request that
+                * only the wanted objects be sent, and implement it.
+                */
+               parse_list_objects_filter(&args->filter_options, "blob:none");
+       }
        if (!ref) {
                packet_flush(fd[1]);
                die(_("no matching remote head"));
diff --combined t/t0410-partial-clone.sh
index cfd0655ea19b0caa8d55fd41b5b76abde4de1c8b,08a0c3651c7977d3c9abd1ab883ec00cb0af9dc3..c521d7d6c61f3e3c76d5c4d51440ae114004a362
@@@ -170,18 -170,47 +170,59 @@@ test_expect_success 'fetching of missin
        git verify-pack --verbose "$IDX" | grep "$HASH"
  '
  
 +test_expect_success 'fetching of missing objects works with ref-in-want enabled' '
 +      # ref-in-want requires protocol version 2
 +      git -C server config protocol.version 2 &&
 +      git -C server config uploadpack.allowrefinwant 1 &&
 +      git -C repo config protocol.version 2 &&
 +
 +      rm -rf repo/.git/objects/* &&
 +      rm -f trace &&
 +      GIT_TRACE_PACKET="$(pwd)/trace" git -C repo cat-file -p "$HASH" &&
 +      grep "git< fetch=.*ref-in-want" trace
 +'
 +
+ test_expect_success 'fetching of missing blobs works' '
+       rm -rf server repo &&
+       test_create_repo server &&
+       test_commit -C server foo &&
+       git -C server repack -a -d --write-bitmap-index &&
+       git clone "file://$(pwd)/server" repo &&
+       git hash-object repo/foo.t >blobhash &&
+       rm -rf repo/.git/objects/* &&
+       git -C server config uploadpack.allowanysha1inwant 1 &&
+       git -C server config uploadpack.allowfilter 1 &&
+       git -C repo config core.repositoryformatversion 1 &&
+       git -C repo config extensions.partialclone "origin" &&
+       git -C repo cat-file -p $(cat blobhash)
+ '
+ test_expect_success 'fetching of missing trees does not fetch blobs' '
+       rm -rf server repo &&
+       test_create_repo server &&
+       test_commit -C server foo &&
+       git -C server repack -a -d --write-bitmap-index &&
+       git clone "file://$(pwd)/server" repo &&
+       git -C repo rev-parse foo^{tree} >treehash &&
+       git hash-object repo/foo.t >blobhash &&
+       rm -rf repo/.git/objects/* &&
+       git -C server config uploadpack.allowanysha1inwant 1 &&
+       git -C server config uploadpack.allowfilter 1 &&
+       git -C repo config core.repositoryformatversion 1 &&
+       git -C repo config extensions.partialclone "origin" &&
+       git -C repo cat-file -p $(cat treehash) &&
+       # Ensure that the tree, but not the blob, is fetched
+       git -C repo rev-list --objects --missing=print $(cat treehash) >objects &&
+       grep "^$(cat treehash)" objects &&
+       grep "^[?]$(cat blobhash)" objects
+ '
  test_expect_success 'rev-list stops traversal at missing and promised commit' '
        rm -rf repo &&
        test_create_repo repo &&
  
        git -C repo config core.repositoryformatversion 1 &&
        git -C repo config extensions.partialclone "arbitrary string" &&
 -      git -C repo rev-list --exclude-promisor-objects --objects bar >out &&
 +      GIT_TEST_COMMIT_GRAPH=0 git -C repo rev-list --exclude-promisor-objects --objects bar >out &&
        grep $(git -C repo rev-parse bar) out &&
        ! grep $FOO out
  '