Merge branch 'jt/partial-clone-missing-ref-delta-base'
authorJunio C Hamano <gitster@pobox.com>
Fri, 21 Jun 2019 18:24:09 +0000 (11:24 -0700)
committerJunio C Hamano <gitster@pobox.com>
Fri, 21 Jun 2019 18:24:09 +0000 (11:24 -0700)
"git fetch" into a lazy clone forgot to fetch base objects that are
necessary to complete delta in a thin packfile, which has been
corrected.

* jt/partial-clone-missing-ref-delta-base:
t5616: cover case of client having delta base
t5616: use correct flag to check object is missing
index-pack: prefetch missing REF_DELTA bases
t5616: refactor packfile replacement

builtin/index-pack.c
t/t5616-partial-clone.sh
index ccf4eb7e9b3361ee7eb209180d98ce8df757d92d..0d55f73b0b443b60dccc31096675d40691b3eb56 100644 (file)
@@ -14,6 +14,7 @@
 #include "thread-utils.h"
 #include "packfile.h"
 #include "object-store.h"
+#include "fetch-object.h"
 
 static const char index_pack_usage[] =
 "git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
@@ -1351,6 +1352,25 @@ static void fix_unresolved_deltas(struct hashfile *f)
                sorted_by_pos[i] = &ref_deltas[i];
        QSORT(sorted_by_pos, nr_ref_deltas, delta_pos_compare);
 
+       if (repository_format_partial_clone) {
+               /*
+                * Prefetch the delta bases.
+                */
+               struct oid_array to_fetch = OID_ARRAY_INIT;
+               for (i = 0; i < nr_ref_deltas; i++) {
+                       struct ref_delta_entry *d = sorted_by_pos[i];
+                       if (!oid_object_info_extended(the_repository, &d->oid,
+                                                     NULL,
+                                                     OBJECT_INFO_FOR_PREFETCH))
+                               continue;
+                       oid_array_append(&to_fetch, &d->oid);
+               }
+               if (to_fetch.nr)
+                       fetch_objects(repository_format_partial_clone,
+                                     to_fetch.oid, to_fetch.nr);
+               oid_array_clear(&to_fetch);
+       }
+
        for (i = 0; i < nr_ref_deltas; i++) {
                struct ref_delta_entry *d = sorted_by_pos[i];
                enum object_type type;
@@ -1650,8 +1670,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
        int report_end_of_input = 0;
 
        /*
-        * index-pack never needs to fetch missing objects, since it only
-        * accesses the repo to do hash collision checks
+        * index-pack never needs to fetch missing objects except when
+        * REF_DELTA bases are missing (which are explicitly handled). It only
+        * accesses the repo to do hash collision checks and to check which
+        * REF_DELTA bases need to be fetched.
         */
        fetch_if_missing = 0;
 
index 9a8f9886b3e2d82b5c10bf749d485f05bd364db6..b91ef548f86b0e250b8fdcd1b8b764c780218931 100755 (executable)
@@ -244,11 +244,25 @@ test_expect_success 'fetch what is specified on CLI even if already promised' '
 . "$TEST_DIRECTORY"/lib-httpd.sh
 start_httpd
 
-# Converts bytes into a form suitable for inclusion in a sed command. For
-# example, "printf 'ab\r\n' | hex_unpack" results in '\x61\x62\x0d\x0a'.
-sed_escape () {
-       perl -e '$/ = undef; $input = <>; print unpack("H2" x length($input), $input)' |
-               sed 's/\(..\)/\\x\1/g'
+# Converts bytes into their hexadecimal representation. For example,
+# "printf 'ab\r\n' | hex_unpack" results in '61620d0a'.
+hex_unpack () {
+       perl -e '$/ = undef; $input = <>; print unpack("H2" x length($input), $input)'
+}
+
+# Inserts $1 at the start of the string and every 2 characters thereafter.
+intersperse () {
+       sed 's/\(..\)/'$1'\1/g'
+}
+
+# Create a one-time-sed command to replace the existing packfile with $1.
+replace_packfile () {
+       # The protocol requires that the packfile be sent in sideband 1, hence
+       # the extra \x01 byte at the beginning.
+       printf "1,/packfile/!c %04x\\\\x01%s0000" \
+               "$(($(wc -c <$1) + 5))" \
+               "$(hex_unpack <$1 | intersperse '\\x')" \
+               >"$HTTPD_ROOT_PATH/one-time-sed"
 }
 
 test_expect_success 'upon cloning, check that all refs point to objects' '
@@ -270,10 +284,7 @@ test_expect_success 'upon cloning, check that all refs point to objects' '
        # Replace the existing packfile with the crafted one. The protocol
        # requires that the packfile be sent in sideband 1, hence the extra
        # \x01 byte at the beginning.
-       printf "1,/packfile/!c %04x\\\\x01%s0000" \
-               "$(($(wc -c <incomplete.pack) + 5))" \
-               "$(sed_escape <incomplete.pack)" \
-               >"$HTTPD_ROOT_PATH/one-time-sed" &&
+       replace_packfile incomplete.pack &&
 
        # Use protocol v2 because the sed command looks for the "packfile"
        # section header.
@@ -313,10 +324,7 @@ test_expect_success 'when partial cloning, tolerate server not sending target of
        # Replace the existing packfile with the crafted one. The protocol
        # requires that the packfile be sent in sideband 1, hence the extra
        # \x01 byte at the beginning.
-       printf "1,/packfile/!c %04x\\\\x01%s0000" \
-               "$(($(wc -c <incomplete.pack) + 5))" \
-               "$(sed_escape <incomplete.pack)" \
-               >"$HTTPD_ROOT_PATH/one-time-sed" &&
+       replace_packfile incomplete.pack &&
 
        # Use protocol v2 because the sed command looks for the "packfile"
        # section header.
@@ -331,4 +339,82 @@ test_expect_success 'when partial cloning, tolerate server not sending target of
        ! test -e "$HTTPD_ROOT_PATH/one-time-sed"
 '
 
+test_expect_success 'tolerate server sending REF_DELTA against missing promisor objects' '
+       SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
+       rm -rf "$SERVER" repo &&
+       test_create_repo "$SERVER" &&
+       test_config -C "$SERVER" uploadpack.allowfilter 1 &&
+       test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
+
+       # Create a commit with 2 blobs to be used as delta bases.
+       for i in $(test_seq 10)
+       do
+               echo "this is a line" >>"$SERVER/foo.txt" &&
+               echo "this is another line" >>"$SERVER/have.txt"
+       done &&
+       git -C "$SERVER" add foo.txt have.txt &&
+       git -C "$SERVER" commit -m bar &&
+       git -C "$SERVER" rev-parse HEAD:foo.txt >deltabase_missing &&
+       git -C "$SERVER" rev-parse HEAD:have.txt >deltabase_have &&
+
+       # Clone. The client has deltabase_have but not deltabase_missing.
+       git -c protocol.version=2 clone --no-checkout \
+               --filter=blob:none $HTTPD_URL/one_time_sed/server repo &&
+       git -C repo hash-object -w -- "$SERVER/have.txt" &&
+
+       # Sanity check to ensure that the client does not have
+       # deltabase_missing.
+       git -C repo rev-list --objects --ignore-missing \
+               -- $(cat deltabase_missing) >objlist &&
+       test_line_count = 0 objlist &&
+
+       # Another commit. This commit will be fetched by the client.
+       echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/foo.txt" &&
+       echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/have.txt" &&
+       git -C "$SERVER" add foo.txt have.txt &&
+       git -C "$SERVER" commit -m baz &&
+
+       # Pack a thin pack containing, among other things, HEAD:foo.txt
+       # delta-ed against HEAD^:foo.txt and HEAD:have.txt delta-ed against
+       # HEAD^:have.txt.
+       printf "%s\n--not\n%s\n" \
+               $(git -C "$SERVER" rev-parse HEAD) \
+               $(git -C "$SERVER" rev-parse HEAD^) |
+               git -C "$SERVER" pack-objects --thin --stdout >thin.pack &&
+
+       # Ensure that the pack contains one delta against HEAD^:foo.txt. Since
+       # the delta contains at least 26 novel characters, the size cannot be
+       # contained in 4 bits, so the object header will take up 2 bytes. The
+       # most significant nybble of the first byte is 0b1111 (0b1 to indicate
+       # that the header continues, and 0b111 to indicate REF_DELTA), followed
+       # by any 3 nybbles, then the OID of the delta base.
+       printf "f.,..%s" $(intersperse "," <deltabase_missing) >want &&
+       hex_unpack <thin.pack | intersperse "," >have &&
+       grep $(cat want) have &&
+
+       # Ensure that the pack contains one delta against HEAD^:have.txt,
+       # similar to the above.
+       printf "f.,..%s" $(intersperse "," <deltabase_have) >want &&
+       hex_unpack <thin.pack | intersperse "," >have &&
+       grep $(cat want) have &&
+
+       replace_packfile thin.pack &&
+
+       # Use protocol v2 because the sed command looks for the "packfile"
+       # section header.
+       test_config -C "$SERVER" protocol.version 2 &&
+
+       # Fetch the thin pack and ensure that index-pack is able to handle the
+       # REF_DELTA object with a missing promisor delta base.
+       GIT_TRACE_PACKET="$(pwd)/trace" git -C repo -c protocol.version=2 fetch &&
+
+       # Ensure that the missing delta base was directly fetched, but not the
+       # one that the client has.
+       grep "want $(cat deltabase_missing)" trace &&
+       ! grep "want $(cat deltabase_have)" trace &&
+
+       # Ensure that the one-time-sed script was used.
+       ! test -e "$HTTPD_ROOT_PATH/one-time-sed"
+'
+
 test_done