t / t5616-partial-clone.sh on commit general improvements (43abf13)
   1#!/bin/sh
   2
   3test_description='git partial clone'
   4
   5. ./test-lib.sh
   6
   7# create a normal "src" repo where we can later create new commits.
   8# expect_1.oids will contain a list of the OIDs of all blobs.
   9test_expect_success 'setup normal src repo' '
  10        echo "{print \$1}" >print_1.awk &&
  11        echo "{print \$2}" >print_2.awk &&
  12
  13        git init src &&
  14        for n in 1 2 3 4
  15        do
  16                echo "This is file: $n" > src/file.$n.txt
  17                git -C src add file.$n.txt
  18                git -C src commit -m "file $n"
  19                git -C src ls-files -s file.$n.txt >>temp
  20        done &&
  21        awk -f print_2.awk <temp | sort >expect_1.oids &&
  22        test_line_count = 4 expect_1.oids
  23'
  24
  25# bare clone "src" giving "srv.bare" for use as our server.
  26test_expect_success 'setup bare clone for server' '
  27        git clone --bare "file://$(pwd)/src" srv.bare &&
  28        git -C srv.bare config --local uploadpack.allowfilter 1 &&
  29        git -C srv.bare config --local uploadpack.allowanysha1inwant 1
  30'
  31
  32# do basic partial clone from "srv.bare"
  33# confirm we are missing all of the known blobs.
  34# confirm partial clone was registered in the local config.
  35test_expect_success 'do partial clone 1' '
  36        git clone --no-checkout --filter=blob:none "file://$(pwd)/srv.bare" pc1 &&
  37
  38        git -C pc1 rev-list --quiet --objects --missing=print HEAD >revs &&
  39        awk -f print_1.awk revs |
  40        sed "s/?//" |
  41        sort >observed.oids &&
  42
  43        test_cmp expect_1.oids observed.oids &&
  44        test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" &&
  45        test "$(git -C pc1 config --local remote.origin.promisor)" = "true" &&
  46        test "$(git -C pc1 config --local remote.origin.partialclonefilter)" = "blob:none"
  47'
  48
  49# checkout master to force dynamic object fetch of blobs at HEAD.
  50test_expect_success 'verify checkout with dynamic object fetch' '
  51        git -C pc1 rev-list --quiet --objects --missing=print HEAD >observed &&
  52        test_line_count = 4 observed &&
  53        git -C pc1 checkout master &&
  54        git -C pc1 rev-list --quiet --objects --missing=print HEAD >observed &&
  55        test_line_count = 0 observed
  56'
  57
  58# create new commits in "src" repo to establish a blame history on file.1.txt
  59# and push to "srv.bare".
  60test_expect_success 'push new commits to server' '
  61        git -C src remote add srv "file://$(pwd)/srv.bare" &&
  62        for x in a b c d e
  63        do
  64                echo "Mod file.1.txt $x" >>src/file.1.txt
  65                git -C src add file.1.txt
  66                git -C src commit -m "mod $x"
  67        done &&
  68        git -C src blame master -- file.1.txt >expect.blame &&
  69        git -C src push -u srv master
  70'
  71
  72# (partial) fetch in the partial clone repo from the promisor remote.
  73# verify that fetch inherited the filter-spec from the config and DOES NOT
  74# have the new blobs.
  75test_expect_success 'partial fetch inherits filter settings' '
  76        git -C pc1 fetch origin &&
  77        git -C pc1 rev-list --quiet --objects --missing=print \
  78                master..origin/master >observed &&
  79        test_line_count = 5 observed
  80'
  81
  82# force dynamic object fetch using diff.
  83# we should only get 1 new blob (for the file in origin/master).
  84test_expect_success 'verify diff causes dynamic object fetch' '
  85        git -C pc1 diff master..origin/master -- file.1.txt &&
  86        git -C pc1 rev-list --quiet --objects --missing=print \
  87                 master..origin/master >observed &&
  88        test_line_count = 4 observed
  89'
  90
  91# force full dynamic object fetch of the file's history using blame.
  92# we should get the intermediate blobs for the file.
  93test_expect_success 'verify blame causes dynamic object fetch' '
  94        git -C pc1 blame origin/master -- file.1.txt >observed.blame &&
  95        test_cmp expect.blame observed.blame &&
  96        git -C pc1 rev-list --quiet --objects --missing=print \
  97                master..origin/master >observed &&
  98        test_line_count = 0 observed
  99'
 100
 101# create new commits in "src" repo to establish a history on file.2.txt
 102# and push to "srv.bare".
 103test_expect_success 'push new commits to server for file.2.txt' '
 104        for x in a b c d e f
 105        do
 106                echo "Mod file.2.txt $x" >>src/file.2.txt
 107                git -C src add file.2.txt
 108                git -C src commit -m "mod $x"
 109        done &&
 110        git -C src push -u srv master
 111'
 112
 113# Do FULL fetch by disabling inherited filter-spec using --no-filter.
 114# Verify we have all the new blobs.
 115test_expect_success 'override inherited filter-spec using --no-filter' '
 116        git -C pc1 fetch --no-filter origin &&
 117        git -C pc1 rev-list --quiet --objects --missing=print \
 118                master..origin/master >observed &&
 119        test_line_count = 0 observed
 120'
 121
 122# create new commits in "src" repo to establish a history on file.3.txt
 123# and push to "srv.bare".
 124test_expect_success 'push new commits to server for file.3.txt' '
 125        for x in a b c d e f
 126        do
 127                echo "Mod file.3.txt $x" >>src/file.3.txt
 128                git -C src add file.3.txt
 129                git -C src commit -m "mod $x"
 130        done &&
 131        git -C src push -u srv master
 132'
 133
 134# Do a partial fetch and then try to manually fetch the missing objects.
 135# This can be used as the basis of a pre-command hook to bulk fetch objects
 136# perhaps combined with a command in dry-run mode.
 137test_expect_success 'manual prefetch of missing objects' '
 138        git -C pc1 fetch --filter=blob:none origin &&
 139
 140        git -C pc1 rev-list --quiet --objects --missing=print \
 141                 master..origin/master >revs &&
 142        awk -f print_1.awk revs |
 143        sed "s/?//" |
 144        sort >observed.oids &&
 145
 146        test_line_count = 6 observed.oids &&
 147        git -C pc1 fetch-pack --stdin "file://$(pwd)/srv.bare" <observed.oids &&
 148
 149        git -C pc1 rev-list --quiet --objects --missing=print \
 150                master..origin/master >revs &&
 151        awk -f print_1.awk revs |
 152        sed "s/?//" |
 153        sort >observed.oids &&
 154
 155        test_line_count = 0 observed.oids
 156'
 157
 158test_expect_success 'partial clone with transfer.fsckobjects=1 uses index-pack --fsck-objects' '
 159        git init src &&
 160        test_commit -C src x &&
 161        test_config -C src uploadpack.allowfilter 1 &&
 162        test_config -C src uploadpack.allowanysha1inwant 1 &&
 163
 164        GIT_TRACE="$(pwd)/trace" git -c transfer.fsckobjects=1 \
 165                clone --filter="blob:none" "file://$(pwd)/src" dst &&
 166        grep "git index-pack.*--fsck-objects" trace
 167'
 168
 169test_expect_success 'use fsck before and after manually fetching a missing subtree' '
 170        # push new commit so server has a subtree
 171        mkdir src/dir &&
 172        echo "in dir" >src/dir/file.txt &&
 173        git -C src add dir/file.txt &&
 174        git -C src commit -m "file in dir" &&
 175        git -C src push -u srv master &&
 176        SUBTREE=$(git -C src rev-parse HEAD:dir) &&
 177
 178        rm -rf dst &&
 179        git clone --no-checkout --filter=tree:0 "file://$(pwd)/srv.bare" dst &&
 180        git -C dst fsck &&
 181
 182        # Make sure we only have commits, and all trees and blobs are missing.
 183        git -C dst rev-list --missing=allow-any --objects master \
 184                >fetched_objects &&
 185        awk -f print_1.awk fetched_objects |
 186        xargs -n1 git -C dst cat-file -t >fetched_types &&
 187
 188        sort -u fetched_types >unique_types.observed &&
 189        echo commit >unique_types.expected &&
 190        test_cmp unique_types.expected unique_types.observed &&
 191
 192        # Auto-fetch a tree with cat-file.
 193        git -C dst cat-file -p $SUBTREE >tree_contents &&
 194        grep file.txt tree_contents &&
 195
 196        # fsck still works after an auto-fetch of a tree.
 197        git -C dst fsck &&
 198
 199        # Auto-fetch all remaining trees and blobs with --missing=error
 200        git -C dst rev-list --missing=error --objects master >fetched_objects &&
 201        test_line_count = 70 fetched_objects &&
 202
 203        awk -f print_1.awk fetched_objects |
 204        xargs -n1 git -C dst cat-file -t >fetched_types &&
 205
 206        sort -u fetched_types >unique_types.observed &&
 207        test_write_lines blob commit tree >unique_types.expected &&
 208        test_cmp unique_types.expected unique_types.observed
 209'
 210
 211test_expect_success 'implicitly construct combine: filter with repeated flags' '
 212        GIT_TRACE=$(pwd)/trace git clone --bare \
 213                --filter=blob:none --filter=tree:1 \
 214                "file://$(pwd)/srv.bare" pc2 &&
 215        grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \
 216                trace &&
 217        git -C pc2 rev-list --objects --missing=allow-any HEAD >objects &&
 218
 219        # We should have gotten some root trees.
 220        grep " $" objects &&
 221        # Should not have gotten any non-root trees or blobs.
 222        ! grep " ." objects &&
 223
 224        xargs -n 1 git -C pc2 cat-file -t <objects >types &&
 225        sort -u types >unique_types.actual &&
 226        test_write_lines commit tree >unique_types.expected &&
 227        test_cmp unique_types.expected unique_types.actual
 228'
 229
 230test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
 231        rm -rf src dst &&
 232        git init src &&
 233        test_commit -C src x &&
 234        test_config -C src uploadpack.allowfilter 1 &&
 235        test_config -C src uploadpack.allowanysha1inwant 1 &&
 236
 237        # Create a tag pointing to a blob.
 238        BLOB=$(echo blob-contents | git -C src hash-object --stdin -w) &&
 239        git -C src tag myblob "$BLOB" &&
 240
 241        git clone --filter="blob:none" "file://$(pwd)/src" dst 2>err &&
 242        ! grep "does not point to a valid object" err &&
 243        git -C dst fsck
 244'
 245
 246test_expect_success 'fetch what is specified on CLI even if already promised' '
 247        rm -rf src dst.git &&
 248        git init src &&
 249        test_commit -C src foo &&
 250        test_config -C src uploadpack.allowfilter 1 &&
 251        test_config -C src uploadpack.allowanysha1inwant 1 &&
 252
 253        git hash-object --stdin <src/foo.t >blob &&
 254
 255        git clone --bare --filter=blob:none "file://$(pwd)/src" dst.git &&
 256        git -C dst.git rev-list --objects --quiet --missing=print HEAD >missing_before &&
 257        grep "?$(cat blob)" missing_before &&
 258        git -C dst.git fetch origin $(cat blob) &&
 259        git -C dst.git rev-list --objects --quiet --missing=print HEAD >missing_after &&
 260        ! grep "?$(cat blob)" missing_after
 261'
 262
 263. "$TEST_DIRECTORY"/lib-httpd.sh
 264start_httpd
 265
 266# Converts bytes into their hexadecimal representation. For example,
 267# "printf 'ab\r\n' | hex_unpack" results in '61620d0a'.
 268hex_unpack () {
 269        perl -e '$/ = undef; $input = <>; print unpack("H2" x length($input), $input)'
 270}
 271
 272# Inserts $1 at the start of the string and every 2 characters thereafter.
 273intersperse () {
 274        sed 's/\(..\)/'$1'\1/g'
 275}
 276
 277# Create a one-time-sed command to replace the existing packfile with $1.
 278replace_packfile () {
 279        # The protocol requires that the packfile be sent in sideband 1, hence
 280        # the extra \x01 byte at the beginning.
 281        printf "1,/packfile/!c %04x\\\\x01%s0000" \
 282                "$(($(wc -c <$1) + 5))" \
 283                "$(hex_unpack <$1 | intersperse '\\x')" \
 284                >"$HTTPD_ROOT_PATH/one-time-sed"
 285}
 286
 287test_expect_success 'upon cloning, check that all refs point to objects' '
 288        SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
 289        rm -rf "$SERVER" repo &&
 290        test_create_repo "$SERVER" &&
 291        test_commit -C "$SERVER" foo &&
 292        test_config -C "$SERVER" uploadpack.allowfilter 1 &&
 293        test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
 294
 295        # Create a tag pointing to a blob.
 296        BLOB=$(echo blob-contents | git -C "$SERVER" hash-object --stdin -w) &&
 297        git -C "$SERVER" tag myblob "$BLOB" &&
 298
 299        # Craft a packfile not including that blob.
 300        git -C "$SERVER" rev-parse HEAD |
 301        git -C "$SERVER" pack-objects --stdout >incomplete.pack &&
 302
 303        # Replace the existing packfile with the crafted one. The protocol
 304        # requires that the packfile be sent in sideband 1, hence the extra
 305        # \x01 byte at the beginning.
 306        replace_packfile incomplete.pack &&
 307
 308        # Use protocol v2 because the sed command looks for the "packfile"
 309        # section header.
 310        test_config -C "$SERVER" protocol.version 2 &&
 311        test_must_fail git -c protocol.version=2 clone \
 312                --filter=blob:none $HTTPD_URL/one_time_sed/server repo 2>err &&
 313
 314        test_i18ngrep "did not send all necessary objects" err &&
 315
 316        # Ensure that the one-time-sed script was used.
 317        ! test -e "$HTTPD_ROOT_PATH/one-time-sed"
 318'
 319
 320test_expect_success 'when partial cloning, tolerate server not sending target of tag' '
 321        SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
 322        rm -rf "$SERVER" repo &&
 323        test_create_repo "$SERVER" &&
 324        test_commit -C "$SERVER" foo &&
 325        test_config -C "$SERVER" uploadpack.allowfilter 1 &&
 326        test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
 327
 328        # Create an annotated tag pointing to a blob.
 329        BLOB=$(echo blob-contents | git -C "$SERVER" hash-object --stdin -w) &&
 330        git -C "$SERVER" tag -m message -a myblob "$BLOB" &&
 331
 332        # Craft a packfile including the tag, but not the blob it points to.
 333        # Also, omit objects referenced from HEAD in order to force a second
 334        # fetch (to fetch missing objects) upon the automatic checkout that
 335        # happens after a clone.
 336        printf "%s\n%s\n--not\n%s\n%s\n" \
 337                $(git -C "$SERVER" rev-parse HEAD) \
 338                $(git -C "$SERVER" rev-parse myblob) \
 339                $(git -C "$SERVER" rev-parse HEAD^{tree}) \
 340                $(git -C "$SERVER" rev-parse myblob^{blob}) |
 341                git -C "$SERVER" pack-objects --thin --stdout >incomplete.pack &&
 342
 343        # Replace the existing packfile with the crafted one. The protocol
 344        # requires that the packfile be sent in sideband 1, hence the extra
 345        # \x01 byte at the beginning.
 346        replace_packfile incomplete.pack &&
 347
 348        # Use protocol v2 because the sed command looks for the "packfile"
 349        # section header.
 350        test_config -C "$SERVER" protocol.version 2 &&
 351
 352        # Exercise to make sure it works.
 353        git -c protocol.version=2 clone \
 354                --filter=blob:none $HTTPD_URL/one_time_sed/server repo 2> err &&
 355        ! grep "missing object referenced by" err &&
 356
 357        # Ensure that the one-time-sed script was used.
 358        ! test -e "$HTTPD_ROOT_PATH/one-time-sed"
 359'
 360
 361test_expect_success 'tolerate server sending REF_DELTA against missing promisor objects' '
 362        SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
 363        rm -rf "$SERVER" repo &&
 364        test_create_repo "$SERVER" &&
 365        test_config -C "$SERVER" uploadpack.allowfilter 1 &&
 366        test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
 367
 368        # Create a commit with 2 blobs to be used as delta bases.
 369        for i in $(test_seq 10)
 370        do
 371                echo "this is a line" >>"$SERVER/foo.txt" &&
 372                echo "this is another line" >>"$SERVER/have.txt"
 373        done &&
 374        git -C "$SERVER" add foo.txt have.txt &&
 375        git -C "$SERVER" commit -m bar &&
 376        git -C "$SERVER" rev-parse HEAD:foo.txt >deltabase_missing &&
 377        git -C "$SERVER" rev-parse HEAD:have.txt >deltabase_have &&
 378
 379        # Clone. The client has deltabase_have but not deltabase_missing.
 380        git -c protocol.version=2 clone --no-checkout \
 381                --filter=blob:none $HTTPD_URL/one_time_sed/server repo &&
 382        git -C repo hash-object -w -- "$SERVER/have.txt" &&
 383
 384        # Sanity check to ensure that the client does not have
 385        # deltabase_missing.
 386        git -C repo rev-list --objects --ignore-missing \
 387                -- $(cat deltabase_missing) >objlist &&
 388        test_line_count = 0 objlist &&
 389
 390        # Another commit. This commit will be fetched by the client.
 391        echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/foo.txt" &&
 392        echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/have.txt" &&
 393        git -C "$SERVER" add foo.txt have.txt &&
 394        git -C "$SERVER" commit -m baz &&
 395
 396        # Pack a thin pack containing, among other things, HEAD:foo.txt
 397        # delta-ed against HEAD^:foo.txt and HEAD:have.txt delta-ed against
 398        # HEAD^:have.txt.
 399        printf "%s\n--not\n%s\n" \
 400                $(git -C "$SERVER" rev-parse HEAD) \
 401                $(git -C "$SERVER" rev-parse HEAD^) |
 402                git -C "$SERVER" pack-objects --thin --stdout >thin.pack &&
 403
 404        # Ensure that the pack contains one delta against HEAD^:foo.txt. Since
 405        # the delta contains at least 26 novel characters, the size cannot be
 406        # contained in 4 bits, so the object header will take up 2 bytes. The
 407        # most significant nybble of the first byte is 0b1111 (0b1 to indicate
 408        # that the header continues, and 0b111 to indicate REF_DELTA), followed
 409        # by any 3 nybbles, then the OID of the delta base.
 410        printf "f.,..%s" $(intersperse "," <deltabase_missing) >want &&
 411        hex_unpack <thin.pack | intersperse "," >have &&
 412        grep $(cat want) have &&
 413
 414        # Ensure that the pack contains one delta against HEAD^:have.txt,
 415        # similar to the above.
 416        printf "f.,..%s" $(intersperse "," <deltabase_have) >want &&
 417        hex_unpack <thin.pack | intersperse "," >have &&
 418        grep $(cat want) have &&
 419
 420        replace_packfile thin.pack &&
 421
 422        # Use protocol v2 because the sed command looks for the "packfile"
 423        # section header.
 424        test_config -C "$SERVER" protocol.version 2 &&
 425
 426        # Fetch the thin pack and ensure that index-pack is able to handle the
 427        # REF_DELTA object with a missing promisor delta base.
 428        GIT_TRACE_PACKET="$(pwd)/trace" git -C repo -c protocol.version=2 fetch &&
 429
 430        # Ensure that the missing delta base was directly fetched, but not the
 431        # one that the client has.
 432        grep "want $(cat deltabase_missing)" trace &&
 433        ! grep "want $(cat deltabase_have)" trace &&
 434
 435        # Ensure that the one-time-sed script was used.
 436        ! test -e "$HTTPD_ROOT_PATH/one-time-sed"
 437'
 438
 439# DO NOT add non-httpd-specific tests here, because the last part of this
 440# test script is only executed when httpd is available and enabled.
 441
 442test_done