t / t0028-working-tree-encoding.shon commit Merge branch 'jk/loose-object-cache-oid' (cba595a)
   1#!/bin/sh
   2
   3test_description='working-tree-encoding conversion via gitattributes'
   4
   5. ./test-lib.sh
   6
   7GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING
   8
   9test_expect_success 'setup test files' '
  10        git config core.eol lf &&
  11
  12        text="hallo there!\ncan you read me?" &&
  13        echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
  14        echo "*.utf16lebom text working-tree-encoding=UTF-16LE-BOM" >>.gitattributes &&
  15        printf "$text" >test.utf8.raw &&
  16        printf "$text" | iconv -f UTF-8 -t UTF-16 >test.utf16.raw &&
  17        printf "$text" | iconv -f UTF-8 -t UTF-32 >test.utf32.raw &&
  18        printf "\377\376"                         >test.utf16lebom.raw &&
  19        printf "$text" | iconv -f UTF-8 -t UTF-32LE >>test.utf16lebom.raw &&
  20
  21        # Line ending tests
  22        printf "one\ntwo\nthree\n" >lf.utf8.raw &&
  23        printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&
  24
  25        # BOM tests
  26        printf "\0a\0b\0c"                         >nobom.utf16be.raw &&
  27        printf "a\0b\0c\0"                         >nobom.utf16le.raw &&
  28        printf "\376\777\0a\0b\0c"                 >bebom.utf16be.raw &&
  29        printf "\777\376a\0b\0c\0"                 >lebom.utf16le.raw &&
  30        printf "\0\0\0a\0\0\0b\0\0\0c"             >nobom.utf32be.raw &&
  31        printf "a\0\0\0b\0\0\0c\0\0\0"             >nobom.utf32le.raw &&
  32        printf "\0\0\376\777\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
  33        printf "\777\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&
  34
  35        # Add only UTF-16 file, we will add the UTF-32 file later
  36        cp test.utf16.raw test.utf16 &&
  37        cp test.utf32.raw test.utf32 &&
  38        cp test.utf16lebom.raw test.utf16lebom &&
  39        git add .gitattributes test.utf16 test.utf16lebom &&
  40        git commit -m initial
  41'
  42
  43test_expect_success 'ensure UTF-8 is stored in Git' '
  44        test_when_finished "rm -f test.utf16.git" &&
  45
  46        git cat-file -p :test.utf16 >test.utf16.git &&
  47        test_cmp_bin test.utf8.raw test.utf16.git
  48'
  49
  50test_expect_success 're-encode to UTF-16 on checkout' '
  51        test_when_finished "rm -f test.utf16.raw" &&
  52
  53        rm test.utf16 &&
  54        git checkout test.utf16 &&
  55        test_cmp_bin test.utf16.raw test.utf16
  56'
  57
  58test_expect_success 're-encode to UTF-16-LE-BOM on checkout' '
  59        rm test.utf16lebom &&
  60        git checkout test.utf16lebom &&
  61        test_cmp_bin test.utf16lebom.raw test.utf16lebom
  62'
  63
  64test_expect_success 'check $GIT_DIR/info/attributes support' '
  65        test_when_finished "rm -f test.utf32.git" &&
  66        test_when_finished "git reset --hard HEAD" &&
  67
  68        echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
  69        git add test.utf32 &&
  70
  71        git cat-file -p :test.utf32 >test.utf32.git &&
  72        test_cmp_bin test.utf8.raw test.utf32.git
  73'
  74
  75for i in 16 32
  76do
  77        test_expect_success "check prohibited UTF-${i} BOM" '
  78                test_when_finished "git reset --hard HEAD" &&
  79
  80                echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
  81                echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&
  82
  83                # Here we add a UTF-16 (resp. UTF-32) files with BOM (big/little-endian)
  84                # but we tell Git to treat it as UTF-16BE/UTF-16LE (resp. UTF-32).
  85                # In these cases the BOM is prohibited.
  86                cp bebom.utf${i}be.raw bebom.utf${i}be &&
  87                test_must_fail git add bebom.utf${i}be 2>err.out &&
  88                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
  89                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
  90
  91                cp lebom.utf${i}le.raw lebom.utf${i}be &&
  92                test_must_fail git add lebom.utf${i}be 2>err.out &&
  93                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
  94                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
  95
  96                cp bebom.utf${i}be.raw bebom.utf${i}le &&
  97                test_must_fail git add bebom.utf${i}le 2>err.out &&
  98                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
  99                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
 100
 101                cp lebom.utf${i}le.raw lebom.utf${i}le &&
 102                test_must_fail git add lebom.utf${i}le 2>err.out &&
 103                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
 104                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
 105        '
 106
 107        test_expect_success "check required UTF-${i} BOM" '
 108                test_when_finished "git reset --hard HEAD" &&
 109
 110                echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&
 111
 112                cp nobom.utf${i}be.raw nobom.utf${i} &&
 113                test_must_fail git add nobom.utf${i} 2>err.out &&
 114                test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
 115                test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&
 116
 117                cp nobom.utf${i}le.raw nobom.utf${i} &&
 118                test_must_fail git add nobom.utf${i} 2>err.out &&
 119                test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
 120                test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
 121        '
 122
 123        test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
 124                test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
 125                test_when_finished "git reset --hard HEAD^" &&
 126
 127                cat lf.utf8.raw | iconv -f UTF-8 -t UTF-${i} >lf.utf${i}.raw &&
 128                cat crlf.utf8.raw | iconv -f UTF-8 -t UTF-${i} >crlf.utf${i}.raw &&
 129                cp crlf.utf${i}.raw eol.utf${i} &&
 130
 131                cat >expectIndexLF <<-EOF &&
 132                        i/lf    w/-text attr/text               eol.utf${i}
 133                EOF
 134
 135                git add eol.utf${i} &&
 136                git commit -m eol &&
 137
 138                # UTF-${i} with CRLF (Windows line endings)
 139                rm eol.utf${i} &&
 140                git -c core.eol=crlf checkout eol.utf${i} &&
 141                test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&
 142
 143                # Although the file has CRLF in the working tree,
 144                # ensure LF in the index
 145                git ls-files --eol eol.utf${i} >actual &&
 146                test_cmp expectIndexLF actual &&
 147
 148                # UTF-${i} with LF (Unix line endings)
 149                rm eol.utf${i} &&
 150                git -c core.eol=lf checkout eol.utf${i} &&
 151                test_cmp_bin lf.utf${i}.raw eol.utf${i} &&
 152
 153                # The file LF in the working tree, ensure LF in the index
 154                git ls-files --eol eol.utf${i} >actual &&
 155                test_cmp expectIndexLF actual
 156        '
 157done
 158
 159test_expect_success 'check unsupported encodings' '
 160        test_when_finished "git reset --hard HEAD" &&
 161
 162        echo "*.set text working-tree-encoding" >.gitattributes &&
 163        printf "set" >t.set &&
 164        test_must_fail git add t.set 2>err.out &&
 165        test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&
 166
 167        echo "*.unset text -working-tree-encoding" >.gitattributes &&
 168        printf "unset" >t.unset &&
 169        git add t.unset &&
 170
 171        echo "*.empty text working-tree-encoding=" >.gitattributes &&
 172        printf "empty" >t.empty &&
 173        git add t.empty &&
 174
 175        echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
 176        printf "garbage" >t.garbage &&
 177        test_must_fail git add t.garbage 2>err.out &&
 178        test_i18ngrep "failed to encode" err.out
 179'
 180
 181test_expect_success 'error if encoding round trip is not the same during refresh' '
 182        BEFORE_STATE=$(git rev-parse HEAD) &&
 183        test_when_finished "git reset --hard $BEFORE_STATE" &&
 184
 185        # Add and commit a UTF-16 file but skip the "working-tree-encoding"
 186        # filter. Consequently, the in-repo representation is UTF-16 and not
 187        # UTF-8. This simulates a Git version that has no working tree encoding
 188        # support.
 189        echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
 190        echo "hallo" >nonsense.utf16le &&
 191        TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
 192        git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&
 193        COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
 194        git update-ref refs/heads/master $COMMIT &&
 195
 196        test_must_fail git checkout HEAD^ 2>err.out &&
 197        test_i18ngrep "error: .* overwritten by checkout:" err.out
 198'
 199
 200test_expect_success 'error if encoding garbage is already in Git' '
 201        BEFORE_STATE=$(git rev-parse HEAD) &&
 202        test_when_finished "git reset --hard $BEFORE_STATE" &&
 203
 204        # Skip the UTF-16 filter for the added file
 205        # This simulates a Git version that has no checkoutEncoding support
 206        cp nobom.utf16be.raw nonsense.utf16 &&
 207        TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
 208        git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
 209        COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
 210        git update-ref refs/heads/master $COMMIT &&
 211
 212        git diff 2>err.out &&
 213        test_i18ngrep "error: BOM is required" err.out
 214'
 215
 216test_lazy_prereq ICONV_SHIFT_JIS '
 217        iconv -f UTF-8 -t SHIFT-JIS </dev/null
 218'
 219
 220test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' '
 221        test_when_finished "rm -f roundtrip.shift roundtrip.utf16" &&
 222        test_when_finished "git reset --hard HEAD" &&
 223
 224        text="hallo there!\nroundtrip test here!" &&
 225        printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
 226        printf "$text" | iconv -f UTF-8 -t UTF-16 >roundtrip.utf16 &&
 227        echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&
 228
 229        # SHIFT-JIS encoded files are round-trip checked by default...
 230        GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 |
 231                grep "Checking roundtrip encoding for SHIFT-JIS" &&
 232        git reset &&
 233
 234        # ... unless we overwrite the Git config!
 235        ! GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \
 236                add .gitattributes roundtrip.shift 2>&1 |
 237                grep "Checking roundtrip encoding for SHIFT-JIS" &&
 238        git reset &&
 239
 240        # UTF-16 encoded files should not be round-trip checked by default...
 241        ! GIT_TRACE=1 git add roundtrip.utf16 2>&1 |
 242                grep "Checking roundtrip encoding for UTF-16" &&
 243        git reset &&
 244
 245        # ... unless we tell Git to check it!
 246        GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \
 247                add roundtrip.utf16 2>&1 |
 248                grep "Checking roundtrip encoding for utf-16" &&
 249        git reset &&
 250
 251        # ... unless we tell Git to check it!
 252        # (here we also check that the casing of the encoding is irrelevant)
 253        GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \
 254                add roundtrip.utf16 2>&1 |
 255                grep "Checking roundtrip encoding for utf-16" &&
 256        git reset
 257'
 258
 259test_done