t/t0028-working-tree-encoding.sh on commit Merge branch 'md/list-objects-filter-combo' (627b826)
   1#!/bin/sh
   2
   3test_description='working-tree-encoding conversion via gitattributes'
   4
   5. ./test-lib.sh
   6
   7GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING
   8
   9test_lazy_prereq NO_UTF16_BOM '
  10        test $(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) = 6
  11'
  12
  13test_lazy_prereq NO_UTF32_BOM '
  14        test $(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) = 12
  15'
  16
  17write_utf16 () {
  18        if test_have_prereq NO_UTF16_BOM
  19        then
  20                printf '\xfe\xff'
  21        fi &&
  22        iconv -f UTF-8 -t UTF-16
  23}
  24
  25write_utf32 () {
  26        if test_have_prereq NO_UTF32_BOM
  27        then
  28                printf '\x00\x00\xfe\xff'
  29        fi &&
  30        iconv -f UTF-8 -t UTF-32
  31}
  32
  33test_expect_success 'setup test files' '
  34        git config core.eol lf &&
  35
  36        text="hallo there!\ncan you read me?" &&
  37        echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
  38        echo "*.utf16lebom text working-tree-encoding=UTF-16LE-BOM" >>.gitattributes &&
  39        printf "$text" >test.utf8.raw &&
  40        printf "$text" | write_utf16 >test.utf16.raw &&
  41        printf "$text" | write_utf32 >test.utf32.raw &&
  42        printf "\377\376"                         >test.utf16lebom.raw &&
  43        printf "$text" | iconv -f UTF-8 -t UTF-32LE >>test.utf16lebom.raw &&
  44
  45        # Line ending tests
  46        printf "one\ntwo\nthree\n" >lf.utf8.raw &&
  47        printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&
  48
  49        # BOM tests
  50        printf "\0a\0b\0c"                         >nobom.utf16be.raw &&
  51        printf "a\0b\0c\0"                         >nobom.utf16le.raw &&
  52        printf "\376\377\0a\0b\0c"                 >bebom.utf16be.raw &&
  53        printf "\377\376a\0b\0c\0"                 >lebom.utf16le.raw &&
  54        printf "\0\0\0a\0\0\0b\0\0\0c"             >nobom.utf32be.raw &&
  55        printf "a\0\0\0b\0\0\0c\0\0\0"             >nobom.utf32le.raw &&
  56        printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
  57        printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&
  58
  59        # Add only UTF-16 file, we will add the UTF-32 file later
  60        cp test.utf16.raw test.utf16 &&
  61        cp test.utf32.raw test.utf32 &&
  62        cp test.utf16lebom.raw test.utf16lebom &&
  63        git add .gitattributes test.utf16 test.utf16lebom &&
  64        git commit -m initial
  65'
  66
  67test_expect_success 'ensure UTF-8 is stored in Git' '
  68        test_when_finished "rm -f test.utf16.git" &&
  69
  70        git cat-file -p :test.utf16 >test.utf16.git &&
  71        test_cmp_bin test.utf8.raw test.utf16.git
  72'
  73
  74test_expect_success 're-encode to UTF-16 on checkout' '
  75        test_when_finished "rm -f test.utf16.raw" &&
  76
  77        rm test.utf16 &&
  78        git checkout test.utf16 &&
  79        test_cmp_bin test.utf16.raw test.utf16
  80'
  81
  82test_expect_success 're-encode to UTF-16-LE-BOM on checkout' '
  83        rm test.utf16lebom &&
  84        git checkout test.utf16lebom &&
  85        test_cmp_bin test.utf16lebom.raw test.utf16lebom
  86'
  87
  88test_expect_success 'check $GIT_DIR/info/attributes support' '
  89        test_when_finished "rm -f test.utf32.git" &&
  90        test_when_finished "git reset --hard HEAD" &&
  91
  92        echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
  93        git add test.utf32 &&
  94
  95        git cat-file -p :test.utf32 >test.utf32.git &&
  96        test_cmp_bin test.utf8.raw test.utf32.git
  97'
  98
  99for i in 16 32
 100do
 101        test_expect_success "check prohibited UTF-${i} BOM" '
 102                test_when_finished "git reset --hard HEAD" &&
 103
 104                echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
 105                echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&
 106
 107                # Here we add a UTF-16 (resp. UTF-32) files with BOM (big/little-endian)
 108                # but we tell Git to treat it as UTF-16BE/UTF-16LE (resp. UTF-32).
 109                # In these cases the BOM is prohibited.
 110                cp bebom.utf${i}be.raw bebom.utf${i}be &&
 111                test_must_fail git add bebom.utf${i}be 2>err.out &&
 112                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
 113                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
 114
 115                cp lebom.utf${i}le.raw lebom.utf${i}be &&
 116                test_must_fail git add lebom.utf${i}be 2>err.out &&
 117                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
 118                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
 119
 120                cp bebom.utf${i}be.raw bebom.utf${i}le &&
 121                test_must_fail git add bebom.utf${i}le 2>err.out &&
 122                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
 123                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
 124
 125                cp lebom.utf${i}le.raw lebom.utf${i}le &&
 126                test_must_fail git add lebom.utf${i}le 2>err.out &&
 127                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
 128                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
 129        '
 130
 131        test_expect_success "check required UTF-${i} BOM" '
 132                test_when_finished "git reset --hard HEAD" &&
 133
 134                echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&
 135
 136                cp nobom.utf${i}be.raw nobom.utf${i} &&
 137                test_must_fail git add nobom.utf${i} 2>err.out &&
 138                test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
 139                test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&
 140
 141                cp nobom.utf${i}le.raw nobom.utf${i} &&
 142                test_must_fail git add nobom.utf${i} 2>err.out &&
 143                test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
 144                test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
 145        '
 146
 147        test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
 148                test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
 149                test_when_finished "git reset --hard HEAD^" &&
 150
 151                cat lf.utf8.raw | write_utf${i} >lf.utf${i}.raw &&
 152                cat crlf.utf8.raw | write_utf${i} >crlf.utf${i}.raw &&
 153                cp crlf.utf${i}.raw eol.utf${i} &&
 154
 155                cat >expectIndexLF <<-EOF &&
 156                        i/lf    w/-text attr/text               eol.utf${i}
 157                EOF
 158
 159                git add eol.utf${i} &&
 160                git commit -m eol &&
 161
 162                # UTF-${i} with CRLF (Windows line endings)
 163                rm eol.utf${i} &&
 164                git -c core.eol=crlf checkout eol.utf${i} &&
 165                test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&
 166
 167                # Although the file has CRLF in the working tree,
 168                # ensure LF in the index
 169                git ls-files --eol eol.utf${i} >actual &&
 170                test_cmp expectIndexLF actual &&
 171
 172                # UTF-${i} with LF (Unix line endings)
 173                rm eol.utf${i} &&
 174                git -c core.eol=lf checkout eol.utf${i} &&
 175                test_cmp_bin lf.utf${i}.raw eol.utf${i} &&
 176
 177                # The file LF in the working tree, ensure LF in the index
 178                git ls-files --eol eol.utf${i} >actual &&
 179                test_cmp expectIndexLF actual
 180        '
 181done
 182
 183test_expect_success 'check unsupported encodings' '
 184        test_when_finished "git reset --hard HEAD" &&
 185
 186        echo "*.set text working-tree-encoding" >.gitattributes &&
 187        printf "set" >t.set &&
 188        test_must_fail git add t.set 2>err.out &&
 189        test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&
 190
 191        echo "*.unset text -working-tree-encoding" >.gitattributes &&
 192        printf "unset" >t.unset &&
 193        git add t.unset &&
 194
 195        echo "*.empty text working-tree-encoding=" >.gitattributes &&
 196        printf "empty" >t.empty &&
 197        git add t.empty &&
 198
 199        echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
 200        printf "garbage" >t.garbage &&
 201        test_must_fail git add t.garbage 2>err.out &&
 202        test_i18ngrep "failed to encode" err.out
 203'
 204
 205test_expect_success 'error if encoding round trip is not the same during refresh' '
 206        BEFORE_STATE=$(git rev-parse HEAD) &&
 207        test_when_finished "git reset --hard $BEFORE_STATE" &&
 208
 209        # Add and commit a UTF-16 file but skip the "working-tree-encoding"
 210        # filter. Consequently, the in-repo representation is UTF-16 and not
 211        # UTF-8. This simulates a Git version that has no working tree encoding
 212        # support.
 213        echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
 214        echo "hallo" >nonsense.utf16le &&
 215        TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
 216        git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&
 217        COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
 218        git update-ref refs/heads/master $COMMIT &&
 219
 220        test_must_fail git checkout HEAD^ 2>err.out &&
 221        test_i18ngrep "error: .* overwritten by checkout:" err.out
 222'
 223
 224test_expect_success 'error if encoding garbage is already in Git' '
 225        BEFORE_STATE=$(git rev-parse HEAD) &&
 226        test_when_finished "git reset --hard $BEFORE_STATE" &&
 227
 228        # Skip the UTF-16 filter for the added file
 229        # This simulates a Git version that has no checkoutEncoding support
 230        cp nobom.utf16be.raw nonsense.utf16 &&
 231        TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
 232        git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
 233        COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
 234        git update-ref refs/heads/master $COMMIT &&
 235
 236        git diff 2>err.out &&
 237        test_i18ngrep "error: BOM is required" err.out
 238'
 239
 240test_lazy_prereq ICONV_SHIFT_JIS '
 241        iconv -f UTF-8 -t SHIFT-JIS </dev/null
 242'
 243
 244test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' '
 245        test_when_finished "rm -f roundtrip.shift roundtrip.utf16" &&
 246        test_when_finished "git reset --hard HEAD" &&
 247
 248        text="hallo there!\nroundtrip test here!" &&
 249        printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
 250        printf "$text" | write_utf16 >roundtrip.utf16 &&
 251        echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&
 252
 253        # SHIFT-JIS encoded files are round-trip checked by default...
 254        GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 |
 255                grep "Checking roundtrip encoding for SHIFT-JIS" &&
 256        git reset &&
 257
 258        # ... unless we overwrite the Git config!
 259        ! GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \
 260                add .gitattributes roundtrip.shift 2>&1 |
 261                grep "Checking roundtrip encoding for SHIFT-JIS" &&
 262        git reset &&
 263
 264        # UTF-16 encoded files should not be round-trip checked by default...
 265        ! GIT_TRACE=1 git add roundtrip.utf16 2>&1 |
 266                grep "Checking roundtrip encoding for UTF-16" &&
 267        git reset &&
 268
 269        # ... unless we tell Git to check it!
 270        GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \
 271                add roundtrip.utf16 2>&1 |
 272                grep "Checking roundtrip encoding for utf-16" &&
 273        git reset &&
 274
 275        # ... unless we tell Git to check it!
 276        # (here we also check that the casing of the encoding is irrelevant)
 277        GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \
 278                add roundtrip.utf16 2>&1 |
 279                grep "Checking roundtrip encoding for utf-16" &&
 280        git reset
 281'
 282
 283test_done