t/t0028-working-tree-encoding.sh on commit "convert: check for detectable errors in UTF encodings" (7a17918)
   1#!/bin/sh
   2
   3test_description='working-tree-encoding conversion via gitattributes'
   4
   5. ./test-lib.sh
   6
   7test_expect_success 'setup test files' '
   8        git config core.eol lf &&
   9
  10        text="hallo there!\ncan you read me?" &&
  11        echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
  12        printf "$text" >test.utf8.raw &&
  13        printf "$text" | iconv -f UTF-8 -t UTF-16 >test.utf16.raw &&
  14        printf "$text" | iconv -f UTF-8 -t UTF-32 >test.utf32.raw &&
  15
  16        # Line ending tests
  17        printf "one\ntwo\nthree\n" >lf.utf8.raw &&
  18        printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&
  19
  20        # BOM tests
  21        printf "\0a\0b\0c"                         >nobom.utf16be.raw &&
  22        printf "a\0b\0c\0"                         >nobom.utf16le.raw &&
  23        printf "\376\777\0a\0b\0c"                 >bebom.utf16be.raw &&
  24        printf "\777\376a\0b\0c\0"                 >lebom.utf16le.raw &&
  25        printf "\0\0\0a\0\0\0b\0\0\0c"             >nobom.utf32be.raw &&
  26        printf "a\0\0\0b\0\0\0c\0\0\0"             >nobom.utf32le.raw &&
  27        printf "\0\0\376\777\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
  28        printf "\777\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&
  29
  30        # Add only UTF-16 file, we will add the UTF-32 file later
  31        cp test.utf16.raw test.utf16 &&
  32        cp test.utf32.raw test.utf32 &&
  33        git add .gitattributes test.utf16 &&
  34        git commit -m initial
  35'
  36
  37test_expect_success 'ensure UTF-8 is stored in Git' '
  38        test_when_finished "rm -f test.utf16.git" &&
  39
  40        git cat-file -p :test.utf16 >test.utf16.git &&
  41        test_cmp_bin test.utf8.raw test.utf16.git
  42'
  43
  44test_expect_success 're-encode to UTF-16 on checkout' '
  45        test_when_finished "rm -f test.utf16.raw" &&
  46
  47        rm test.utf16 &&
  48        git checkout test.utf16 &&
  49        test_cmp_bin test.utf16.raw test.utf16
  50'
  51
  52test_expect_success 'check $GIT_DIR/info/attributes support' '
  53        test_when_finished "rm -f test.utf32.git" &&
  54        test_when_finished "git reset --hard HEAD" &&
  55
  56        echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
  57        git add test.utf32 &&
  58
  59        git cat-file -p :test.utf32 >test.utf32.git &&
  60        test_cmp_bin test.utf8.raw test.utf32.git
  61'
  62
  63for i in 16 32
  64do
  65        test_expect_success "check prohibited UTF-${i} BOM" '
  66                test_when_finished "git reset --hard HEAD" &&
  67
  68                echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
  69                echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&
  70
  71                # Here we add a UTF-16 (resp. UTF-32) files with BOM (big/little-endian)
  72                # but we tell Git to treat it as UTF-16BE/UTF-16LE (resp. UTF-32).
  73                # In these cases the BOM is prohibited.
  74                cp bebom.utf${i}be.raw bebom.utf${i}be &&
  75                test_must_fail git add bebom.utf${i}be 2>err.out &&
  76                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
  77                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
  78
  79                cp lebom.utf${i}le.raw lebom.utf${i}be &&
  80                test_must_fail git add lebom.utf${i}be 2>err.out &&
  81                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
  82                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
  83
  84                cp bebom.utf${i}be.raw bebom.utf${i}le &&
  85                test_must_fail git add bebom.utf${i}le 2>err.out &&
  86                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
  87                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
  88
  89                cp lebom.utf${i}le.raw lebom.utf${i}le &&
  90                test_must_fail git add lebom.utf${i}le 2>err.out &&
  91                test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
  92                test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
  93        '
  94
  95        test_expect_success "check required UTF-${i} BOM" '
  96                test_when_finished "git reset --hard HEAD" &&
  97
  98                echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&
  99
 100                cp nobom.utf${i}be.raw nobom.utf${i} &&
 101                test_must_fail git add nobom.utf${i} 2>err.out &&
 102                test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
 103                test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&
 104
 105                cp nobom.utf${i}le.raw nobom.utf${i} &&
 106                test_must_fail git add nobom.utf${i} 2>err.out &&
 107                test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
 108                test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
 109        '
 110
 111        test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
 112                test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
 113                test_when_finished "git reset --hard HEAD^" &&
 114
 115                cat lf.utf8.raw | iconv -f UTF-8 -t UTF-${i} >lf.utf${i}.raw &&
 116                cat crlf.utf8.raw | iconv -f UTF-8 -t UTF-${i} >crlf.utf${i}.raw &&
 117                cp crlf.utf${i}.raw eol.utf${i} &&
 118
 119                cat >expectIndexLF <<-EOF &&
 120                        i/lf    w/-text attr/text               eol.utf${i}
 121                EOF
 122
 123                git add eol.utf${i} &&
 124                git commit -m eol &&
 125
 126                # UTF-${i} with CRLF (Windows line endings)
 127                rm eol.utf${i} &&
 128                git -c core.eol=crlf checkout eol.utf${i} &&
 129                test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&
 130
 131                # Although the file has CRLF in the working tree,
 132                # ensure LF in the index
 133                git ls-files --eol eol.utf${i} >actual &&
 134                test_cmp expectIndexLF actual &&
 135
 136                # UTF-${i} with LF (Unix line endings)
 137                rm eol.utf${i} &&
 138                git -c core.eol=lf checkout eol.utf${i} &&
 139                test_cmp_bin lf.utf${i}.raw eol.utf${i} &&
 140
 141                # The file LF in the working tree, ensure LF in the index
 142                git ls-files --eol eol.utf${i} >actual &&
 143                test_cmp expectIndexLF actual
 144        '
 145done
 146
 147test_expect_success 'check unsupported encodings' '
 148        test_when_finished "git reset --hard HEAD" &&
 149
 150        echo "*.set text working-tree-encoding" >.gitattributes &&
 151        printf "set" >t.set &&
 152        test_must_fail git add t.set 2>err.out &&
 153        test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&
 154
 155        echo "*.unset text -working-tree-encoding" >.gitattributes &&
 156        printf "unset" >t.unset &&
 157        git add t.unset &&
 158
 159        echo "*.empty text working-tree-encoding=" >.gitattributes &&
 160        printf "empty" >t.empty &&
 161        git add t.empty &&
 162
 163        echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
 164        printf "garbage" >t.garbage &&
 165        test_must_fail git add t.garbage 2>err.out &&
 166        test_i18ngrep "failed to encode" err.out
 167'
 168
 169test_expect_success 'error if encoding round trip is not the same during refresh' '
 170        BEFORE_STATE=$(git rev-parse HEAD) &&
 171        test_when_finished "git reset --hard $BEFORE_STATE" &&
 172
 173        # Add and commit a UTF-16 file but skip the "working-tree-encoding"
 174        # filter. Consequently, the in-repo representation is UTF-16 and not
 175        # UTF-8. This simulates a Git version that has no working tree encoding
 176        # support.
 177        echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
 178        echo "hallo" >nonsense.utf16le &&
 179        TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
 180        git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&
 181        COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
 182        git update-ref refs/heads/master $COMMIT &&
 183
 184        test_must_fail git checkout HEAD^ 2>err.out &&
 185        test_i18ngrep "error: .* overwritten by checkout:" err.out
 186'
 187
 188test_expect_success 'error if encoding garbage is already in Git' '
 189        BEFORE_STATE=$(git rev-parse HEAD) &&
 190        test_when_finished "git reset --hard $BEFORE_STATE" &&
 191
 192        # Skip the UTF-16 filter for the added file
 193        # This simulates a Git version that has no checkoutEncoding support
 194        cp nobom.utf16be.raw nonsense.utf16 &&
 195        TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
 196        git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
 197        COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
 198        git update-ref refs/heads/master $COMMIT &&
 199
 200        git diff 2>err.out &&
 201        test_i18ngrep "error: BOM is required" err.out
 202'
 203
 204test_done