1#!/bin/sh
23
test_description='working-tree-encoding conversion via gitattributes'
45
. ./test-lib.sh
67
# Create the raw fixtures (plain text, line-ending and BOM variants) that the
# remaining tests copy into place, then commit the first UTF-16 file.
test_expect_success 'setup test files' '
	git config core.eol lf &&

	text="hallo there!\ncan you read me?" &&
	echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
	# $text is used as the printf format on purpose so that its "\n"
	# escape is turned into a real newline.
	printf "$text" >test.utf8.raw &&
	# Encode from the file rather than through a pipeline so that no
	# exit code is hidden from the &&-chain.
	iconv -f UTF-8 -t UTF-16 <test.utf8.raw >test.utf16.raw &&
	iconv -f UTF-8 -t UTF-32 <test.utf8.raw >test.utf32.raw &&

	# Line ending tests
	printf "one\ntwo\nthree\n" >lf.utf8.raw &&
	printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&

	# BOM tests
	# The byte order mark U+FEFF is the byte pair 0xFE 0xFF, i.e. octal
	# \376\377 in big-endian and \377\376 in little-endian order. An
	# octal escape has to fit into one byte, so \377 is the largest
	# valid value.
	printf "\0a\0b\0c" >nobom.utf16be.raw &&
	printf "a\0b\0c\0" >nobom.utf16le.raw &&
	printf "\376\377\0a\0b\0c" >bebom.utf16be.raw &&
	printf "\377\376a\0b\0c\0" >lebom.utf16le.raw &&
	printf "\0\0\0a\0\0\0b\0\0\0c" >nobom.utf32be.raw &&
	printf "a\0\0\0b\0\0\0c\0\0\0" >nobom.utf32le.raw &&
	printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
	printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&

	# Add only the UTF-16 file; the UTF-32 file is added by a later test
	cp test.utf16.raw test.utf16 &&
	cp test.utf32.raw test.utf32 &&
	git add .gitattributes test.utf16 &&
	git commit -m initial
'
3637
# The staged blob of the UTF-16 working tree file must be byte-identical to
# the UTF-8 fixture.
test_expect_success 'ensure UTF-8 is stored in Git' '
	stored=test.utf16.git &&
	test_when_finished "rm -f $stored" &&

	# Dump the index version of the file and compare it with the UTF-8 text
	git cat-file -p :test.utf16 >"$stored" &&
	test_cmp_bin test.utf8.raw "$stored"
'
4344
# Removing the working tree file and checking it out again must reproduce the
# UTF-16 fixture byte for byte.
test_expect_success 're-encode to UTF-16 on checkout' '
	expect=test.utf16.raw &&
	test_when_finished "rm -f $expect" &&

	rm test.utf16 &&
	git checkout test.utf16 &&
	test_cmp_bin "$expect" test.utf16
'
5152
# The encoding attribute is declared in .git/info/attributes instead of
# .gitattributes; the staged blob must still match the UTF-8 fixture.
test_expect_success 'check $GIT_DIR/info/attributes support' '
	staged=test.utf32.git &&
	test_when_finished "rm -f $staged" &&
	test_when_finished "git reset --hard HEAD" &&

	echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
	git add test.utf32 &&

	# Dump the index version of the file and compare it with the UTF-8 text
	git cat-file -p :test.utf32 >"$staged" &&
	test_cmp_bin test.utf8.raw "$staged"
'
6263
for i in 16 32
64do
test_expect_success "check prohibited UTF-${i} BOM" '
	test_when_finished "git reset --hard HEAD" &&

	echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
	echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&

	# Add UTF-16 (resp. UTF-32) files that start with a big-endian or
	# little-endian BOM while the attributes declare them as
	# UTF-16BE/UTF-16LE (resp. UTF-32BE/UTF-32LE). In these cases the BOM
	# is prohibited and "git add" has to refuse the file.
	#
	# Each pair is <file extension>:<encoding spelling in the message>.
	for pair in be:be le:LE
	do
		ext=${pair%:*} &&
		enc=${pair#*:} &&
		for order in be le
		do
			cp ${order}bom.utf${i}${order}.raw ${order}bom.utf${i}${ext} &&
			test_must_fail git add ${order}bom.utf${i}${ext} 2>err.out &&
			test_i18ngrep "fatal: BOM is prohibited .* utf-${i}${enc}" err.out &&
			test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out ||
			return 1
		done
	done
'
9495
test_expect_success "check required UTF-${i} BOM" '
	test_when_finished "git reset --hard HEAD" &&

	echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&

	# A file declared as plain UTF-16 (resp. UTF-32) must start with a
	# BOM, so both BOM-less fixtures (big- and little-endian) have to be
	# refused by "git add".
	for order in be le
	do
		cp nobom.utf${i}${order}.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out ||
		return 1
	done
'
110111
test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
	# Remember the starting commit so that the cleanup restores it even
	# if the test fails before the new commit is created.
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	iconv -f UTF-8 -t UTF-${i} <lf.utf8.raw >lf.utf${i}.raw &&
	iconv -f UTF-8 -t UTF-${i} <crlf.utf8.raw >crlf.utf${i}.raw &&
	cp crlf.utf${i}.raw eol.utf${i} &&

	# "git ls-files --eol" pads its columns and separates the path with
	# a tab; spell the widths out so that the expectation cannot be
	# broken by whitespace changes in this file.
	printf "i/%-5s w/%-5s attr/%-17s\t%s\n" \
		lf -text text "eol.utf${i}" >expectIndexLF &&

	git add eol.utf${i} &&
	git commit -m eol &&

	# UTF-${i} with CRLF (Windows line endings)
	rm eol.utf${i} &&
	git -c core.eol=crlf checkout eol.utf${i} &&
	test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&

	# Although the file has CRLF in the working tree,
	# ensure LF in the index
	git ls-files --eol eol.utf${i} >actual &&
	test_cmp expectIndexLF actual &&

	# UTF-${i} with LF (Unix line endings)
	rm eol.utf${i} &&
	git -c core.eol=lf checkout eol.utf${i} &&
	test_cmp_bin lf.utf${i}.raw eol.utf${i} &&

	# The file has LF in the working tree, ensure LF in the index
	git ls-files --eol eol.utf${i} >actual &&
	test_cmp expectIndexLF actual
'
145done
146147
# Exercise attribute values that do not name a usable encoding.
test_expect_success 'check unsupported encodings' '
	test_when_finished "git reset --hard HEAD" &&

	# Attribute set without a value: adding the file must fail
	printf "%s\n" "*.set text working-tree-encoding" >.gitattributes &&
	printf "%s" "set" >t.set &&
	test_must_fail git add t.set 2>err.out &&
	test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&

	# Attribute explicitly unset: adding the file must succeed
	printf "%s\n" "*.unset text -working-tree-encoding" >.gitattributes &&
	printf "%s" "unset" >t.unset &&
	git add t.unset &&

	# Attribute set to the empty string: adding the file must succeed
	printf "%s\n" "*.empty text working-tree-encoding=" >.gitattributes &&
	printf "%s" "empty" >t.empty &&
	git add t.empty &&

	# Attribute set to an unknown encoding name: adding the file must fail
	printf "%s\n" "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
	printf "%s" "garbage" >t.garbage &&
	test_must_fail git add t.garbage 2>err.out &&
	test_i18ngrep "failed to encode" err.out
'
168169
test_expect_success 'error if encoding round trip is not the same during refresh' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Add and commit a file that is declared as UTF-16LE but skip the
	# "working-tree-encoding" filter, so the blob is stored exactly as
	# written instead of being converted. This simulates a Git version
	# that has no working tree encoding support.
	echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
	echo "hallo" >nonsense.utf16le &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&
	# Keep every git call in its own step so that a failure is not
	# hidden inside a nested command substitution.
	TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TREE) &&
	# Updating HEAD moves the checked-out branch whatever its name is.
	git update-ref HEAD $COMMIT &&

	test_must_fail git checkout HEAD^ 2>err.out &&
	test_i18ngrep "error: .* overwritten by checkout:" err.out
'
187188
test_expect_success 'error if encoding garbage is already in Git' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Skip the UTF-16 filter for the added file, so the BOM-less
	# UTF-16BE bytes are stored as they are. This simulates a Git
	# version that has no working-tree-encoding support.
	cp nobom.utf16be.raw nonsense.utf16 &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
	# Keep every git call in its own step so that a failure is not
	# hidden inside a nested command substitution.
	TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TREE) &&
	# Updating HEAD moves the checked-out branch whatever its name is.
	git update-ref HEAD $COMMIT &&

	# The diff itself has to succeed; the problem is only reported on
	# stderr.
	git diff 2>err.out &&
	test_i18ngrep "error: BOM is required" err.out
'
203204
test_done