1#!/bin/sh
2
3test_description='working-tree-encoding conversion via gitattributes'
4
5. ./test-lib.sh
6
7GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING
8
test_expect_success 'setup test files' '
	git config core.eol lf &&

	# Reference text as UTF-8 plus the same text converted by iconv to
	# UTF-16 and UTF-32. printf expands the \n inside $text; no trailing
	# newline is written.
	text="hallo there!\ncan you read me?" &&
	echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
	printf "$text" >test.utf8.raw &&
	printf "$text" | iconv -f UTF-8 -t UTF-16 >test.utf16.raw &&
	printf "$text" | iconv -f UTF-8 -t UTF-32 >test.utf32.raw &&

	# Line ending tests
	printf "one\ntwo\nthree\n" >lf.utf8.raw &&
	printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&

	# BOM tests: "abc" as UTF-16/UTF-32 in both byte orders, without and
	# with a leading byte order mark. The BOM is U+FEFF, i.e. the bytes
	# 0xFE 0xFF, which are \376 and \377 in octal. (\777 would be 511 and
	# does not fit into a single byte.)
	printf "\0a\0b\0c" >nobom.utf16be.raw &&
	printf "a\0b\0c\0" >nobom.utf16le.raw &&
	printf "\376\377\0a\0b\0c" >bebom.utf16be.raw &&
	printf "\377\376a\0b\0c\0" >lebom.utf16le.raw &&
	printf "\0\0\0a\0\0\0b\0\0\0c" >nobom.utf32be.raw &&
	printf "a\0\0\0b\0\0\0c\0\0\0" >nobom.utf32le.raw &&
	printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
	printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&

	# Add only UTF-16 file, we will add the UTF-32 file later
	cp test.utf16.raw test.utf16 &&
	cp test.utf32.raw test.utf32 &&
	git add .gitattributes test.utf16 &&
	git commit -m initial
'
38
test_expect_success 'ensure UTF-8 is stored in Git' '
	# The dumped blob is only a scratch file for the comparison below.
	test_when_finished "rm -f test.utf16.git" &&

	# Write out the staged content of test.utf16 and require it to be
	# byte-identical to the UTF-8 reference file.
	git cat-file -p :test.utf16 >test.utf16.git &&
	test_cmp_bin test.utf8.raw test.utf16.git
'
45
test_expect_success 're-encode to UTF-16 on checkout' '
	# The UTF-16 reference file is removed once this test has finished.
	test_when_finished "rm -f test.utf16.raw" &&

	# Delete the worktree copy, have Git check it out again, and require
	# the result to be byte-identical to the UTF-16 reference file.
	rm test.utf16 &&
	git checkout test.utf16 &&
	test_cmp_bin test.utf16.raw test.utf16
'
53
test_expect_success 'check $GIT_DIR/info/attributes support' '
	test_when_finished "rm -f test.utf32.git" &&
	test_when_finished "git reset --hard HEAD" &&

	# Declare the encoding for *.utf32 in .git/info/attributes rather
	# than in .gitattributes, then stage the UTF-32 file.
	echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
	git add test.utf32 &&

	# The staged content must be byte-identical to the UTF-8 reference.
	git cat-file -p :test.utf32 >test.utf32.git &&
	test_cmp_bin test.utf8.raw test.utf32.git
'
64
# Run the same three tests for UTF-16 and for UTF-32. The test titles are
# double-quoted and expand ${i} right here; the bodies are single-quoted
# and expand ${i} only when the harness evaluates them.
for i in 16 32
do
	test_expect_success "check prohibited UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
		echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&

		# Here we add UTF-16 (resp. UTF-32) files with a BOM
		# (big/little-endian) but we tell Git to treat them as
		# UTF-16BE/UTF-16LE (resp. UTF-32BE/UTF-32LE).
		# In these cases the BOM is prohibited.
		cp bebom.utf${i}be.raw bebom.utf${i}be &&
		test_must_fail git add bebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}be &&
		test_must_fail git add lebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp bebom.utf${i}be.raw bebom.utf${i}le &&
		test_must_fail git add bebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}le &&
		test_must_fail git add lebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
	'

	test_expect_success "check required UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&

		# Files without a BOM, in either byte order, must be rejected
		# when the attribute names the byte-order-neutral encoding.
		cp nobom.utf${i}be.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&

		cp nobom.utf${i}le.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
	'

	test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
		test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
		test_when_finished "git reset --hard HEAD^" &&

		# Encoded LF and CRLF reference files; the CRLF variant is the
		# one that gets committed.
		iconv -f UTF-8 -t UTF-${i} <lf.utf8.raw >lf.utf${i}.raw &&
		iconv -f UTF-8 -t UTF-${i} <crlf.utf8.raw >crlf.utf${i}.raw &&
		cp crlf.utf${i}.raw eol.utf${i} &&

		# Expected "git ls-files --eol" line. The column padding and
		# the TAB in front of the path are spelled out in the format
		# string so they do not depend on literal whitespace here.
		printf "i/%-5s w/%-5s attr/%-17s\t%s\n" \
			lf -text text eol.utf${i} >expectIndexLF &&

		git add eol.utf${i} &&
		git commit -m eol &&

		# UTF-${i} with CRLF (Windows line endings)
		rm eol.utf${i} &&
		git -c core.eol=crlf checkout eol.utf${i} &&
		test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&

		# Although the file has CRLF in the working tree,
		# ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual &&

		# UTF-${i} with LF (Unix line endings)
		rm eol.utf${i} &&
		git -c core.eol=lf checkout eol.utf${i} &&
		test_cmp_bin lf.utf${i}.raw eol.utf${i} &&

		# The file has LF in the working tree, ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual
	'
done
148
test_expect_success 'check unsupported encodings' '
	test_when_finished "git reset --hard HEAD" &&

	# Every scenario replaces .gitattributes with a single rule and then
	# tries to add one matching file.

	# Attribute set without a value: the add must fail.
	printf "set" >t.set &&
	echo "*.set text working-tree-encoding" >.gitattributes &&
	test_must_fail git add t.set 2>err.out &&
	test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&

	# Attribute explicitly unset: the add must succeed.
	printf "unset" >t.unset &&
	echo "*.unset text -working-tree-encoding" >.gitattributes &&
	git add t.unset &&

	# Attribute set to the empty string: the add must succeed.
	printf "empty" >t.empty &&
	echo "*.empty text working-tree-encoding=" >.gitattributes &&
	git add t.empty &&

	# Attribute set to the encoding name "garbage": the add must fail.
	printf "garbage" >t.garbage &&
	echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
	test_must_fail git add t.garbage 2>err.out &&
	test_i18ngrep "failed to encode" err.out
'
170
test_expect_success 'error if encoding round trip is not the same during refresh' '
	# Remember where HEAD was so the branch can be put back afterwards.
	orig_head=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $orig_head" &&

	# Commit a file that matches the utf-16le rule while bypassing the
	# "working-tree-encoding" filter: the blob is written with
	# --no-filters and registered in the index by hand, so the
	# repository holds the raw worktree bytes. This simulates a Git
	# version that has no working tree encoding support.
	echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
	echo "hallo" >nonsense.utf16le &&
	blob=$(git hash-object --no-filters -w nonsense.utf16le) &&
	git update-index --add --cacheinfo 100644 $blob nonsense.utf16le &&
	parent=$(git rev-parse HEAD) &&
	tree=$(git write-tree) &&
	commit=$(git commit-tree -p $parent -m "plain commit" $tree) &&
	git update-ref refs/heads/master $commit &&

	# Switching to the previous commit must be refused.
	test_must_fail git checkout HEAD^ 2>err.out &&
	test_i18ngrep "error: .* overwritten by checkout:" err.out
'
189
test_expect_success 'error if encoding garbage is already in Git' '
	# Remember where HEAD was so the branch can be put back afterwards.
	orig_head=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $orig_head" &&

	# Commit a BOM-less file under a *.utf16 name while bypassing the
	# encoding filter: the blob is written with --no-filters and
	# registered in the index by hand. This simulates a Git version
	# that has no working-tree-encoding support.
	cp nobom.utf16be.raw nonsense.utf16 &&
	blob=$(git hash-object --no-filters -w nonsense.utf16) &&
	git update-index --add --cacheinfo 100644 $blob nonsense.utf16 &&
	parent=$(git rev-parse HEAD) &&
	tree=$(git write-tree) &&
	commit=$(git commit-tree -p $parent -m "plain commit" $tree) &&
	git update-ref refs/heads/master $commit &&

	# "git diff" itself must succeed, but it has to report the missing
	# BOM on stderr.
	git diff 2>err.out &&
	test_i18ngrep "error: BOM is required" err.out
'
205
test_expect_success 'check roundtrip encoding' '
	test_when_finished "rm -f roundtrip.shift roundtrip.utf16" &&
	test_when_finished "git reset --hard HEAD" &&

	text="hallo there!\nroundtrip test here!" &&
	printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
	printf "$text" | iconv -f UTF-8 -t UTF-16 >roundtrip.utf16 &&
	echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&

	# Each check below pipes the GIT_TRACE output of "git add" into grep
	# and looks for the roundtrip trace line. A leading "!" negates the
	# whole pipeline, i.e. it requires that grep finds nothing. The
	# "git reset" after each check unstages the files again.

	# SHIFT-JIS encoded files are round-trip checked by default...
	GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# ... unless we overwrite the Git config!
	! GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \
		add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# UTF-16 encoded files should not be round-trip checked by default...
	# The pattern uses the same lowercase spelling as the positive
	# checks further down; with an uppercase pattern grep could never
	# match and this negated check would pass vacuously.
	! GIT_TRACE=1 git add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset &&

	# ... unless we tell Git to check it!
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset &&

	# Same again with the config entries in a different order and a
	# different casing
	# (here we also check that the casing of the encoding is irrelevant)
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset
'
244
245test_done