convert: check for detectable errors in UTF encodings
[gitweb.git] / t / t0028-working-tree-encoding.sh
index 8e574ccdd87b9a565577e5207f80dcd46f916fbc..a318d03232d2b6130b25ad5c7b9d163179d9a29c 100755 (executable)
@@ -62,6 +62,52 @@ test_expect_success 'check $GIT_DIR/info/attributes support' '
 
 for i in 16 32
 do
+       test_expect_success "check prohibited UTF-${i} BOM" '
+               test_when_finished "git reset --hard HEAD" &&
+
+               echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
+               echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&
+
+               # Here we add a UTF-16 (resp. UTF-32) files with BOM (big/little-endian)
+               # but we tell Git to treat it as UTF-16BE/UTF-16LE (resp. UTF-32).
+               # In these cases the BOM is prohibited.
+               cp bebom.utf${i}be.raw bebom.utf${i}be &&
+               test_must_fail git add bebom.utf${i}be 2>err.out &&
+               test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
+               test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
+
+               cp lebom.utf${i}le.raw lebom.utf${i}be &&
+               test_must_fail git add lebom.utf${i}be 2>err.out &&
+               test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
+               test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
+
+               cp bebom.utf${i}be.raw bebom.utf${i}le &&
+               test_must_fail git add bebom.utf${i}le 2>err.out &&
+               test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
+               test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&
+
+               cp lebom.utf${i}le.raw lebom.utf${i}le &&
+               test_must_fail git add lebom.utf${i}le 2>err.out &&
+               test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
+               test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
+       '
+
+       test_expect_success "check required UTF-${i} BOM" '
+               test_when_finished "git reset --hard HEAD" &&
+
+               echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&
+
+               cp nobom.utf${i}be.raw nobom.utf${i} &&
+               test_must_fail git add nobom.utf${i} 2>err.out &&
+               test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
+               test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&
+
+               cp nobom.utf${i}le.raw nobom.utf${i} &&
+               test_must_fail git add nobom.utf${i} 2>err.out &&
+               test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
+               test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
+       '
+
        test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
                test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
                test_when_finished "git reset --hard HEAD^" &&
@@ -139,4 +185,20 @@ test_expect_success 'error if encoding round trip is not the same during refresh
        test_i18ngrep "error: .* overwritten by checkout:" err.out
 '
 
+test_expect_success 'error if encoding garbage is already in Git' '
+       BEFORE_STATE=$(git rev-parse HEAD) &&
+       test_when_finished "git reset --hard $BEFORE_STATE" &&
+
+       # Skip the UTF-16 filter for the added file
+       # This simulates a Git version that has no checkoutEncoding support
+       cp nobom.utf16be.raw nonsense.utf16 &&
+       TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
+       git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
+       COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
+       git update-ref refs/heads/master $COMMIT &&
+
+       git diff 2>err.out &&
+       test_i18ngrep "error: BOM is required" err.out
+'
+
 test_done