utf8: add function to detect a missing UTF-16/32 BOM
[gitweb.git] / utf8.c
diff --git a/utf8.c b/utf8.c
index 83af3a9f6b7b0ea81992154018b0e2083a00685f..25d366d6b3d8665d5e9e9555e60d2ddac659ae0b 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -586,6 +586,19 @@ int has_prohibited_utf_bom(const char *enc, const char *data, size_t len)
        );
 }
 
+int is_missing_required_utf_bom(const char *enc, const char *data, size_t len)
+{
+       return (
+          (same_utf_encoding(enc, "UTF-16")) &&
+          !(has_bom_prefix(data, len, utf16_be_bom, sizeof(utf16_be_bom)) ||
+            has_bom_prefix(data, len, utf16_le_bom, sizeof(utf16_le_bom)))
+       ) || (
+          (same_utf_encoding(enc, "UTF-32")) &&
+          !(has_bom_prefix(data, len, utf32_be_bom, sizeof(utf32_be_bom)) ||
+            has_bom_prefix(data, len, utf32_le_bom, sizeof(utf32_le_bom)))
+       );
+}
+
 /*
  * Returns first character length in bytes for multi-byte `text` according to
  * `encoding`.