utf8: refactor code to decide fallback encoding
authorJunio C Hamano <gitster@pobox.com>
Tue, 27 Sep 2016 01:09:48 +0000 (18:09 -0700)
committerJunio C Hamano <gitster@pobox.com>
Tue, 27 Sep 2016 01:16:23 +0000 (18:16 -0700)
The codepath we use to call iconv_open() has a provision to use a
fallback encoding when it fails, hoping that "UTF-8" being spelled
differently could be the reason why the library function did not
like the encoding names we gave it. Essentially, we turn what we
have observed to be used as variants of "UTF-8" (e.g. "utf8") into
the most official spelling and use that as a fallback.

We do the same thing for input and output encoding. Introduce a
helper function to do just one side and call that twice.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
utf8.c
diff --git a/utf8.c b/utf8.c
index 00e10c86ad7ea7bcc2d53a90a2f608dd66d47462..550e785ecbf0584cb959050a24fcce58905cff3b 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -489,6 +489,21 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, int *outs
        return out;
 }
 
        return out;
 }
 
+static const char *fallback_encoding(const char *name)
+{
+       /*
+        * Some platforms do not have the variously spelled variants of
+        * UTF-8, so let's fall back to trying the most official
+        * spelling. We do so only as a fallback in case the platform
+        * does understand the user's spelling, but not our official
+        * one.
+        */
+       if (is_encoding_utf8(name))
+               return "UTF-8";
+
+       return name;
+}
+
 char *reencode_string_len(const char *in, int insz,
                          const char *out_encoding, const char *in_encoding,
                          int *outsz)
 char *reencode_string_len(const char *in, int insz,
                          const char *out_encoding, const char *in_encoding,
                          int *outsz)
@@ -501,17 +516,9 @@ char *reencode_string_len(const char *in, int insz,
 
        conv = iconv_open(out_encoding, in_encoding);
        if (conv == (iconv_t) -1) {
 
        conv = iconv_open(out_encoding, in_encoding);
        if (conv == (iconv_t) -1) {
-               /*
-                * Some platforms do not have the variously spelled variants of
-                * UTF-8, so let's fall back to trying the most official
-                * spelling. We do so only as a fallback in case the platform
-                * does understand the user's spelling, but not our official
-                * one.
-                */
-               if (is_encoding_utf8(in_encoding))
-                       in_encoding = "UTF-8";
-               if (is_encoding_utf8(out_encoding))
-                       out_encoding = "UTF-8";
+               in_encoding = fallback_encoding(in_encoding);
+               out_encoding = fallback_encoding(out_encoding);
+
                conv = iconv_open(out_encoding, in_encoding);
                if (conv == (iconv_t) -1)
                        return NULL;
                conv = iconv_open(out_encoding, in_encoding);
                if (conv == (iconv_t) -1)
                        return NULL;