Do a better job at guessing unknown character sets

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Jul 2007 17:34:44 +0000 (10:34 -0700)

committer Junio C Hamano <gitster@pobox.com>
Thu, 19 Jul 2007 00:01:10 +0000 (17:01 -0700)
author: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Jul 2007 17:34:44 +0000 (10:34 -0700)
committer: Junio C Hamano <gitster@pobox.com>
Thu, 19 Jul 2007 00:01:10 +0000 (17:01 -0700)
diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index 489c2c58c01514ac3d967d1c3f46f1243f853580..a37a4fff39afe6bafaace59b710e1dfdc56bf35a 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -499,15 +499,40 @@ static int decode_b_segment(char *in, char *ot, char *ep)
         return 0;
  }
  
+/*
+ * When there is no known charset, guess.
+ *
+ * Right now we assume that if the target is UTF-8 (the default),
+ * and it already looks like UTF-8 (which includes US-ASCII as its
+ * subset, of course) then that is what it is and there is nothing
+ * to do.
+ *
+ * Otherwise, we default to assuming it is Latin1 for historical
+ * reasons.
+ */
+static const char *guess_charset(const char *line, const char *target_charset)
+{
+       if (is_encoding_utf8(target_charset)) {
+               if (is_utf8(line))
+                       return NULL;
+       }
+       return "latin1";
+}
+
  static void convert_to_utf8(char *line, const char *charset)
  {
-       static const char latin_one[] = "latin1";
-       const char *input_charset = *charset ? charset : latin_one;
-       char *out = reencode_string(line, metainfo_charset, input_charset);
+       char *out;
+
+       if (!charset || !*charset) {
+               charset = guess_charset(line, metainfo_charset);
+               if (!charset)
+                       return;
+       }
  
+       out = reencode_string(line, metainfo_charset, charset);
         if (!out)
                 die("cannot convert from %s to %s\n",
-                   input_charset, metainfo_charset);
+                   charset, metainfo_charset);
         strcpy(line, out);
         free(out);
  }
author	Linus Torvalds <torvalds@linux-foundation.org>
author date	Tue, 17 Jul 2007 17:34:44 +0000 (10:34 -0700)
committer	Junio C Hamano <gitster@pobox.com>
commit date	Thu, 19 Jul 2007 00:01:10 +0000 (17:01 -0700)