1/* 2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason 3 */ 4 5#include "cache.h" 6#include "exec_cmd.h" 7#include "gettext.h" 8#include "strbuf.h" 9#include "utf8.h" 10 11#ifndef NO_GETTEXT 12# include <locale.h> 13# include <libintl.h> 14# ifdef HAVE_LIBCHARSET_H 15# include <libcharset.h> 16# else 17# include <langinfo.h> 18# define locale_charset() nl_langinfo(CODESET) 19# endif 20#endif 21 22static const char *charset; 23 24/* 25 * Guess the user's preferred languages from the value in LANGUAGE environment 26 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. 27 * 28 * The result can be a colon-separated list like "ko:ja:en". 29 */ 30const char *get_preferred_languages(void) 31{ 32 const char *retval; 33 34 retval = getenv("LANGUAGE"); 35 if (retval && *retval) 36 return retval; 37 38#ifndef NO_GETTEXT 39 retval = setlocale(LC_MESSAGES, NULL); 40 if (retval && *retval && 41 strcmp(retval, "C") && 42 strcmp(retval, "POSIX")) 43 return retval; 44#endif 45 46 return NULL; 47} 48 49#ifdef GETTEXT_POISON 50int use_gettext_poison(void) 51{ 52 static int poison_requested = -1; 53 if (poison_requested == -1) 54 poison_requested = getenv("GIT_GETTEXT_POISON") ? 1 : 0; 55 return poison_requested; 56} 57#endif 58 59#ifndef NO_GETTEXT 60static int test_vsnprintf(const char *fmt, ...) 61{ 62 char buf[26]; 63 int ret; 64 va_list ap; 65 va_start(ap, fmt); 66 ret = vsnprintf(buf, sizeof(buf), fmt, ap); 67 va_end(ap); 68 return ret; 69} 70 71static void init_gettext_charset(const char *domain) 72{ 73 /* 74 This trick arranges for messages to be emitted in the user's 75 requested encoding, but avoids setting LC_CTYPE from the 76 environment for the whole program. 77 78 This primarily done to avoid a bug in vsnprintf in the GNU C 79 Library [1]. which triggered a "your vsnprintf is broken" error 80 on Git's own repository when inspecting v0.99.6~1 under a UTF-8 81 locale. 82 83 That commit contains a ISO-8859-1 encoded author name, which 84 the locale aware vsnprintf(3) won't interpolate in the format 85 argument, due to mismatch between the data encoding and the 86 locale. 87 88 Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at 89 this point, because it'd require auditing all the code that uses C 90 functions whose semantics are modified by LC_CTYPE. 91 92 But only setting LC_MESSAGES as we do creates a problem, since 93 we declare the encoding of our PO files[2] the gettext 94 implementation will try to recode it to the user's locale, but 95 without LC_CTYPE it'll emit something like this on 'git init' 96 under the Icelandic locale: 97 98 Bj? til t?ma Git lind ? /hlagh/.git/ 99 100 Gettext knows about the encoding of our PO file, but we haven't 101 told it about the user's encoding, so all the non-US-ASCII 102 characters get encoded to question marks. 103 104 But we're in luck! We can set LC_CTYPE from the environment 105 only while we call nl_langinfo and 106 bind_textdomain_codeset. That suffices to tell gettext what 107 encoding it should emit in, so it'll now say: 108 109 Bjó til tóma Git lind í /hlagh/.git/ 110 111 And the equivalent ISO-8859-1 string will be emitted under a 112 ISO-8859-1 locale. 113 114 With this change way we get the advantages of setting LC_CTYPE 115 (talk to the user in his language/encoding), without the major 116 drawbacks (changed semantics for C functions we rely on). 117 118 However foreign functions using other message catalogs that 119 aren't using our neat trick will still have a problem, e.g. if 120 we have to call perror(3): 121 122 #include <stdio.h> 123 #include <locale.h> 124 #include <errno.h> 125 126 int main(void) 127 { 128 setlocale(LC_MESSAGES, ""); 129 setlocale(LC_CTYPE, "C"); 130 errno = ENODEV; 131 perror("test"); 132 return 0; 133 } 134 135 Running that will give you a message with question marks: 136 137 $ LANGUAGE= LANG=de_DE.utf8 ./test 138 test: Kein passendes Ger?t gefunden 139 140 The vsnprintf bug has been fixed since glibc 2.17. 141 142 Then we could simply set LC_CTYPE from the environment, which would 143 make things like the external perror(3) messages work. 144 145 See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for 146 regression tests. 147 148 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530 149 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po 150 */ 151 setlocale(LC_CTYPE, ""); 152 charset = locale_charset(); 153 bind_textdomain_codeset(domain, charset); 154 /* the string is taken from v0.99.6~1 */ 155 if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0) 156 setlocale(LC_CTYPE, "C"); 157} 158 159void git_setup_gettext(void) 160{ 161 const char *podir = getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT); 162 163 if (!podir) 164 podir = system_path(GIT_LOCALE_PATH); 165 166 bindtextdomain("git", podir); 167 setlocale(LC_MESSAGES, ""); 168 setlocale(LC_TIME, ""); 169 init_gettext_charset("git"); 170 textdomain("git"); 171} 172 173/* return the number of columns of string 's' in current locale */ 174int gettext_width(const char *s) 175{ 176 static int is_utf8 = -1; 177 if (is_utf8 == -1) 178 is_utf8 = is_utf8_locale(); 179 180 return is_utf8 ? utf8_strwidth(s) : strlen(s); 181} 182#endif 183 184int is_utf8_locale(void) 185{ 186#ifdef NO_GETTEXT 187 if (!charset) { 188 const char *env = getenv("LC_ALL"); 189 if (!env || !*env) 190 env = getenv("LC_CTYPE"); 191 if (!env || !*env) 192 env = getenv("LANG"); 193 if (!env) 194 env = ""; 195 if (strchr(env, '.')) 196 env = strchr(env, '.') + 1; 197 charset = xstrdup(env); 198 } 199#endif 200 return is_encoding_utf8(charset); 201}