1/* 2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason 3 */ 4 5#include"cache.h" 6#include"exec-cmd.h" 7#include"gettext.h" 8#include"strbuf.h" 9#include"utf8.h" 10#include"config.h" 11 12#ifndef NO_GETTEXT 13# include <locale.h> 14# include <libintl.h> 15# ifdef GIT_WINDOWS_NATIVE 16 17static const char*locale_charset(void) 18{ 19const char*env =getenv("LC_ALL"), *dot; 20 21if(!env || !*env) 22 env =getenv("LC_CTYPE"); 23if(!env || !*env) 24 env =getenv("LANG"); 25 26if(!env) 27return"UTF-8"; 28 29 dot =strchr(env,'.'); 30return!dot ? env : dot +1; 31} 32 33# elif defined HAVE_LIBCHARSET_H 34# include <libcharset.h> 35# else 36# include <langinfo.h> 37# define locale_charset() nl_langinfo(CODESET) 38# endif 39#endif 40 41static const char*charset; 42 43/* 44 * Guess the user's preferred languages from the value in LANGUAGE environment 45 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. 46 * 47 * The result can be a colon-separated list like "ko:ja:en". 48 */ 49const char*get_preferred_languages(void) 50{ 51const char*retval; 52 53 retval =getenv("LANGUAGE"); 54if(retval && *retval) 55return retval; 56 57#ifndef NO_GETTEXT 58 retval =setlocale(LC_MESSAGES, NULL); 59if(retval && *retval && 60strcmp(retval,"C") && 61strcmp(retval,"POSIX")) 62return retval; 63#endif 64 65return NULL; 66} 67 68intuse_gettext_poison(void) 69{ 70static int poison_requested = -1; 71if(poison_requested == -1) 72 poison_requested =git_env_bool("GIT_TEST_GETTEXT_POISON",0); 73return poison_requested; 74} 75 76#ifndef NO_GETTEXT 77static inttest_vsnprintf(const char*fmt, ...) 78{ 79char buf[26]; 80int ret; 81va_list ap; 82va_start(ap, fmt); 83 ret =vsnprintf(buf,sizeof(buf), fmt, ap); 84va_end(ap); 85return ret; 86} 87 88static voidinit_gettext_charset(const char*domain) 89{ 90/* 91 This trick arranges for messages to be emitted in the user's 92 requested encoding, but avoids setting LC_CTYPE from the 93 environment for the whole program. 94 95 This primarily done to avoid a bug in vsnprintf in the GNU C 96 Library [1]. which triggered a "your vsnprintf is broken" error 97 on Git's own repository when inspecting v0.99.6~1 under a UTF-8 98 locale. 99 100 That commit contains a ISO-8859-1 encoded author name, which 101 the locale aware vsnprintf(3) won't interpolate in the format 102 argument, due to mismatch between the data encoding and the 103 locale. 104 105 Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at 106 this point, because it'd require auditing all the code that uses C 107 functions whose semantics are modified by LC_CTYPE. 108 109 But only setting LC_MESSAGES as we do creates a problem, since 110 we declare the encoding of our PO files[2] the gettext 111 implementation will try to recode it to the user's locale, but 112 without LC_CTYPE it'll emit something like this on 'git init' 113 under the Icelandic locale: 114 115 Bj? til t?ma Git lind ? /hlagh/.git/ 116 117 Gettext knows about the encoding of our PO file, but we haven't 118 told it about the user's encoding, so all the non-US-ASCII 119 characters get encoded to question marks. 120 121 But we're in luck! We can set LC_CTYPE from the environment 122 only while we call nl_langinfo and 123 bind_textdomain_codeset. That suffices to tell gettext what 124 encoding it should emit in, so it'll now say: 125 126 Bjó til tóma Git lind í /hlagh/.git/ 127 128 And the equivalent ISO-8859-1 string will be emitted under a 129 ISO-8859-1 locale. 130 131 With this change way we get the advantages of setting LC_CTYPE 132 (talk to the user in his language/encoding), without the major 133 drawbacks (changed semantics for C functions we rely on). 134 135 However foreign functions using other message catalogs that 136 aren't using our neat trick will still have a problem, e.g. if 137 we have to call perror(3): 138 139 #include <stdio.h> 140 #include <locale.h> 141 #include <errno.h> 142 143 int main(void) 144 { 145 setlocale(LC_MESSAGES, ""); 146 setlocale(LC_CTYPE, "C"); 147 errno = ENODEV; 148 perror("test"); 149 return 0; 150 } 151 152 Running that will give you a message with question marks: 153 154 $ LANGUAGE= LANG=de_DE.utf8 ./test 155 test: Kein passendes Ger?t gefunden 156 157 The vsnprintf bug has been fixed since glibc 2.17. 158 159 Then we could simply set LC_CTYPE from the environment, which would 160 make things like the external perror(3) messages work. 161 162 See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for 163 regression tests. 164 165 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530 166 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po 167 */ 168setlocale(LC_CTYPE,""); 169 charset =locale_charset(); 170bind_textdomain_codeset(domain, charset); 171/* the string is taken from v0.99.6~1 */ 172if(test_vsnprintf("%.*s",13,"David_K\345gedal") <0) 173setlocale(LC_CTYPE,"C"); 174} 175 176voidgit_setup_gettext(void) 177{ 178const char*podir =getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT); 179char*p = NULL; 180 181if(!podir) 182 podir = p =system_path(GIT_LOCALE_PATH); 183 184use_gettext_poison();/* getenv() reentrancy paranoia */ 185 186if(!is_directory(podir)) { 187free(p); 188return; 189} 190 191bindtextdomain("git", podir); 192setlocale(LC_MESSAGES,""); 193setlocale(LC_TIME,""); 194init_gettext_charset("git"); 195textdomain("git"); 196 197free(p); 198} 199 200/* return the number of columns of string 's' in current locale */ 201intgettext_width(const char*s) 202{ 203static int is_utf8 = -1; 204if(is_utf8 == -1) 205 is_utf8 =is_utf8_locale(); 206 207return is_utf8 ?utf8_strwidth(s) :strlen(s); 208} 209#endif 210 211intis_utf8_locale(void) 212{ 213#ifdef NO_GETTEXT 214if(!charset) { 215const char*env =getenv("LC_ALL"); 216if(!env || !*env) 217 env =getenv("LC_CTYPE"); 218if(!env || !*env) 219 env =getenv("LANG"); 220if(!env) 221 env =""; 222if(strchr(env,'.')) 223 env =strchr(env,'.') +1; 224 charset =xstrdup(env); 225} 226#endif 227returnis_encoding_utf8(charset); 228}