1/* 2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason 3 */ 4 5#include"cache.h" 6#include"exec_cmd.h" 7#include"gettext.h" 8#include"strbuf.h" 9#include"utf8.h" 10 11#ifndef NO_GETTEXT 12# include <locale.h> 13# include <libintl.h> 14# ifdef HAVE_LIBCHARSET_H 15# include <libcharset.h> 16# else 17# include <langinfo.h> 18# define locale_charset() nl_langinfo(CODESET) 19# endif 20#endif 21 22static const char*charset; 23 24/* 25 * Guess the user's preferred languages from the value in LANGUAGE environment 26 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. 27 * 28 * The result can be a colon-separated list like "ko:ja:en". 29 */ 30const char*get_preferred_languages(void) 31{ 32const char*retval; 33 34 retval =getenv("LANGUAGE"); 35if(retval && *retval) 36return retval; 37 38#ifndef NO_GETTEXT 39 retval =setlocale(LC_MESSAGES, NULL); 40if(retval && *retval && 41strcmp(retval,"C") && 42strcmp(retval,"POSIX")) 43return retval; 44#endif 45 46return NULL; 47} 48 49#ifdef GETTEXT_POISON 50intuse_gettext_poison(void) 51{ 52static int poison_requested = -1; 53if(poison_requested == -1) 54 poison_requested =getenv("GIT_GETTEXT_POISON") ?1:0; 55return poison_requested; 56} 57#endif 58 59#ifndef NO_GETTEXT 60static inttest_vsnprintf(const char*fmt, ...) 61{ 62char buf[26]; 63int ret; 64va_list ap; 65va_start(ap, fmt); 66 ret =vsnprintf(buf,sizeof(buf), fmt, ap); 67va_end(ap); 68return ret; 69} 70 71static voidinit_gettext_charset(const char*domain) 72{ 73/* 74 This trick arranges for messages to be emitted in the user's 75 requested encoding, but avoids setting LC_CTYPE from the 76 environment for the whole program. 77 78 This primarily done to avoid a bug in vsnprintf in the GNU C 79 Library [1]. which triggered a "your vsnprintf is broken" error 80 on Git's own repository when inspecting v0.99.6~1 under a UTF-8 81 locale. 82 83 That commit contains a ISO-8859-1 encoded author name, which 84 the locale aware vsnprintf(3) won't interpolate in the format 85 argument, due to mismatch between the data encoding and the 86 locale. 87 88 Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at 89 this point, because it'd require auditing all the code that uses C 90 functions whose semantics are modified by LC_CTYPE. 91 92 But only setting LC_MESSAGES as we do creates a problem, since 93 we declare the encoding of our PO files[2] the gettext 94 implementation will try to recode it to the user's locale, but 95 without LC_CTYPE it'll emit something like this on 'git init' 96 under the Icelandic locale: 97 98 Bj? til t?ma Git lind ? /hlagh/.git/ 99 100 Gettext knows about the encoding of our PO file, but we haven't 101 told it about the user's encoding, so all the non-US-ASCII 102 characters get encoded to question marks. 103 104 But we're in luck! We can set LC_CTYPE from the environment 105 only while we call nl_langinfo and 106 bind_textdomain_codeset. That suffices to tell gettext what 107 encoding it should emit in, so it'll now say: 108 109 Bjó til tóma Git lind í /hlagh/.git/ 110 111 And the equivalent ISO-8859-1 string will be emitted under a 112 ISO-8859-1 locale. 113 114 With this change way we get the advantages of setting LC_CTYPE 115 (talk to the user in his language/encoding), without the major 116 drawbacks (changed semantics for C functions we rely on). 117 118 However foreign functions using other message catalogs that 119 aren't using our neat trick will still have a problem, e.g. if 120 we have to call perror(3): 121 122 #include <stdio.h> 123 #include <locale.h> 124 #include <errno.h> 125 126 int main(void) 127 { 128 setlocale(LC_MESSAGES, ""); 129 setlocale(LC_CTYPE, "C"); 130 errno = ENODEV; 131 perror("test"); 132 return 0; 133 } 134 135 Running that will give you a message with question marks: 136 137 $ LANGUAGE= LANG=de_DE.utf8 ./test 138 test: Kein passendes Ger?t gefunden 139 140 The vsnprintf bug has been fixed since glibc 2.17. 141 142 Then we could simply set LC_CTYPE from the environment, which would 143 make things like the external perror(3) messages work. 144 145 See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for 146 regression tests. 147 148 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530 149 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po 150 */ 151setlocale(LC_CTYPE,""); 152 charset =locale_charset(); 153bind_textdomain_codeset(domain, charset); 154/* the string is taken from v0.99.6~1 */ 155if(test_vsnprintf("%.*s",13,"David_K\345gedal") <0) 156setlocale(LC_CTYPE,"C"); 157} 158 159voidgit_setup_gettext(void) 160{ 161const char*podir =getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT); 162 163if(!podir) 164 podir =system_path(GIT_LOCALE_PATH); 165 166if(!is_directory(podir)) 167return; 168 169bindtextdomain("git", podir); 170setlocale(LC_MESSAGES,""); 171setlocale(LC_TIME,""); 172init_gettext_charset("git"); 173textdomain("git"); 174} 175 176/* return the number of columns of string 's' in current locale */ 177intgettext_width(const char*s) 178{ 179static int is_utf8 = -1; 180if(is_utf8 == -1) 181 is_utf8 =is_utf8_locale(); 182 183return is_utf8 ?utf8_strwidth(s) :strlen(s); 184} 185#endif 186 187intis_utf8_locale(void) 188{ 189#ifdef NO_GETTEXT 190if(!charset) { 191const char*env =getenv("LC_ALL"); 192if(!env || !*env) 193 env =getenv("LC_CTYPE"); 194if(!env || !*env) 195 env =getenv("LANG"); 196if(!env) 197 env =""; 198if(strchr(env,'.')) 199 env =strchr(env,'.') +1; 200 charset =xstrdup(env); 201} 202#endif 203returnis_encoding_utf8(charset); 204}