1/* 2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason 3 */ 4 5#include"cache.h" 6#include"exec-cmd.h" 7#include"gettext.h" 8#include"strbuf.h" 9#include"utf8.h" 10 11#ifndef NO_GETTEXT 12# include <locale.h> 13# include <libintl.h> 14# ifdef HAVE_LIBCHARSET_H 15# include <libcharset.h> 16# else 17# include <langinfo.h> 18# define locale_charset() nl_langinfo(CODESET) 19# endif 20#endif 21 22static const char*charset; 23 24/* 25 * Guess the user's preferred languages from the value in LANGUAGE environment 26 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. 27 * 28 * The result can be a colon-separated list like "ko:ja:en". 29 */ 30const char*get_preferred_languages(void) 31{ 32const char*retval; 33 34 retval =getenv("LANGUAGE"); 35if(retval && *retval) 36return retval; 37 38#ifndef NO_GETTEXT 39 retval =setlocale(LC_MESSAGES, NULL); 40if(retval && *retval && 41strcmp(retval,"C") && 42strcmp(retval,"POSIX")) 43return retval; 44#endif 45 46return NULL; 47} 48 49#ifdef GETTEXT_POISON 50intuse_gettext_poison(void) 51{ 52static int poison_requested = -1; 53if(poison_requested == -1) 54 poison_requested =getenv("GIT_GETTEXT_POISON") ?1:0; 55return poison_requested; 56} 57#endif 58 59#ifndef NO_GETTEXT 60static inttest_vsnprintf(const char*fmt, ...) 61{ 62char buf[26]; 63int ret; 64va_list ap; 65va_start(ap, fmt); 66 ret =vsnprintf(buf,sizeof(buf), fmt, ap); 67va_end(ap); 68return ret; 69} 70 71static voidinit_gettext_charset(const char*domain) 72{ 73/* 74 This trick arranges for messages to be emitted in the user's 75 requested encoding, but avoids setting LC_CTYPE from the 76 environment for the whole program. 77 78 This primarily done to avoid a bug in vsnprintf in the GNU C 79 Library [1]. which triggered a "your vsnprintf is broken" error 80 on Git's own repository when inspecting v0.99.6~1 under a UTF-8 81 locale. 82 83 That commit contains a ISO-8859-1 encoded author name, which 84 the locale aware vsnprintf(3) won't interpolate in the format 85 argument, due to mismatch between the data encoding and the 86 locale. 87 88 Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at 89 this point, because it'd require auditing all the code that uses C 90 functions whose semantics are modified by LC_CTYPE. 91 92 But only setting LC_MESSAGES as we do creates a problem, since 93 we declare the encoding of our PO files[2] the gettext 94 implementation will try to recode it to the user's locale, but 95 without LC_CTYPE it'll emit something like this on 'git init' 96 under the Icelandic locale: 97 98 Bj? til t?ma Git lind ? /hlagh/.git/ 99 100 Gettext knows about the encoding of our PO file, but we haven't 101 told it about the user's encoding, so all the non-US-ASCII 102 characters get encoded to question marks. 103 104 But we're in luck! We can set LC_CTYPE from the environment 105 only while we call nl_langinfo and 106 bind_textdomain_codeset. That suffices to tell gettext what 107 encoding it should emit in, so it'll now say: 108 109 Bjó til tóma Git lind í /hlagh/.git/ 110 111 And the equivalent ISO-8859-1 string will be emitted under a 112 ISO-8859-1 locale. 113 114 With this change way we get the advantages of setting LC_CTYPE 115 (talk to the user in his language/encoding), without the major 116 drawbacks (changed semantics for C functions we rely on). 117 118 However foreign functions using other message catalogs that 119 aren't using our neat trick will still have a problem, e.g. if 120 we have to call perror(3): 121 122 #include <stdio.h> 123 #include <locale.h> 124 #include <errno.h> 125 126 int main(void) 127 { 128 setlocale(LC_MESSAGES, ""); 129 setlocale(LC_CTYPE, "C"); 130 errno = ENODEV; 131 perror("test"); 132 return 0; 133 } 134 135 Running that will give you a message with question marks: 136 137 $ LANGUAGE= LANG=de_DE.utf8 ./test 138 test: Kein passendes Ger?t gefunden 139 140 The vsnprintf bug has been fixed since glibc 2.17. 141 142 Then we could simply set LC_CTYPE from the environment, which would 143 make things like the external perror(3) messages work. 144 145 See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for 146 regression tests. 147 148 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530 149 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po 150 */ 151setlocale(LC_CTYPE,""); 152 charset =locale_charset(); 153bind_textdomain_codeset(domain, charset); 154/* the string is taken from v0.99.6~1 */ 155if(test_vsnprintf("%.*s",13,"David_K\345gedal") <0) 156setlocale(LC_CTYPE,"C"); 157} 158 159voidgit_setup_gettext(void) 160{ 161const char*podir =getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT); 162 163if(!podir) 164 podir =system_path(GIT_LOCALE_PATH); 165 166bindtextdomain("git", podir); 167setlocale(LC_MESSAGES,""); 168setlocale(LC_TIME,""); 169init_gettext_charset("git"); 170textdomain("git"); 171} 172 173/* return the number of columns of string 's' in current locale */ 174intgettext_width(const char*s) 175{ 176static int is_utf8 = -1; 177if(is_utf8 == -1) 178 is_utf8 =is_utf8_locale(); 179 180return is_utf8 ?utf8_strwidth(s) :strlen(s); 181} 182#endif 183 184intis_utf8_locale(void) 185{ 186#ifdef NO_GETTEXT 187if(!charset) { 188const char*env =getenv("LC_ALL"); 189if(!env || !*env) 190 env =getenv("LC_CTYPE"); 191if(!env || !*env) 192 env =getenv("LANG"); 193if(!env) 194 env =""; 195if(strchr(env,'.')) 196 env =strchr(env,'.') +1; 197 charset =xstrdup(env); 198} 199#endif 200returnis_encoding_utf8(charset); 201}