1/* 2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason 3 */ 4 5#include "cache.h" 6#include "exec_cmd.h" 7#include "gettext.h" 8#include "strbuf.h" 9#include "utf8.h" 10 11#ifndef NO_GETTEXT 12# include <locale.h> 13# include <libintl.h> 14# ifdef HAVE_LIBCHARSET_H 15# include <libcharset.h> 16# else 17# include <langinfo.h> 18# define locale_charset() nl_langinfo(CODESET) 19# endif 20#endif 21 22static const char *charset; 23 24/* 25 * Guess the user's preferred languages from the value in LANGUAGE environment 26 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. 27 * 28 * The result can be a colon-separated list like "ko:ja:en". 29 */ 30const char *get_preferred_languages(void) 31{ 32 const char *retval; 33 34 retval = getenv("LANGUAGE"); 35 if (retval && *retval) 36 return retval; 37 38#ifndef NO_GETTEXT 39 retval = setlocale(LC_MESSAGES, NULL); 40 if (retval && *retval && 41 strcmp(retval, "C") && 42 strcmp(retval, "POSIX")) 43 return retval; 44#endif 45 46 return NULL; 47} 48 49#ifdef GETTEXT_POISON 50int use_gettext_poison(void) 51{ 52 static int poison_requested = -1; 53 if (poison_requested == -1) 54 poison_requested = getenv("GIT_GETTEXT_POISON") ? 1 : 0; 55 return poison_requested; 56} 57#endif 58 59#ifndef NO_GETTEXT 60static int test_vsnprintf(const char *fmt, ...) 61{ 62 char buf[26]; 63 int ret; 64 va_list ap; 65 va_start(ap, fmt); 66 ret = vsnprintf(buf, sizeof(buf), fmt, ap); 67 va_end(ap); 68 return ret; 69} 70 71static void init_gettext_charset(const char *domain) 72{ 73 /* 74 This trick arranges for messages to be emitted in the user's 75 requested encoding, but avoids setting LC_CTYPE from the 76 environment for the whole program. 77 78 This primarily done to avoid a bug in vsnprintf in the GNU C 79 Library [1]. which triggered a "your vsnprintf is broken" error 80 on Git's own repository when inspecting v0.99.6~1 under a UTF-8 81 locale. 82 83 That commit contains a ISO-8859-1 encoded author name, which 84 the locale aware vsnprintf(3) won't interpolate in the format 85 argument, due to mismatch between the data encoding and the 86 locale. 87 88 Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at 89 this point, because it'd require auditing all the code that uses C 90 functions whose semantics are modified by LC_CTYPE. 91 92 But only setting LC_MESSAGES as we do creates a problem, since 93 we declare the encoding of our PO files[2] the gettext 94 implementation will try to recode it to the user's locale, but 95 without LC_CTYPE it'll emit something like this on 'git init' 96 under the Icelandic locale: 97 98 Bj? til t?ma Git lind ? /hlagh/.git/ 99 100 Gettext knows about the encoding of our PO file, but we haven't 101 told it about the user's encoding, so all the non-US-ASCII 102 characters get encoded to question marks. 103 104 But we're in luck! We can set LC_CTYPE from the environment 105 only while we call nl_langinfo and 106 bind_textdomain_codeset. That suffices to tell gettext what 107 encoding it should emit in, so it'll now say: 108 109 Bjó til tóma Git lind í /hlagh/.git/ 110 111 And the equivalent ISO-8859-1 string will be emitted under a 112 ISO-8859-1 locale. 113 114 With this change way we get the advantages of setting LC_CTYPE 115 (talk to the user in his language/encoding), without the major 116 drawbacks (changed semantics for C functions we rely on). 117 118 However foreign functions using other message catalogs that 119 aren't using our neat trick will still have a problem, e.g. if 120 we have to call perror(3): 121 122 #include <stdio.h> 123 #include <locale.h> 124 #include <errno.h> 125 126 int main(void) 127 { 128 setlocale(LC_MESSAGES, ""); 129 setlocale(LC_CTYPE, "C"); 130 errno = ENODEV; 131 perror("test"); 132 return 0; 133 } 134 135 Running that will give you a message with question marks: 136 137 $ LANGUAGE= LANG=de_DE.utf8 ./test 138 test: Kein passendes Ger?t gefunden 139 140 The vsnprintf bug has been fixed since glibc 2.17. 141 142 Then we could simply set LC_CTYPE from the environment, which would 143 make things like the external perror(3) messages work. 144 145 See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for 146 regression tests. 147 148 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530 149 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po 150 */ 151 setlocale(LC_CTYPE, ""); 152 charset = locale_charset(); 153 bind_textdomain_codeset(domain, charset); 154 /* the string is taken from v0.99.6~1 */ 155 if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0) 156 setlocale(LC_CTYPE, "C"); 157} 158 159void git_setup_gettext(void) 160{ 161 const char *podir = getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT); 162 163 if (!podir) 164 podir = system_path(GIT_LOCALE_PATH); 165 166 if (!is_directory(podir)) 167 return; 168 169 bindtextdomain("git", podir); 170 setlocale(LC_MESSAGES, ""); 171 setlocale(LC_TIME, ""); 172 init_gettext_charset("git"); 173 textdomain("git"); 174} 175 176/* return the number of columns of string 's' in current locale */ 177int gettext_width(const char *s) 178{ 179 static int is_utf8 = -1; 180 if (is_utf8 == -1) 181 is_utf8 = is_utf8_locale(); 182 183 return is_utf8 ? utf8_strwidth(s) : strlen(s); 184} 185#endif 186 187int is_utf8_locale(void) 188{ 189#ifdef NO_GETTEXT 190 if (!charset) { 191 const char *env = getenv("LC_ALL"); 192 if (!env || !*env) 193 env = getenv("LC_CTYPE"); 194 if (!env || !*env) 195 env = getenv("LANG"); 196 if (!env) 197 env = ""; 198 if (strchr(env, '.')) 199 env = strchr(env, '.') + 1; 200 charset = xstrdup(env); 201 } 202#endif 203 return is_encoding_utf8(charset); 204}