1/* 2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason 3 */ 4 5#include"git-compat-util.h" 6#include"gettext.h" 7#include"strbuf.h" 8#include"utf8.h" 9 10#ifndef NO_GETTEXT 11# include <locale.h> 12# include <libintl.h> 13# ifdef HAVE_LIBCHARSET_H 14# include <libcharset.h> 15# else 16# include <langinfo.h> 17# define locale_charset() nl_langinfo(CODESET) 18# endif 19#endif 20 21/* 22 * Guess the user's preferred languages from the value in LANGUAGE environment 23 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined. 24 * 25 * The result can be a colon-separated list like "ko:ja:en". 26 */ 27const char*get_preferred_languages(void) 28{ 29const char*retval; 30 31 retval =getenv("LANGUAGE"); 32if(retval && *retval) 33return retval; 34 35#ifndef NO_GETTEXT 36 retval =setlocale(LC_MESSAGES, NULL); 37if(retval && *retval && 38strcmp(retval,"C") && 39strcmp(retval,"POSIX")) 40return retval; 41#endif 42 43return NULL; 44} 45 46#ifdef GETTEXT_POISON 47intuse_gettext_poison(void) 48{ 49static int poison_requested = -1; 50if(poison_requested == -1) 51 poison_requested =getenv("GIT_GETTEXT_POISON") ?1:0; 52return poison_requested; 53} 54#endif 55 56#ifndef NO_GETTEXT 57static inttest_vsnprintf(const char*fmt, ...) 58{ 59char buf[26]; 60int ret; 61va_list ap; 62va_start(ap, fmt); 63 ret =vsnprintf(buf,sizeof(buf), fmt, ap); 64va_end(ap); 65return ret; 66} 67 68static const char*charset; 69static voidinit_gettext_charset(const char*domain) 70{ 71/* 72 This trick arranges for messages to be emitted in the user's 73 requested encoding, but avoids setting LC_CTYPE from the 74 environment for the whole program. 75 76 This primarily done to avoid a bug in vsnprintf in the GNU C 77 Library [1]. which triggered a "your vsnprintf is broken" error 78 on Git's own repository when inspecting v0.99.6~1 under a UTF-8 79 locale. 80 81 That commit contains a ISO-8859-1 encoded author name, which 82 the locale aware vsnprintf(3) won't interpolate in the format 83 argument, due to mismatch between the data encoding and the 84 locale. 85 86 Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at 87 this point, because it'd require auditing all the code that uses C 88 functions whose semantics are modified by LC_CTYPE. 89 90 But only setting LC_MESSAGES as we do creates a problem, since 91 we declare the encoding of our PO files[2] the gettext 92 implementation will try to recode it to the user's locale, but 93 without LC_CTYPE it'll emit something like this on 'git init' 94 under the Icelandic locale: 95 96 Bj? til t?ma Git lind ? /hlagh/.git/ 97 98 Gettext knows about the encoding of our PO file, but we haven't 99 told it about the user's encoding, so all the non-US-ASCII 100 characters get encoded to question marks. 101 102 But we're in luck! We can set LC_CTYPE from the environment 103 only while we call nl_langinfo and 104 bind_textdomain_codeset. That suffices to tell gettext what 105 encoding it should emit in, so it'll now say: 106 107 Bjó til tóma Git lind í /hlagh/.git/ 108 109 And the equivalent ISO-8859-1 string will be emitted under a 110 ISO-8859-1 locale. 111 112 With this change way we get the advantages of setting LC_CTYPE 113 (talk to the user in his language/encoding), without the major 114 drawbacks (changed semantics for C functions we rely on). 115 116 However foreign functions using other message catalogs that 117 aren't using our neat trick will still have a problem, e.g. if 118 we have to call perror(3): 119 120 #include <stdio.h> 121 #include <locale.h> 122 #include <errno.h> 123 124 int main(void) 125 { 126 setlocale(LC_MESSAGES, ""); 127 setlocale(LC_CTYPE, "C"); 128 errno = ENODEV; 129 perror("test"); 130 return 0; 131 } 132 133 Running that will give you a message with question marks: 134 135 $ LANGUAGE= LANG=de_DE.utf8 ./test 136 test: Kein passendes Ger?t gefunden 137 138 The vsnprintf bug has been fixed since glibc 2.17. 139 140 Then we could simply set LC_CTYPE from the environment, which would 141 make things like the external perror(3) messages work. 142 143 See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for 144 regression tests. 145 146 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530 147 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po 148 */ 149setlocale(LC_CTYPE,""); 150 charset =locale_charset(); 151bind_textdomain_codeset(domain, charset); 152/* the string is taken from v0.99.6~1 */ 153if(test_vsnprintf("%.*s",13,"David_K\345gedal") <0) 154setlocale(LC_CTYPE,"C"); 155} 156 157voidgit_setup_gettext(void) 158{ 159const char*podir =getenv("GIT_TEXTDOMAINDIR"); 160 161if(!podir) 162 podir = GIT_LOCALE_PATH; 163bindtextdomain("git", podir); 164setlocale(LC_MESSAGES,""); 165init_gettext_charset("git"); 166textdomain("git"); 167} 168 169/* return the number of columns of string 's' in current locale */ 170intgettext_width(const char*s) 171{ 172static int is_utf8 = -1; 173if(is_utf8 == -1) 174 is_utf8 = !strcmp(charset,"UTF-8"); 175 176return is_utf8 ?utf8_strwidth(s) :strlen(s); 177} 178#endif