gettext.con commit Merge branch 'br/blame-ignore' (209f075)
   1/*
   2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason
   3 */
   4
   5#include "cache.h"
   6#include "exec-cmd.h"
   7#include "gettext.h"
   8#include "strbuf.h"
   9#include "utf8.h"
  10#include "config.h"
  11
  12#ifndef NO_GETTEXT
  13#       include <locale.h>
  14#       include <libintl.h>
  15#       ifdef GIT_WINDOWS_NATIVE
  16
  17static const char *locale_charset(void)
  18{
  19        const char *env = getenv("LC_ALL"), *dot;
  20
  21        if (!env || !*env)
  22                env = getenv("LC_CTYPE");
  23        if (!env || !*env)
  24                env = getenv("LANG");
  25
  26        if (!env)
  27                return "UTF-8";
  28
  29        dot = strchr(env, '.');
  30        return !dot ? env : dot + 1;
  31}
  32
  33#       elif defined HAVE_LIBCHARSET_H
  34#               include <libcharset.h>
  35#       else
  36#               include <langinfo.h>
  37#               define locale_charset() nl_langinfo(CODESET)
  38#       endif
  39#endif
  40
/*
 * Name of the character encoding, as consumed by is_utf8_locale().
 * With gettext enabled it is assigned in init_gettext_charset() from
 * locale_charset(); under NO_GETTEXT it is filled in lazily by
 * is_utf8_locale() from LC_ALL / LC_CTYPE / LANG.  It stays NULL until
 * one of those has run.
 */
static const char *charset;
  42
  43/*
  44 * Guess the user's preferred languages from the value in LANGUAGE environment
  45 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
  46 *
  47 * The result can be a colon-separated list like "ko:ja:en".
  48 */
  49const char *get_preferred_languages(void)
  50{
  51        const char *retval;
  52
  53        retval = getenv("LANGUAGE");
  54        if (retval && *retval)
  55                return retval;
  56
  57#ifndef NO_GETTEXT
  58        retval = setlocale(LC_MESSAGES, NULL);
  59        if (retval && *retval &&
  60                strcmp(retval, "C") &&
  61                strcmp(retval, "POSIX"))
  62                return retval;
  63#endif
  64
  65        return NULL;
  66}
  67
  68int use_gettext_poison(void)
  69{
  70        static int poison_requested = -1;
  71        if (poison_requested == -1) {
  72                const char *v = getenv("GIT_TEST_GETTEXT_POISON");
  73                poison_requested = v && strlen(v) ? 1 : 0;
  74        }
  75        return poison_requested;
  76}
  77
  78#ifndef NO_GETTEXT
  79static int test_vsnprintf(const char *fmt, ...)
  80{
  81        char buf[26];
  82        int ret;
  83        va_list ap;
  84        va_start(ap, fmt);
  85        ret = vsnprintf(buf, sizeof(buf), fmt, ap);
  86        va_end(ap);
  87        return ret;
  88}
  89
  90static void init_gettext_charset(const char *domain)
  91{
  92        /*
  93           This trick arranges for messages to be emitted in the user's
  94           requested encoding, but avoids setting LC_CTYPE from the
  95           environment for the whole program.
  96
  97           This primarily done to avoid a bug in vsnprintf in the GNU C
  98           Library [1]. which triggered a "your vsnprintf is broken" error
  99           on Git's own repository when inspecting v0.99.6~1 under a UTF-8
 100           locale.
 101
 102           That commit contains a ISO-8859-1 encoded author name, which
 103           the locale aware vsnprintf(3) won't interpolate in the format
 104           argument, due to mismatch between the data encoding and the
 105           locale.
 106
 107           Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at
 108           this point, because it'd require auditing all the code that uses C
 109           functions whose semantics are modified by LC_CTYPE.
 110
 111           But only setting LC_MESSAGES as we do creates a problem, since
 112           we declare the encoding of our PO files[2] the gettext
 113           implementation will try to recode it to the user's locale, but
 114           without LC_CTYPE it'll emit something like this on 'git init'
 115           under the Icelandic locale:
 116
 117               Bj? til t?ma Git lind ? /hlagh/.git/
 118
 119           Gettext knows about the encoding of our PO file, but we haven't
 120           told it about the user's encoding, so all the non-US-ASCII
 121           characters get encoded to question marks.
 122
 123           But we're in luck! We can set LC_CTYPE from the environment
 124           only while we call nl_langinfo and
 125           bind_textdomain_codeset. That suffices to tell gettext what
 126           encoding it should emit in, so it'll now say:
 127
 128               Bjó til tóma Git lind í /hlagh/.git/
 129
 130           And the equivalent ISO-8859-1 string will be emitted under a
 131           ISO-8859-1 locale.
 132
 133           With this change way we get the advantages of setting LC_CTYPE
 134           (talk to the user in his language/encoding), without the major
 135           drawbacks (changed semantics for C functions we rely on).
 136
 137           However foreign functions using other message catalogs that
 138           aren't using our neat trick will still have a problem, e.g. if
 139           we have to call perror(3):
 140
 141           #include <stdio.h>
 142           #include <locale.h>
 143           #include <errno.h>
 144
 145           int main(void)
 146           {
 147                   setlocale(LC_MESSAGES, "");
 148                   setlocale(LC_CTYPE, "C");
 149                   errno = ENODEV;
 150                   perror("test");
 151                   return 0;
 152           }
 153
 154           Running that will give you a message with question marks:
 155
 156           $ LANGUAGE= LANG=de_DE.utf8 ./test
 157           test: Kein passendes Ger?t gefunden
 158
 159           The vsnprintf bug has been fixed since glibc 2.17.
 160
 161           Then we could simply set LC_CTYPE from the environment, which would
 162           make things like the external perror(3) messages work.
 163
 164           See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for
 165           regression tests.
 166
 167           1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530
 168           2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po
 169        */
 170        setlocale(LC_CTYPE, "");
 171        charset = locale_charset();
 172        bind_textdomain_codeset(domain, charset);
 173        /* the string is taken from v0.99.6~1 */
 174        if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0)
 175                setlocale(LC_CTYPE, "C");
 176}
 177
 178void git_setup_gettext(void)
 179{
 180        const char *podir = getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT);
 181        char *p = NULL;
 182
 183        if (!podir)
 184                podir = p = system_path(GIT_LOCALE_PATH);
 185
 186        use_gettext_poison(); /* getenv() reentrancy paranoia */
 187
 188        if (!is_directory(podir)) {
 189                free(p);
 190                return;
 191        }
 192
 193        bindtextdomain("git", podir);
 194        setlocale(LC_MESSAGES, "");
 195        setlocale(LC_TIME, "");
 196        init_gettext_charset("git");
 197        textdomain("git");
 198
 199        free(p);
 200}
 201
 202/* return the number of columns of string 's' in current locale */
 203int gettext_width(const char *s)
 204{
 205        static int is_utf8 = -1;
 206        if (is_utf8 == -1)
 207                is_utf8 = is_utf8_locale();
 208
 209        return is_utf8 ? utf8_strwidth(s) : strlen(s);
 210}
 211#endif
 212
 213int is_utf8_locale(void)
 214{
 215#ifdef NO_GETTEXT
 216        if (!charset) {
 217                const char *env = getenv("LC_ALL");
 218                if (!env || !*env)
 219                        env = getenv("LC_CTYPE");
 220                if (!env || !*env)
 221                        env = getenv("LANG");
 222                if (!env)
 223                        env = "";
 224                if (strchr(env, '.'))
 225                        env = strchr(env, '.') + 1;
 226                charset = xstrdup(env);
 227        }
 228#endif
 229        return is_encoding_utf8(charset);
 230}