gettext.con commit Fourth batch (bc12974)
   1/*
   2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason
   3 */
   4
   5#include "cache.h"
   6#include "exec-cmd.h"
   7#include "gettext.h"
   8#include "strbuf.h"
   9#include "utf8.h"
  10#include "config.h"
  11
  12#ifndef NO_GETTEXT
  13#       include <locale.h>
  14#       include <libintl.h>
  15#       ifdef GIT_WINDOWS_NATIVE
  16
  17static const char *locale_charset(void)
  18{
  19        const char *env = getenv("LC_ALL"), *dot;
  20
  21        if (!env || !*env)
  22                env = getenv("LC_CTYPE");
  23        if (!env || !*env)
  24                env = getenv("LANG");
  25
  26        if (!env)
  27                return "UTF-8";
  28
  29        dot = strchr(env, '.');
  30        return !dot ? env : dot + 1;
  31}
  32
  33#       elif defined HAVE_LIBCHARSET_H
  34#               include <libcharset.h>
  35#       else
  36#               include <langinfo.h>
  37#               define locale_charset() nl_langinfo(CODESET)
  38#       endif
  39#endif
  40
/*
 * Name of the character set handed to is_encoding_utf8() by
 * is_utf8_locale(). Assigned by init_gettext_charset() in gettext builds
 * and lazily by is_utf8_locale() when NO_GETTEXT is defined; NULL until
 * one of them has run.
 */
static const char *charset;
  42
  43/*
  44 * Guess the user's preferred languages from the value in LANGUAGE environment
  45 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
  46 *
  47 * The result can be a colon-separated list like "ko:ja:en".
  48 */
  49const char *get_preferred_languages(void)
  50{
  51        const char *retval;
  52
  53        retval = getenv("LANGUAGE");
  54        if (retval && *retval)
  55                return retval;
  56
  57#ifndef NO_GETTEXT
  58        retval = setlocale(LC_MESSAGES, NULL);
  59        if (retval && *retval &&
  60                strcmp(retval, "C") &&
  61                strcmp(retval, "POSIX"))
  62                return retval;
  63#endif
  64
  65        return NULL;
  66}
  67
  68int use_gettext_poison(void)
  69{
  70        static int poison_requested = -1;
  71        if (poison_requested == -1)
  72                poison_requested = git_env_bool("GIT_TEST_GETTEXT_POISON", 0);
  73        return poison_requested;
  74}
  75
  76#ifndef NO_GETTEXT
  77static int test_vsnprintf(const char *fmt, ...)
  78{
  79        char buf[26];
  80        int ret;
  81        va_list ap;
  82        va_start(ap, fmt);
  83        ret = vsnprintf(buf, sizeof(buf), fmt, ap);
  84        va_end(ap);
  85        return ret;
  86}
  87
/*
 * Tell gettext which character set to emit the messages of `domain` in.
 *
 * Sets LC_CTYPE from the environment, stores the value returned by
 * locale_charset() in the file-scope `charset` variable and binds it as
 * the output codeset of `domain`. LC_CTYPE is put back to "C" only when
 * the vsnprintf() probe at the end fails.
 */
static void init_gettext_charset(const char *domain)
{
        /*
           This trick arranges for messages to be emitted in the user's
           requested encoding, but avoids setting LC_CTYPE from the
           environment for the whole program.

           This is primarily done to avoid a bug in vsnprintf in the GNU C
           Library [1], which triggered a "your vsnprintf is broken" error
           on Git's own repository when inspecting v0.99.6~1 under a UTF-8
           locale.

           That commit contains an ISO-8859-1 encoded author name, which
           the locale aware vsnprintf(3) won't interpolate in the format
           argument, due to mismatch between the data encoding and the
           locale.

           Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at
           this point, because it'd require auditing all the code that uses C
           functions whose semantics are modified by LC_CTYPE.

           But only setting LC_MESSAGES as we do creates a problem: since
           we declare the encoding of our PO files[2], the gettext
           implementation will try to recode it to the user's locale, but
           without LC_CTYPE it'll emit something like this on 'git init'
           under the Icelandic locale:

               Bj? til t?ma Git lind ? /hlagh/.git/

           Gettext knows about the encoding of our PO file, but we haven't
           told it about the user's encoding, so all the non-US-ASCII
           characters get encoded to question marks.

           But we're in luck! We can set LC_CTYPE from the environment
           only while we call nl_langinfo and
           bind_textdomain_codeset. That suffices to tell gettext what
           encoding it should emit in, so it'll now say:

               Bjó til tóma Git lind í /hlagh/.git/

           And the equivalent ISO-8859-1 string will be emitted under an
           ISO-8859-1 locale.

           This way we get the advantages of setting LC_CTYPE (talk to
           the user in their language/encoding), without the major
           drawbacks (changed semantics for C functions we rely on).

           However foreign functions using other message catalogs that
           aren't using our neat trick will still have a problem, e.g. if
           we have to call perror(3):

           #include <stdio.h>
           #include <locale.h>
           #include <errno.h>

           int main(void)
           {
                   setlocale(LC_MESSAGES, "");
                   setlocale(LC_CTYPE, "C");
                   errno = ENODEV;
                   perror("test");
                   return 0;
           }

           Running that will give you a message with question marks:

           $ LANGUAGE= LANG=de_DE.utf8 ./test
           test: Kein passendes Ger?t gefunden

           The vsnprintf bug has been fixed since glibc 2.17. Where the
           probe below succeeds we therefore simply keep LC_CTYPE set from
           the environment, which makes things like the external perror(3)
           messages work; LC_CTYPE is only reset to "C" when the probe
           fails.

           See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for
           regression tests.

           1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530
           2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po
        */
        setlocale(LC_CTYPE, "");
        /*
         * NOTE(review): the pointer obtained here is kept in `charset`
         * across the setlocale() call below; POSIX allows nl_langinfo()
         * results to be invalidated by a later setlocale() -- confirm
         * whether a private copy is needed on the supported platforms.
         */
        charset = locale_charset();
        bind_textdomain_codeset(domain, charset);
        /* the string is taken from v0.99.6~1 */
        if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0)
                setlocale(LC_CTYPE, "C");
}
 175
 176void git_setup_gettext(void)
 177{
 178        const char *podir = getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT);
 179        char *p = NULL;
 180
 181        if (!podir)
 182                podir = p = system_path(GIT_LOCALE_PATH);
 183
 184        use_gettext_poison(); /* getenv() reentrancy paranoia */
 185
 186        if (!is_directory(podir)) {
 187                free(p);
 188                return;
 189        }
 190
 191        bindtextdomain("git", podir);
 192        setlocale(LC_MESSAGES, "");
 193        setlocale(LC_TIME, "");
 194        init_gettext_charset("git");
 195        textdomain("git");
 196
 197        free(p);
 198}
 199
 200/* return the number of columns of string 's' in current locale */
 201int gettext_width(const char *s)
 202{
 203        static int is_utf8 = -1;
 204        if (is_utf8 == -1)
 205                is_utf8 = is_utf8_locale();
 206
 207        return is_utf8 ? utf8_strwidth(s) : strlen(s);
 208}
 209#endif
 210
 211int is_utf8_locale(void)
 212{
 213#ifdef NO_GETTEXT
 214        if (!charset) {
 215                const char *env = getenv("LC_ALL");
 216                if (!env || !*env)
 217                        env = getenv("LC_CTYPE");
 218                if (!env || !*env)
 219                        env = getenv("LANG");
 220                if (!env)
 221                        env = "";
 222                if (strchr(env, '.'))
 223                        env = strchr(env, '.') + 1;
 224                charset = xstrdup(env);
 225        }
 226#endif
 227        return is_encoding_utf8(charset);
 228}