gettext.con commit Merge branch 'az/instaweb-py3-http-server' (abf39e3)
   1/*
   2 * Copyright (c) 2010 Ævar Arnfjörð Bjarmason
   3 */
   4
   5#include "cache.h"
   6#include "exec-cmd.h"
   7#include "gettext.h"
   8#include "strbuf.h"
   9#include "utf8.h"
  10#include "config.h"
  11
  12#ifndef NO_GETTEXT
  13#       include <locale.h>
  14#       include <libintl.h>
  15#       ifdef HAVE_LIBCHARSET_H
  16#               include <libcharset.h>
  17#       else
  18#               include <langinfo.h>
  19#               define locale_charset() nl_langinfo(CODESET)
  20#       endif
  21#endif
  22
/*
 * Name of the character set of the user's locale.
 *
 * Written by init_gettext_charset() when gettext support is compiled in,
 * or lazily by is_utf8_locale() when built with NO_GETTEXT; read by
 * is_utf8_locale().
 */
static const char *charset;
  24
  25/*
  26 * Guess the user's preferred languages from the value in LANGUAGE environment
  27 * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
  28 *
  29 * The result can be a colon-separated list like "ko:ja:en".
  30 */
  31const char *get_preferred_languages(void)
  32{
  33        const char *retval;
  34
  35        retval = getenv("LANGUAGE");
  36        if (retval && *retval)
  37                return retval;
  38
  39#ifndef NO_GETTEXT
  40        retval = setlocale(LC_MESSAGES, NULL);
  41        if (retval && *retval &&
  42                strcmp(retval, "C") &&
  43                strcmp(retval, "POSIX"))
  44                return retval;
  45#endif
  46
  47        return NULL;
  48}
  49
  50int use_gettext_poison(void)
  51{
  52        static int poison_requested = -1;
  53        if (poison_requested == -1) {
  54                const char *v = getenv("GIT_TEST_GETTEXT_POISON");
  55                poison_requested = v && strlen(v) ? 1 : 0;
  56        }
  57        return poison_requested;
  58}
  59
  60#ifndef NO_GETTEXT
  61static int test_vsnprintf(const char *fmt, ...)
  62{
  63        char buf[26];
  64        int ret;
  65        va_list ap;
  66        va_start(ap, fmt);
  67        ret = vsnprintf(buf, sizeof(buf), fmt, ap);
  68        va_end(ap);
  69        return ret;
  70}
  71
/*
 * Set LC_CTYPE from the environment, record the locale's character set in
 * the file-scope `charset`, and tell gettext to emit messages of `domain`
 * in that character set. Afterwards vsnprintf() is probed with a string
 * known to have triggered a glibc bug; if the probe fails, LC_CTYPE is
 * reset to "C".
 */
static void init_gettext_charset(const char *domain)
{
        /*
           This trick arranges for messages to be emitted in the user's
           requested encoding, but avoids setting LC_CTYPE from the
           environment for the whole program.

           This is primarily done to avoid a bug in vsnprintf in the GNU C
           Library [1], which triggered a "your vsnprintf is broken" error
           on Git's own repository when inspecting v0.99.6~1 under a UTF-8
           locale.

           That commit contains an ISO-8859-1 encoded author name, which
           the locale aware vsnprintf(3) won't interpolate in the format
           argument, due to mismatch between the data encoding and the
           locale.

           Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at
           this point, because it'd require auditing all the code that uses C
           functions whose semantics are modified by LC_CTYPE.

           But only setting LC_MESSAGES as we do creates a problem: since
           we declare the encoding of our PO files[2], the gettext
           implementation will try to recode it to the user's locale, but
           without LC_CTYPE it'll emit something like this on 'git init'
           under the Icelandic locale:

               Bj? til t?ma Git lind ? /hlagh/.git/

           Gettext knows about the encoding of our PO file, but we haven't
           told it about the user's encoding, so all the non-US-ASCII
           characters get encoded to question marks.

           But we're in luck! We can set LC_CTYPE from the environment
           only while we call nl_langinfo and
           bind_textdomain_codeset. That suffices to tell gettext what
           encoding it should emit in, so it'll now say:

               Bjó til tóma Git lind í /hlagh/.git/

           And the equivalent ISO-8859-1 string will be emitted under an
           ISO-8859-1 locale.

           This way we get the advantages of setting LC_CTYPE (talk to
           the user in their language/encoding), without the major
           drawbacks (changed semantics for C functions we rely on).

           However foreign functions using other message catalogs that
           aren't using our neat trick will still have a problem, e.g. if
           we have to call perror(3):

           #include <stdio.h>
           #include <locale.h>
           #include <errno.h>

           int main(void)
           {
                   setlocale(LC_MESSAGES, "");
                   setlocale(LC_CTYPE, "C");
                   errno = ENODEV;
                   perror("test");
                   return 0;
           }

           Running that will give you a message with question marks:

           $ LANGUAGE= LANG=de_DE.utf8 ./test
           test: Kein passendes Ger?t gefunden

           The vsnprintf bug has been fixed since glibc 2.17.

           Without that bug we can simply set LC_CTYPE from the
           environment, which makes things like the external perror(3)
           messages work.

           See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for
           regression tests.

           1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530
           2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po
        */
        /*
         * NOTE(review): the statements below do set LC_CTYPE from the
         * environment and leave it set; it is only reverted to "C" when the
         * vsnprintf probe at the end reports failure. The narrative above
         * explains the motivation for that fallback.
         */
        setlocale(LC_CTYPE, "");
        /* Must be queried while LC_CTYPE reflects the environment. */
        charset = locale_charset();
        bind_textdomain_codeset(domain, charset);
        /* the string is taken from v0.99.6~1 */
        if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0)
                /* vsnprintf() rejected the non-locale bytes: fall back. */
                setlocale(LC_CTYPE, "C");
}
 159
 160void git_setup_gettext(void)
 161{
 162        const char *podir = getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT);
 163        char *p = NULL;
 164
 165        if (!podir)
 166                podir = p = system_path(GIT_LOCALE_PATH);
 167
 168        use_gettext_poison(); /* getenv() reentrancy paranoia */
 169
 170        if (!is_directory(podir)) {
 171                free(p);
 172                return;
 173        }
 174
 175        bindtextdomain("git", podir);
 176        setlocale(LC_MESSAGES, "");
 177        setlocale(LC_TIME, "");
 178        init_gettext_charset("git");
 179        textdomain("git");
 180
 181        free(p);
 182}
 183
 184/* return the number of columns of string 's' in current locale */
 185int gettext_width(const char *s)
 186{
 187        static int is_utf8 = -1;
 188        if (is_utf8 == -1)
 189                is_utf8 = is_utf8_locale();
 190
 191        return is_utf8 ? utf8_strwidth(s) : strlen(s);
 192}
 193#endif
 194
 195int is_utf8_locale(void)
 196{
 197#ifdef NO_GETTEXT
 198        if (!charset) {
 199                const char *env = getenv("LC_ALL");
 200                if (!env || !*env)
 201                        env = getenv("LC_CTYPE");
 202                if (!env || !*env)
 203                        env = getenv("LANG");
 204                if (!env)
 205                        env = "";
 206                if (strchr(env, '.'))
 207                        env = strchr(env, '.') + 1;
 208                charset = xstrdup(env);
 209        }
 210#endif
 211        return is_encoding_utf8(charset);
 212}