/* compat/precompose_utf8.c, on commit "Merge branch 'rm/subtree-unwrap-tags'" (a039a79) */
   1/*
   2 * Converts filenames from decomposed unicode into precomposed unicode.
   3 * Used on MacOS X.
   4 */
   5
   6#define PRECOMPOSE_UNICODE_C
   7
   8#include "cache.h"
   9#include "utf8.h"
  10#include "precompose_utf8.h"
  11
  /*
   * Type of the input-buffer cursor handed to iconv(); the directory entry
   * name is cast to this type before the call.
   */
  typedef char *iconv_ibp;

  /*
   * Encoding names given to iconv_open() as (to, from): names are converted
   * from path_encoding into repo_encoding. Neither pointer is ever
   * reassigned, hence the const pointers.
   */
  static const char *const repo_encoding = "UTF-8";
  static const char *const path_encoding = "UTF-8-MAC";
  15
  /*
   * Count the bytes of `s` that have the high bit set (i.e. are not 7-bit
   * ASCII). Scanning stops at the first NUL byte or after `maxlen` bytes,
   * whichever comes first; the byte at s[maxlen] is never read.
   *
   * If `strlen_c` is non-NULL it receives the number of bytes scanned. It
   * is always written, including for a NULL or empty `s` (as 0), so a
   * caller never ends up with an indeterminate length.
   *
   * Returns the number of non-ASCII bytes; 0 means the scanned part is
   * pure ASCII (or `s` was NULL/empty).
   */
  static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
  {
          const uint8_t *ptr = (const uint8_t *)s;
          size_t strlen_chars = 0;
          size_t ret = 0;

          if (ptr) {
                  /* Test maxlen first so a full, unterminated buffer is not overrun. */
                  for (; maxlen && *ptr; ptr++, maxlen--) {
                          if (*ptr & 0x80)
                                  ret++;
                          strlen_chars++;
                  }
          }
          if (strlen_c)
                  *strlen_c = strlen_chars;

          return ret;
  }
  37
  38
  39void probe_utf8_pathname_composition(void)
  40{
  41        struct strbuf path = STRBUF_INIT;
  42        static const char *auml_nfc = "\xc3\xa4";
  43        static const char *auml_nfd = "\x61\xcc\x88";
  44        int output_fd;
  45        if (precomposed_unicode != -1)
  46                return; /* We found it defined in the global config, respect it */
  47        git_path_buf(&path, "%s", auml_nfc);
  48        output_fd = open(path.buf, O_CREAT|O_EXCL|O_RDWR, 0600);
  49        if (output_fd >= 0) {
  50                close(output_fd);
  51                git_path_buf(&path, "%s", auml_nfd);
  52                precomposed_unicode = access(path.buf, R_OK) ? 0 : 1;
  53                git_config_set("core.precomposeunicode", precomposed_unicode ? "true" : "false");
  54                git_path_buf(&path, "%s", auml_nfc);
  55                if (unlink(path.buf))
  56                        die_errno(_("failed to unlink '%s'"), path.buf);
  57        }
  58        strbuf_release(&path);
  59}
  60
  61
  62void precompose_argv(int argc, const char **argv)
  63{
  64        int i = 0;
  65        const char *oldarg;
  66        char *newarg;
  67        iconv_t ic_precompose;
  68
  69        if (precomposed_unicode != 1)
  70                return;
  71
  72        ic_precompose = iconv_open(repo_encoding, path_encoding);
  73        if (ic_precompose == (iconv_t) -1)
  74                return;
  75
  76        while (i < argc) {
  77                size_t namelen;
  78                oldarg = argv[i];
  79                if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
  80                        newarg = reencode_string_iconv(oldarg, namelen, ic_precompose, NULL);
  81                        if (newarg)
  82                                argv[i] = newarg;
  83                }
  84                i++;
  85        }
  86        iconv_close(ic_precompose);
  87}
  88
  89
  90PREC_DIR *precompose_utf8_opendir(const char *dirname)
  91{
  92        PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR));
  93        prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx));
  94        prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name);
  95
  96        prec_dir->dirp = opendir(dirname);
  97        if (!prec_dir->dirp) {
  98                free(prec_dir->dirent_nfc);
  99                free(prec_dir);
 100                return NULL;
 101        } else {
 102                int ret_errno = errno;
 103                prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding);
 104                /* if iconv_open() fails, die() in readdir() if needed */
 105                errno = ret_errno;
 106        }
 107
 108        return prec_dir;
 109}
 110
 111struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir)
 112{
 113        struct dirent *res;
 114        res = readdir(prec_dir->dirp);
 115        if (res) {
 116                size_t namelenz = strlen(res->d_name) + 1; /* \0 */
 117                size_t new_maxlen = namelenz;
 118
 119                int ret_errno = errno;
 120
 121                if (new_maxlen > prec_dir->dirent_nfc->max_name_len) {
 122                        size_t new_len = sizeof(dirent_prec_psx) + new_maxlen -
 123                                sizeof(prec_dir->dirent_nfc->d_name);
 124
 125                        prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len);
 126                        prec_dir->dirent_nfc->max_name_len = new_maxlen;
 127                }
 128
 129                prec_dir->dirent_nfc->d_ino  = res->d_ino;
 130                prec_dir->dirent_nfc->d_type = res->d_type;
 131
 132                if ((precomposed_unicode == 1) && has_non_ascii(res->d_name, (size_t)-1, NULL)) {
 133                        if (prec_dir->ic_precompose == (iconv_t)-1) {
 134                                die("iconv_open(%s,%s) failed, but needed:\n"
 135                                                "    precomposed unicode is not supported.\n"
 136                                                "    If you want to use decomposed unicode, run\n"
 137                                                "    \"git config core.precomposeunicode false\"\n",
 138                                                repo_encoding, path_encoding);
 139                        } else {
 140                                iconv_ibp       cp = (iconv_ibp)res->d_name;
 141                                size_t inleft = namelenz;
 142                                char *outpos = &prec_dir->dirent_nfc->d_name[0];
 143                                size_t outsz = prec_dir->dirent_nfc->max_name_len;
 144                                errno = 0;
 145                                iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz);
 146                                if (errno || inleft) {
 147                                        /*
 148                                         * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF
 149                                         * MacOS X avoids illegal byte sequemces.
 150                                         * If they occur on a mounted drive (e.g. NFS) it is not worth to
 151                                         * die() for that, but rather let the user see the original name
 152                                        */
 153                                        namelenz = 0; /* trigger strlcpy */
 154                                }
 155                        }
 156                } else
 157                        namelenz = 0;
 158
 159                if (!namelenz)
 160                        strlcpy(prec_dir->dirent_nfc->d_name, res->d_name,
 161                                                        prec_dir->dirent_nfc->max_name_len);
 162
 163                errno = ret_errno;
 164                return prec_dir->dirent_nfc;
 165        }
 166        return NULL;
 167}
 168
 169
 170int precompose_utf8_closedir(PREC_DIR *prec_dir)
 171{
 172        int ret_value;
 173        int ret_errno;
 174        ret_value = closedir(prec_dir->dirp);
 175        ret_errno = errno;
 176        if (prec_dir->ic_precompose != (iconv_t)-1)
 177                iconv_close(prec_dir->ic_precompose);
 178        free(prec_dir->dirent_nfc);
 179        free(prec_dir);
 180        errno = ret_errno;
 181        return ret_value;
 182}