compat / precompose_utf8.c on commit Merge branch 'jc/denoise-rm-to-resolve' (5e9d978)
   1/*
   2 * Converts filenames from decomposed unicode into precomposed unicode.
   3 * Used on MacOS X.
   4 */
   5
   6#define PRECOMPOSE_UNICODE_C
   7
   8#include "cache.h"
   9#include "config.h"
  10#include "utf8.h"
  11#include "precompose_utf8.h"
  12
  13typedef char *iconv_ibp;
  14static const char *repo_encoding = "UTF-8";
  15static const char *path_encoding = "UTF-8-MAC";
  16
  17static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
  18{
  19        const uint8_t *ptr = (const uint8_t *)s;
  20        size_t strlen_chars = 0;
  21        size_t ret = 0;
  22
  23        if (!ptr || !*ptr)
  24                return 0;
  25
  26        while (*ptr && maxlen) {
  27                if (*ptr & 0x80)
  28                        ret++;
  29                strlen_chars++;
  30                ptr++;
  31                maxlen--;
  32        }
  33        if (strlen_c)
  34                *strlen_c = strlen_chars;
  35
  36        return ret;
  37}
  38
  39
  40void probe_utf8_pathname_composition(void)
  41{
  42        struct strbuf path = STRBUF_INIT;
  43        static const char *auml_nfc = "\xc3\xa4";
  44        static const char *auml_nfd = "\x61\xcc\x88";
  45        int output_fd;
  46        if (precomposed_unicode != -1)
  47                return; /* We found it defined in the global config, respect it */
  48        git_path_buf(&path, "%s", auml_nfc);
  49        output_fd = open(path.buf, O_CREAT|O_EXCL|O_RDWR, 0600);
  50        if (output_fd >= 0) {
  51                close(output_fd);
  52                git_path_buf(&path, "%s", auml_nfd);
  53                precomposed_unicode = access(path.buf, R_OK) ? 0 : 1;
  54                git_config_set("core.precomposeunicode",
  55                               precomposed_unicode ? "true" : "false");
  56                git_path_buf(&path, "%s", auml_nfc);
  57                if (unlink(path.buf))
  58                        die_errno(_("failed to unlink '%s'"), path.buf);
  59        }
  60        strbuf_release(&path);
  61}
  62
  63
  64void precompose_argv(int argc, const char **argv)
  65{
  66        int i = 0;
  67        const char *oldarg;
  68        char *newarg;
  69        iconv_t ic_precompose;
  70
  71        if (precomposed_unicode != 1)
  72                return;
  73
  74        ic_precompose = iconv_open(repo_encoding, path_encoding);
  75        if (ic_precompose == (iconv_t) -1)
  76                return;
  77
  78        while (i < argc) {
  79                size_t namelen;
  80                oldarg = argv[i];
  81                if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
  82                        newarg = reencode_string_iconv(oldarg, namelen, ic_precompose, 0, NULL);
  83                        if (newarg)
  84                                argv[i] = newarg;
  85                }
  86                i++;
  87        }
  88        iconv_close(ic_precompose);
  89}
  90
  91
  92PREC_DIR *precompose_utf8_opendir(const char *dirname)
  93{
  94        PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR));
  95        prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx));
  96        prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name);
  97
  98        prec_dir->dirp = opendir(dirname);
  99        if (!prec_dir->dirp) {
 100                free(prec_dir->dirent_nfc);
 101                free(prec_dir);
 102                return NULL;
 103        } else {
 104                int ret_errno = errno;
 105                prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding);
 106                /* if iconv_open() fails, die() in readdir() if needed */
 107                errno = ret_errno;
 108        }
 109
 110        return prec_dir;
 111}
 112
 113struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir)
 114{
 115        struct dirent *res;
 116        res = readdir(prec_dir->dirp);
 117        if (res) {
 118                size_t namelenz = strlen(res->d_name) + 1; /* \0 */
 119                size_t new_maxlen = namelenz;
 120
 121                int ret_errno = errno;
 122
 123                if (new_maxlen > prec_dir->dirent_nfc->max_name_len) {
 124                        size_t new_len = sizeof(dirent_prec_psx) + new_maxlen -
 125                                sizeof(prec_dir->dirent_nfc->d_name);
 126
 127                        prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len);
 128                        prec_dir->dirent_nfc->max_name_len = new_maxlen;
 129                }
 130
 131                prec_dir->dirent_nfc->d_ino  = res->d_ino;
 132                prec_dir->dirent_nfc->d_type = res->d_type;
 133
 134                if ((precomposed_unicode == 1) && has_non_ascii(res->d_name, (size_t)-1, NULL)) {
 135                        if (prec_dir->ic_precompose == (iconv_t)-1) {
 136                                die("iconv_open(%s,%s) failed, but needed:\n"
 137                                                "    precomposed unicode is not supported.\n"
 138                                                "    If you want to use decomposed unicode, run\n"
 139                                                "    \"git config core.precomposeunicode false\"\n",
 140                                                repo_encoding, path_encoding);
 141                        } else {
 142                                iconv_ibp       cp = (iconv_ibp)res->d_name;
 143                                size_t inleft = namelenz;
 144                                char *outpos = &prec_dir->dirent_nfc->d_name[0];
 145                                size_t outsz = prec_dir->dirent_nfc->max_name_len;
 146                                errno = 0;
 147                                iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz);
 148                                if (errno || inleft) {
 149                                        /*
 150                                         * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF
 151                                         * MacOS X avoids illegal byte sequences.
 152                                         * If they occur on a mounted drive (e.g. NFS) it is not worth to
 153                                         * die() for that, but rather let the user see the original name
 154                                        */
 155                                        namelenz = 0; /* trigger strlcpy */
 156                                }
 157                        }
 158                } else
 159                        namelenz = 0;
 160
 161                if (!namelenz)
 162                        strlcpy(prec_dir->dirent_nfc->d_name, res->d_name,
 163                                                        prec_dir->dirent_nfc->max_name_len);
 164
 165                errno = ret_errno;
 166                return prec_dir->dirent_nfc;
 167        }
 168        return NULL;
 169}
 170
 171
 172int precompose_utf8_closedir(PREC_DIR *prec_dir)
 173{
 174        int ret_value;
 175        int ret_errno;
 176        ret_value = closedir(prec_dir->dirp);
 177        ret_errno = errno;
 178        if (prec_dir->ic_precompose != (iconv_t)-1)
 179                iconv_close(prec_dir->ic_precompose);
 180        free(prec_dir->dirent_nfc);
 181        free(prec_dir);
 182        errno = ret_errno;
 183        return ret_value;
 184}