/* compat/precompose_utf8.c on commit: Merge branch 'ls/p4-empty-file-on-lfs' into maint (cf479b4) */
   1/*
   2 * Converts filenames from decomposed unicode into precomposed unicode.
   3 * Used on MacOS X.
   4 */
   5
   6#define PRECOMPOSE_UNICODE_C
   7
   8#include "cache.h"
   9#include "utf8.h"
  10#include "precompose_utf8.h"
  11
  12typedef char *iconv_ibp;
  13static const char *repo_encoding = "UTF-8";
  14static const char *path_encoding = "UTF-8-MAC";
  15
  16static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
  17{
  18        const uint8_t *ptr = (const uint8_t *)s;
  19        size_t strlen_chars = 0;
  20        size_t ret = 0;
  21
  22        if (!ptr || !*ptr)
  23                return 0;
  24
  25        while (*ptr && maxlen) {
  26                if (*ptr & 0x80)
  27                        ret++;
  28                strlen_chars++;
  29                ptr++;
  30                maxlen--;
  31        }
  32        if (strlen_c)
  33                *strlen_c = strlen_chars;
  34
  35        return ret;
  36}
  37
  38
  39void probe_utf8_pathname_composition(void)
  40{
  41        struct strbuf path = STRBUF_INIT;
  42        static const char *auml_nfc = "\xc3\xa4";
  43        static const char *auml_nfd = "\x61\xcc\x88";
  44        int output_fd;
  45        if (precomposed_unicode != -1)
  46                return; /* We found it defined in the global config, respect it */
  47        git_path_buf(&path, "%s", auml_nfc);
  48        output_fd = open(path.buf, O_CREAT|O_EXCL|O_RDWR, 0600);
  49        if (output_fd >= 0) {
  50                close(output_fd);
  51                git_path_buf(&path, "%s", auml_nfd);
  52                precomposed_unicode = access(path.buf, R_OK) ? 0 : 1;
  53                git_config_set("core.precomposeunicode",
  54                               precomposed_unicode ? "true" : "false");
  55                git_path_buf(&path, "%s", auml_nfc);
  56                if (unlink(path.buf))
  57                        die_errno(_("failed to unlink '%s'"), path.buf);
  58        }
  59        strbuf_release(&path);
  60}
  61
  62
  63void precompose_argv(int argc, const char **argv)
  64{
  65        int i = 0;
  66        const char *oldarg;
  67        char *newarg;
  68        iconv_t ic_precompose;
  69
  70        if (precomposed_unicode != 1)
  71                return;
  72
  73        ic_precompose = iconv_open(repo_encoding, path_encoding);
  74        if (ic_precompose == (iconv_t) -1)
  75                return;
  76
  77        while (i < argc) {
  78                size_t namelen;
  79                oldarg = argv[i];
  80                if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
  81                        newarg = reencode_string_iconv(oldarg, namelen, ic_precompose, NULL);
  82                        if (newarg)
  83                                argv[i] = newarg;
  84                }
  85                i++;
  86        }
  87        iconv_close(ic_precompose);
  88}
  89
  90
  91PREC_DIR *precompose_utf8_opendir(const char *dirname)
  92{
  93        PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR));
  94        prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx));
  95        prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name);
  96
  97        prec_dir->dirp = opendir(dirname);
  98        if (!prec_dir->dirp) {
  99                free(prec_dir->dirent_nfc);
 100                free(prec_dir);
 101                return NULL;
 102        } else {
 103                int ret_errno = errno;
 104                prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding);
 105                /* if iconv_open() fails, die() in readdir() if needed */
 106                errno = ret_errno;
 107        }
 108
 109        return prec_dir;
 110}
 111
 112struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir)
 113{
 114        struct dirent *res;
 115        res = readdir(prec_dir->dirp);
 116        if (res) {
 117                size_t namelenz = strlen(res->d_name) + 1; /* \0 */
 118                size_t new_maxlen = namelenz;
 119
 120                int ret_errno = errno;
 121
 122                if (new_maxlen > prec_dir->dirent_nfc->max_name_len) {
 123                        size_t new_len = sizeof(dirent_prec_psx) + new_maxlen -
 124                                sizeof(prec_dir->dirent_nfc->d_name);
 125
 126                        prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len);
 127                        prec_dir->dirent_nfc->max_name_len = new_maxlen;
 128                }
 129
 130                prec_dir->dirent_nfc->d_ino  = res->d_ino;
 131                prec_dir->dirent_nfc->d_type = res->d_type;
 132
 133                if ((precomposed_unicode == 1) && has_non_ascii(res->d_name, (size_t)-1, NULL)) {
 134                        if (prec_dir->ic_precompose == (iconv_t)-1) {
 135                                die("iconv_open(%s,%s) failed, but needed:\n"
 136                                                "    precomposed unicode is not supported.\n"
 137                                                "    If you want to use decomposed unicode, run\n"
 138                                                "    \"git config core.precomposeunicode false\"\n",
 139                                                repo_encoding, path_encoding);
 140                        } else {
 141                                iconv_ibp       cp = (iconv_ibp)res->d_name;
 142                                size_t inleft = namelenz;
 143                                char *outpos = &prec_dir->dirent_nfc->d_name[0];
 144                                size_t outsz = prec_dir->dirent_nfc->max_name_len;
 145                                errno = 0;
 146                                iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz);
 147                                if (errno || inleft) {
 148                                        /*
 149                                         * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF
 150                                         * MacOS X avoids illegal byte sequences.
 151                                         * If they occur on a mounted drive (e.g. NFS) it is not worth to
 152                                         * die() for that, but rather let the user see the original name
 153                                        */
 154                                        namelenz = 0; /* trigger strlcpy */
 155                                }
 156                        }
 157                } else
 158                        namelenz = 0;
 159
 160                if (!namelenz)
 161                        strlcpy(prec_dir->dirent_nfc->d_name, res->d_name,
 162                                                        prec_dir->dirent_nfc->max_name_len);
 163
 164                errno = ret_errno;
 165                return prec_dir->dirent_nfc;
 166        }
 167        return NULL;
 168}
 169
 170
 171int precompose_utf8_closedir(PREC_DIR *prec_dir)
 172{
 173        int ret_value;
 174        int ret_errno;
 175        ret_value = closedir(prec_dir->dirp);
 176        ret_errno = errno;
 177        if (prec_dir->ic_precompose != (iconv_t)-1)
 178                iconv_close(prec_dir->ic_precompose);
 179        free(prec_dir->dirent_nfc);
 180        free(prec_dir);
 181        errno = ret_errno;
 182        return ret_value;
 183}