compat/precompose_utf8.c on commit Merge branch 'nd/attr-match-optim' (70d1825)
   1/*
   2 * Converts filenames from decomposed unicode into precomposed unicode.
   3 * Used on MacOS X.
   4 */
   5
   6#define PRECOMPOSE_UNICODE_C
   7
   8#include "cache.h"
   9#include "utf8.h"
  10#include "precompose_utf8.h"
  11
  12typedef char *iconv_ibp; /* type of the input-buffer pointer whose address is passed to iconv() */
  13static const char *repo_encoding = "UTF-8"; /* first (target) argument of every iconv_open() call in this file */
  14static const char *path_encoding = "UTF-8-MAC"; /* second (source) argument of every iconv_open() call in this file */
  15
  16static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
  17{
  18        const uint8_t *ptr = (const uint8_t *)s;
  19        size_t strlen_chars = 0;
  20        size_t ret = 0;
  21
  22        if (!ptr || !*ptr)
  23                return 0;
  24
  25        while (*ptr && maxlen) {
  26                if (*ptr & 0x80)
  27                        ret++;
  28                strlen_chars++;
  29                ptr++;
  30                maxlen--;
  31        }
  32        if (strlen_c)
  33                *strlen_c = strlen_chars;
  34
  35        return ret;
  36}
  37
  38
  39void probe_utf8_pathname_composition(char *path, int len)
  40{
  41        static const char *auml_nfc = "\xc3\xa4";
  42        static const char *auml_nfd = "\x61\xcc\x88";
  43        int output_fd;
  44        if (precomposed_unicode != -1)
  45                return; /* We found it defined in the global config, respect it */
  46        strcpy(path + len, auml_nfc);
  47        output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600);
  48        if (output_fd >= 0) {
  49                close(output_fd);
  50                strcpy(path + len, auml_nfd);
  51                /* Indicate to the user, that we can configure it to true */
  52                if (!access(path, R_OK))
  53                        git_config_set("core.precomposeunicode", "false");
  54                /* To be backward compatible, set precomposed_unicode to 0 */
  55                precomposed_unicode = 0;
  56                strcpy(path + len, auml_nfc);
  57                if (unlink(path))
  58                        die_errno(_("failed to unlink '%s'"), path);
  59        }
  60}
  61
  62
  63void precompose_argv(int argc, const char **argv)
  64{
  65        int i = 0;
  66        const char *oldarg;
  67        char *newarg;
  68        iconv_t ic_precompose;
  69
  70        if (precomposed_unicode != 1)
  71                return;
  72
  73        ic_precompose = iconv_open(repo_encoding, path_encoding);
  74        if (ic_precompose == (iconv_t) -1)
  75                return;
  76
  77        while (i < argc) {
  78                size_t namelen;
  79                oldarg = argv[i];
  80                if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
  81                        newarg = reencode_string_iconv(oldarg, namelen, ic_precompose);
  82                        if (newarg)
  83                                argv[i] = newarg;
  84                }
  85                i++;
  86        }
  87        iconv_close(ic_precompose);
  88}
  89
  90
  91PREC_DIR *precompose_utf8_opendir(const char *dirname)
  92{
  93        PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR));
  94        prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx));
  95        prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name);
  96
  97        prec_dir->dirp = opendir(dirname);
  98        if (!prec_dir->dirp) {
  99                free(prec_dir->dirent_nfc);
 100                free(prec_dir);
 101                return NULL;
 102        } else {
 103                int ret_errno = errno;
 104                prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding);
 105                /* if iconv_open() fails, die() in readdir() if needed */
 106                errno = ret_errno;
 107        }
 108
 109        return prec_dir;
 110}
 111
 112struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir)
 113{
 114        struct dirent *res;
 115        res = readdir(prec_dir->dirp);
 116        if (res) {
 117                size_t namelenz = strlen(res->d_name) + 1; /* \0 */
 118                size_t new_maxlen = namelenz;
 119
 120                int ret_errno = errno;
 121
 122                if (new_maxlen > prec_dir->dirent_nfc->max_name_len) {
 123                        size_t new_len = sizeof(dirent_prec_psx) + new_maxlen -
 124                                sizeof(prec_dir->dirent_nfc->d_name);
 125
 126                        prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len);
 127                        prec_dir->dirent_nfc->max_name_len = new_maxlen;
 128                }
 129
 130                prec_dir->dirent_nfc->d_ino  = res->d_ino;
 131                prec_dir->dirent_nfc->d_type = res->d_type;
 132
 133                if ((precomposed_unicode == 1) && has_non_ascii(res->d_name, (size_t)-1, NULL)) {
 134                        if (prec_dir->ic_precompose == (iconv_t)-1) {
 135                                die("iconv_open(%s,%s) failed, but needed:\n"
 136                                                "    precomposed unicode is not supported.\n"
 137                                                "    If you wnat to use decomposed unicode, run\n"
 138                                                "    \"git config core.precomposeunicode false\"\n",
 139                                                repo_encoding, path_encoding);
 140                        } else {
 141                                iconv_ibp       cp = (iconv_ibp)res->d_name;
 142                                size_t inleft = namelenz;
 143                                char *outpos = &prec_dir->dirent_nfc->d_name[0];
 144                                size_t outsz = prec_dir->dirent_nfc->max_name_len;
 145                                size_t cnt;
 146                                errno = 0;
 147                                cnt = iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz);
 148                                if (errno || inleft) {
 149                                        /*
 150                                         * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF
 151                                         * MacOS X avoids illegal byte sequemces.
 152                                         * If they occur on a mounted drive (e.g. NFS) it is not worth to
 153                                         * die() for that, but rather let the user see the original name
 154                                        */
 155                                        namelenz = 0; /* trigger strlcpy */
 156                                }
 157                        }
 158                } else
 159                        namelenz = 0;
 160
 161                if (!namelenz)
 162                        strlcpy(prec_dir->dirent_nfc->d_name, res->d_name,
 163                                                        prec_dir->dirent_nfc->max_name_len);
 164
 165                errno = ret_errno;
 166                return prec_dir->dirent_nfc;
 167        }
 168        return NULL;
 169}
 170
 171
 172int precompose_utf8_closedir(PREC_DIR *prec_dir)
 173{
 174        int ret_value;
 175        int ret_errno;
 176        ret_value = closedir(prec_dir->dirp);
 177        ret_errno = errno;
 178        if (prec_dir->ic_precompose != (iconv_t)-1)
 179                iconv_close(prec_dir->ic_precompose);
 180        free(prec_dir->dirent_nfc);
 181        free(prec_dir);
 182        errno = ret_errno;
 183        return ret_value;
 184}