compat / precompose_utf8.con commit Merge branch 'dk/blame-janitorial' (6784fab)
   1/*
   2 * Converts filenames from decomposed unicode into precomposed unicode.
   3 * Used on MacOS X.
   4 */
   5
   6#define PRECOMPOSE_UNICODE_C
   7
   8#include "cache.h"
   9#include "utf8.h"
  10#include "precompose_utf8.h"
  11
  12typedef char *iconv_ibp;
  13static const char *repo_encoding = "UTF-8";
  14static const char *path_encoding = "UTF-8-MAC";
  15
  16static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
  17{
  18        const uint8_t *ptr = (const uint8_t *)s;
  19        size_t strlen_chars = 0;
  20        size_t ret = 0;
  21
  22        if (!ptr || !*ptr)
  23                return 0;
  24
  25        while (*ptr && maxlen) {
  26                if (*ptr & 0x80)
  27                        ret++;
  28                strlen_chars++;
  29                ptr++;
  30                maxlen--;
  31        }
  32        if (strlen_c)
  33                *strlen_c = strlen_chars;
  34
  35        return ret;
  36}
  37
  38
  39void probe_utf8_pathname_composition(char *path, int len)
  40{
  41        static const char *auml_nfc = "\xc3\xa4";
  42        static const char *auml_nfd = "\x61\xcc\x88";
  43        int output_fd;
  44        if (precomposed_unicode != -1)
  45                return; /* We found it defined in the global config, respect it */
  46        strcpy(path + len, auml_nfc);
  47        output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600);
  48        if (output_fd >= 0) {
  49                close(output_fd);
  50                strcpy(path + len, auml_nfd);
  51                precomposed_unicode = access(path, R_OK) ? 0 : 1;
  52                git_config_set("core.precomposeunicode", precomposed_unicode ? "true" : "false");
  53                strcpy(path + len, auml_nfc);
  54                if (unlink(path))
  55                        die_errno(_("failed to unlink '%s'"), path);
  56        }
  57}
  58
  59
  60void precompose_argv(int argc, const char **argv)
  61{
  62        int i = 0;
  63        const char *oldarg;
  64        char *newarg;
  65        iconv_t ic_precompose;
  66
  67        if (precomposed_unicode != 1)
  68                return;
  69
  70        ic_precompose = iconv_open(repo_encoding, path_encoding);
  71        if (ic_precompose == (iconv_t) -1)
  72                return;
  73
  74        while (i < argc) {
  75                size_t namelen;
  76                oldarg = argv[i];
  77                if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
  78                        newarg = reencode_string_iconv(oldarg, namelen, ic_precompose, NULL);
  79                        if (newarg)
  80                                argv[i] = newarg;
  81                }
  82                i++;
  83        }
  84        iconv_close(ic_precompose);
  85}
  86
  87
  88PREC_DIR *precompose_utf8_opendir(const char *dirname)
  89{
  90        PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR));
  91        prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx));
  92        prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name);
  93
  94        prec_dir->dirp = opendir(dirname);
  95        if (!prec_dir->dirp) {
  96                free(prec_dir->dirent_nfc);
  97                free(prec_dir);
  98                return NULL;
  99        } else {
 100                int ret_errno = errno;
 101                prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding);
 102                /* if iconv_open() fails, die() in readdir() if needed */
 103                errno = ret_errno;
 104        }
 105
 106        return prec_dir;
 107}
 108
 109struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir)
 110{
 111        struct dirent *res;
 112        res = readdir(prec_dir->dirp);
 113        if (res) {
 114                size_t namelenz = strlen(res->d_name) + 1; /* \0 */
 115                size_t new_maxlen = namelenz;
 116
 117                int ret_errno = errno;
 118
 119                if (new_maxlen > prec_dir->dirent_nfc->max_name_len) {
 120                        size_t new_len = sizeof(dirent_prec_psx) + new_maxlen -
 121                                sizeof(prec_dir->dirent_nfc->d_name);
 122
 123                        prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len);
 124                        prec_dir->dirent_nfc->max_name_len = new_maxlen;
 125                }
 126
 127                prec_dir->dirent_nfc->d_ino  = res->d_ino;
 128                prec_dir->dirent_nfc->d_type = res->d_type;
 129
 130                if ((precomposed_unicode == 1) && has_non_ascii(res->d_name, (size_t)-1, NULL)) {
 131                        if (prec_dir->ic_precompose == (iconv_t)-1) {
 132                                die("iconv_open(%s,%s) failed, but needed:\n"
 133                                                "    precomposed unicode is not supported.\n"
 134                                                "    If you want to use decomposed unicode, run\n"
 135                                                "    \"git config core.precomposeunicode false\"\n",
 136                                                repo_encoding, path_encoding);
 137                        } else {
 138                                iconv_ibp       cp = (iconv_ibp)res->d_name;
 139                                size_t inleft = namelenz;
 140                                char *outpos = &prec_dir->dirent_nfc->d_name[0];
 141                                size_t outsz = prec_dir->dirent_nfc->max_name_len;
 142                                size_t cnt;
 143                                errno = 0;
 144                                cnt = iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz);
 145                                if (errno || inleft) {
 146                                        /*
 147                                         * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF
 148                                         * MacOS X avoids illegal byte sequemces.
 149                                         * If they occur on a mounted drive (e.g. NFS) it is not worth to
 150                                         * die() for that, but rather let the user see the original name
 151                                        */
 152                                        namelenz = 0; /* trigger strlcpy */
 153                                }
 154                        }
 155                } else
 156                        namelenz = 0;
 157
 158                if (!namelenz)
 159                        strlcpy(prec_dir->dirent_nfc->d_name, res->d_name,
 160                                                        prec_dir->dirent_nfc->max_name_len);
 161
 162                errno = ret_errno;
 163                return prec_dir->dirent_nfc;
 164        }
 165        return NULL;
 166}
 167
 168
 169int precompose_utf8_closedir(PREC_DIR *prec_dir)
 170{
 171        int ret_value;
 172        int ret_errno;
 173        ret_value = closedir(prec_dir->dirp);
 174        ret_errno = errno;
 175        if (prec_dir->ic_precompose != (iconv_t)-1)
 176                iconv_close(prec_dir->ic_precompose);
 177        free(prec_dir->dirent_nfc);
 178        free(prec_dir);
 179        errno = ret_errno;
 180        return ret_value;
 181}