Win32: add Unicode conversion functions
[gitweb.git] / compat / mingw.c
index fecb98bcff74d2cbcb56e8f9f56d1f3b06339469..6f1fb108e9968ecc984c838cdeafc30bfb512842 100644 (file)
@@ -304,23 +304,6 @@ int mingw_open (const char *filename, int oflags, ...)
        return fd;
 }
 
-#undef write
-/*
- * Wrapper around write() that limits a single call to at most 31MB and
- * returns whatever write() returns for the (possibly shortened) request.
- * NOTE(review): the #undef above presumably removes a macro that maps
- * write() to this function, so the call below reaches the C runtime;
- * confirm against the header.
- */
-ssize_t mingw_write(int fd, const void *buf, size_t count)
-{
-       /*
-        * While write() calls to a file on a local disk are translated
-        * into WriteFile() calls with a maximum size of 64KB on Windows
-        * XP and 256KB on Vista, no such cap is placed on writes to
-        * files over the network on Windows XP.  Unfortunately, there
-        * seems to be a limit of 32MB-28KB on X64 and 64MB-32KB on x86;
-        * bigger writes fail on Windows XP.
-        * So we cap to a nice 31MB here to avoid write failures over
-        * the net without changing the number of WriteFile() calls in
-        * the local case.
-        */
-       return write(fd, buf, min(count, 31 * 1024 * 1024));
-}
-
 static BOOL WINAPI ctrl_ignore(DWORD type)
 {
        return TRUE;
@@ -1840,3 +1823,127 @@ pid_t waitpid(pid_t pid, int *status, int options)
        errno = EINVAL;
        return -1;
 }
+
+/*
+ * Returns the number of leading characters of path that form its root:
+ *  - a DOS drive prefix counts as 2 characters;
+ *  - a UNC path (two leading directory separators) counts everything up
+ *    to the end of the second component, i.e. "//server/share";
+ *  - in either case, and for plain paths too, one directory separator
+ *    found at that position is included as well.
+ * A UNC path with no separator after the server name is treated as
+ * malformed and yields 0.
+ */
+int mingw_offset_1st_component(const char *path)
+{
+       int root_len = 0;
+
+       if (has_dos_drive_prefix(path)) {
+               root_len = 2;
+       } else if (is_dir_sep(path[0]) && is_dir_sep(path[1])) {
+               /* UNC path: locate the separator that ends the server name */
+               const char *cursor = strpbrk(path + 2, "\\/");
+
+               if (!cursor)
+                       return 0; /* Error: malformed unc path */
+
+               /* step over that separator, then run to the end of the share name */
+               for (cursor++; *cursor && !is_dir_sep(*cursor); cursor++)
+                       ; /* nothing */
+
+               root_len = cursor - path;
+       }
+
+       /* swallow the separator that follows the root, if there is one */
+       return root_len + is_dir_sep(path[root_len]);
+}
+
+/*
+ * Converts UTF-8 encoded bytes to UTF-16 code units stored in wchar_t.
+ *
+ * wcs:    output buffer; NUL-terminated on every return except EINVAL
+ * utfs:   input bytes
+ * wcslen: size of wcs in wchar_t units, including room for the final NUL
+ * utflen: number of input bytes to convert; if negative, conversion
+ *         stops at the first NUL byte instead
+ *
+ * Malformed input is not an error. A byte that does not start a valid,
+ * shortest-form sequence is stored unchanged when it is >= 0xa0 and as
+ * two lowercase hex digits otherwise.
+ *
+ * Returns the number of wchar_t units written (not counting the NUL), or
+ * -1 with errno set to EINVAL (NULL argument or wcslen == 0) or ERANGE
+ * (output does not fit). On ERANGE the buffer holds only complete
+ * characters: a surrogate pair or a hex escape is never cut in half.
+ */
+int xutftowcsn(wchar_t *wcs, const char *utfs, size_t wcslen, int utflen)
+{
+       int upos = 0, wpos = 0;
+       const unsigned char *utf = (const unsigned char*) utfs;
+       if (!utf || !wcs || wcslen < 1) {
+               errno = EINVAL;
+               return -1;
+       }
+       /* reserve space for \0 */
+       wcslen--;
+       if (utflen < 0)
+               utflen = INT_MAX;
+
+       while (upos < utflen) {
+               int c = utf[upos++] & 0xff;
+               if (utflen == INT_MAX && c == 0)
+                       break;
+
+               if ((size_t) wpos >= wcslen)
+                       goto too_small;
+
+               if (c < 0x80) {
+                       /* ASCII */
+                       wcs[wpos++] = c;
+               } else if (c >= 0xc2 && c < 0xe0 && upos < utflen &&
+                               (utf[upos] & 0xc0) == 0x80) {
+                       /* 2-byte utf-8 */
+                       c = ((c & 0x1f) << 6);
+                       c |= (utf[upos++] & 0x3f);
+                       wcs[wpos++] = c;
+               } else if (c >= 0xe0 && c < 0xf0 && upos + 1 < utflen &&
+                               !(c == 0xe0 && utf[upos] < 0xa0) && /* over-long encoding */
+                               (utf[upos] & 0xc0) == 0x80 &&
+                               (utf[upos + 1] & 0xc0) == 0x80) {
+                       /* 3-byte utf-8 */
+                       c = ((c & 0x0f) << 12);
+                       c |= ((utf[upos++] & 0x3f) << 6);
+                       c |= (utf[upos++] & 0x3f);
+                       wcs[wpos++] = c;
+               } else if (c >= 0xf0 && c < 0xf5 && upos + 2 < utflen &&
+                               !(c == 0xf0 && utf[upos] < 0x90) && /* over-long encoding */
+                               !(c == 0xf4 && utf[upos] >= 0x90) && /* > \u10ffff */
+                               (utf[upos] & 0xc0) == 0x80 &&
+                               (utf[upos + 1] & 0xc0) == 0x80 &&
+                               (utf[upos + 2] & 0xc0) == 0x80) {
+                       /*
+                        * 4-byte utf-8: convert to \ud8xx \udcxx surrogate pair.
+                        * The sequence is valid, so a missing second slot is a
+                        * buffer problem, not an encoding problem.
+                        */
+                       if ((size_t) wpos + 1 >= wcslen)
+                               goto too_small;
+                       c = ((c & 0x07) << 18);
+                       c |= ((utf[upos++] & 0x3f) << 12);
+                       c |= ((utf[upos++] & 0x3f) << 6);
+                       c |= (utf[upos++] & 0x3f);
+                       c -= 0x10000;
+                       wcs[wpos++] = 0xd800 | (c >> 10);
+                       wcs[wpos++] = 0xdc00 | (c & 0x3ff);
+               } else if (c >= 0xa0) {
+                       /* invalid utf-8 byte, printable unicode char: convert 1:1 */
+                       wcs[wpos++] = c;
+               } else {
+                       /*
+                        * invalid utf-8 byte, non-printable unicode: convert to
+                        * hex. Both digits must fit, otherwise the result would
+                        * silently end in half an escape.
+                        */
+                       static const char hex[] = "0123456789abcdef";
+                       if ((size_t) wpos + 1 >= wcslen)
+                               goto too_small;
+                       wcs[wpos++] = hex[c >> 4];
+                       wcs[wpos++] = hex[c & 0x0f];
+               }
+       }
+       wcs[wpos] = 0;
+       return wpos;
+
+too_small:
+       /* terminate after the last complete character and report overflow */
+       wcs[wpos] = 0;
+       errno = ERANGE;
+       return -1;
+}
+
+/*
+ * Converts the NUL-terminated wide string wcs to UTF-8 in utf, a buffer
+ * of utflen bytes, by way of WideCharToMultiByte(CP_UTF8).
+ *
+ * Returns the converted length reported by the API minus one (the API
+ * count includes the terminating NUL when the input length is -1), or
+ * -1 with errno set to EINVAL (NULL argument or utflen == 0) or ERANGE
+ * (the API reported failure by returning 0).
+ */
+int xwcstoutf(char *utf, const wchar_t *wcs, size_t utflen)
+{
+       int written;
+
+       if (!wcs || !utf || utflen < 1) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       written = WideCharToMultiByte(CP_UTF8, 0, wcs, -1, utf, utflen, NULL, NULL);
+       if (!written) {
+               errno = ERANGE;
+               return -1;
+       }
+
+       /* drop the terminating NUL from the reported length */
+       return written - 1;
+}
+
+/*
+ * Process start-up hook for the MinGW build. It replaces argv[0] with a
+ * copy of the executable name, initializes the critical section used by
+ * the waitpid emulation, and switches the default file mode and the
+ * three standard streams to binary.
+ *
+ * Takes no arguments and returns nothing; declared with (void) so the
+ * definition is a real prototype and stray arguments are diagnosed.
+ */
+void mingw_startup(void)
+{
+       /* copy executable name to argv[0] */
+       __argv[0] = xstrdup(_pgmptr);
+
+       /* initialize critical section for waitpid pinfo_t list */
+       InitializeCriticalSection(&pinfo_cs);
+
+       /* set up default file mode and file modes for stdin/out/err */
+       _fmode = _O_BINARY;
+       _setmode(_fileno(stdin), _O_BINARY);
+       _setmode(_fileno(stdout), _O_BINARY);
+       _setmode(_fileno(stderr), _O_BINARY);
+}