};
/* auxiliary function for binary search in interval table */
-static int bisearch(wchar_t ucs, const struct interval *table, int max) {
+static int bisearch(ucs_char_t ucs, const struct interval *table, int max)
+{
int min = 0;
int mid;
* ISO 8859-1 and WGL4 characters, Unicode control characters,
* etc.) have a column width of 1.
*
- * This implementation assumes that wchar_t characters are encoded
+ * This implementation assumes that ucs_char_t characters are encoded
* in ISO 10646.
*/
-static int wcwidth(wchar_t ch)
+static int git_wcwidth(ucs_char_t ch)
{
/*
* Sorted list of non-overlapping intervals of non-spacing characters,
}
/*
- * This function returns the number of columns occupied by the character
- * pointed to by the variable start. The pointer is updated to point at
- * the next character. If it was not valid UTF-8, the pointer is set to NULL.
+ * Pick one ucs character starting from the location *start points at,
+ * and return it, while updating the *start pointer to point at the
+ * end of that character. When remainder_p is not NULL, the location
+ * holds the number of bytes remaining in the string that we are allowed
+ * to pick from. Otherwise we are allowed to pick up to the NUL that
+ * would eventually appear in the string. *remainder_p is also reduced
+ * by the number of bytes we have consumed.
+ *
+ * If the string was not a valid UTF-8, *start pointer is set to NULL
+ * and the return value is undefined.
*/
-int utf8_width(const char **start)
+ucs_char_t pick_one_utf8_char(const char **start, size_t *remainder_p)
{
unsigned char *s = (unsigned char *)*start;
- wchar_t ch;
+ ucs_char_t ch;
+ size_t remainder, incr;
- if (*s < 0x80) {
+ /*
+ * A caller that assumes NUL terminated text can choose
+ * not to bother with the remainder length. We will
+ * stop at the first NUL.
+ */
+ remainder = (remainder_p ? *remainder_p : 999);
+
+ if (remainder < 1) {
+ goto invalid;
+ } else if (*s < 0x80) {
/* 0xxxxxxx */
ch = *s;
- *start += 1;
+ incr = 1;
} else if ((s[0] & 0xe0) == 0xc0) {
/* 110XXXXx 10xxxxxx */
- if ((s[1] & 0xc0) != 0x80 ||
- /* overlong? */
- (s[0] & 0xfe) == 0xc0)
+ if (remainder < 2 ||
+ (s[1] & 0xc0) != 0x80 ||
+ (s[0] & 0xfe) == 0xc0)
goto invalid;
ch = ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
- *start += 2;
+ incr = 2;
} else if ((s[0] & 0xf0) == 0xe0) {
/* 1110XXXX 10Xxxxxx 10xxxxxx */
- if ((s[1] & 0xc0) != 0x80 ||
- (s[2] & 0xc0) != 0x80 ||
- /* overlong? */
- (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) ||
- /* surrogate? */
- (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) ||
- /* U+FFFE or U+FFFF? */
- (s[0] == 0xef && s[1] == 0xbf &&
- (s[2] & 0xfe) == 0xbe))
+ if (remainder < 3 ||
+ (s[1] & 0xc0) != 0x80 ||
+ (s[2] & 0xc0) != 0x80 ||
+ /* overlong? */
+ (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) ||
+ /* surrogate? */
+ (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) ||
+ /* U+FFFE or U+FFFF? */
+ (s[0] == 0xef && s[1] == 0xbf &&
+ (s[2] & 0xfe) == 0xbe))
goto invalid;
ch = ((s[0] & 0x0f) << 12) |
((s[1] & 0x3f) << 6) | (s[2] & 0x3f);
- *start += 3;
+ incr = 3;
} else if ((s[0] & 0xf8) == 0xf0) {
/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
- if ((s[1] & 0xc0) != 0x80 ||
- (s[2] & 0xc0) != 0x80 ||
- (s[3] & 0xc0) != 0x80 ||
- /* overlong? */
- (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) ||
- /* > U+10FFFF? */
- (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4)
+ if (remainder < 4 ||
+ (s[1] & 0xc0) != 0x80 ||
+ (s[2] & 0xc0) != 0x80 ||
+ (s[3] & 0xc0) != 0x80 ||
+ /* overlong? */
+ (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) ||
+ /* > U+10FFFF? */
+ (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4)
goto invalid;
ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3f) << 12) |
((s[2] & 0x3f) << 6) | (s[3] & 0x3f);
- *start += 4;
+ incr = 4;
} else {
invalid:
*start = NULL;
return 0;
}
- return wcwidth(ch);
+ *start += incr;
+ if (remainder_p)
+ *remainder_p = remainder - incr;
+ return ch;
+}
+
+/*
+ * This function returns the number of columns occupied by the character
+ * pointed to by the variable start. The pointer is updated to point at
+ * the next character. When remainder_p is not NULL, it points at the
+ * location that stores the number of remaining bytes we can use to pick
+ * a character (see pick_one_utf8_char() above).
+ */
+int utf8_width(const char **start, size_t *remainder_p)
+{
+ ucs_char_t ch = pick_one_utf8_char(start, remainder_p);
+ if (!*start)
+ return 0;
+ return git_wcwidth(ch);
}
int is_utf8(const char *text)
text++;
continue;
}
- utf8_width(&text);
+ utf8_width(&text, NULL);
if (!text)
return 0;
}
/*
* Wrap the text, if necessary. The variable indent is the indent for the
* first line, indent2 is the indent for all other lines.
+ * If indent is negative, assume that already -indent columns have been
+ * consumed (and no extra indent is necessary for the first line).
*/
-void print_wrapped_text(const char *text, int indent, int indent2, int width)
+int print_wrapped_text(const char *text, int indent, int indent2, int width)
{
int w = indent, assume_utf8 = is_utf8(text);
const char *bol = text, *space = NULL;
+ if (indent < 0) {
+ w = -indent;
+ space = text;
+ }
+
for (;;) {
char c = *text;
if (!c || isspace(c)) {
else
print_spaces(indent);
fwrite(start, text - start, 1, stdout);
- if (!c) {
- putchar('\n');
- return;
- } else if (c == '\t')
+ if (!c)
+ return w;
+ else if (c == '\t')
w |= 0x07;
space = text;
w++;
}
else {
putchar('\n');
- text = bol = space + 1;
+ text = bol = space + isspace(*space);
space = NULL;
w = indent = indent2;
}
continue;
}
if (assume_utf8)
- w += utf8_width(&text);
+ w += utf8_width(&text, NULL);
else {
w++;
text++;
}
}
}
+
+int is_encoding_utf8(const char *name)
+{
+ if (!name)
+ return 1;
+ if (!strcasecmp(name, "utf-8") || !strcasecmp(name, "utf8"))
+ return 1;
+ return 0;
+}
+
+/*
+ * Given a buffer and its encoding, return it re-encoded
+ * with iconv. If the conversion fails, returns NULL.
+ */
+#ifndef NO_ICONV
+#ifdef OLD_ICONV
+ typedef const char * iconv_ibp;
+#else
+ typedef char * iconv_ibp;
+#endif
+char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding)
+{
+ iconv_t conv;
+ size_t insz, outsz, outalloc;
+ char *out, *outpos;
+ iconv_ibp cp;
+
+ if (!in_encoding)
+ return NULL;
+ conv = iconv_open(out_encoding, in_encoding);
+ if (conv == (iconv_t) -1)
+ return NULL;
+ insz = strlen(in);
+ outsz = insz;
+ outalloc = outsz + 1; /* for terminating NUL */
+ out = xmalloc(outalloc);
+ outpos = out;
+ cp = (iconv_ibp)in;
+
+ while (1) {
+ size_t cnt = iconv(conv, &cp, &insz, &outpos, &outsz);
+
+ if (cnt == -1) {
+ size_t sofar;
+ if (errno != E2BIG) {
+ free(out);
+ iconv_close(conv);
+ return NULL;
+ }
+ /* insz has remaining number of bytes.
+ * since we started outsz the same as insz,
+ * it is likely that insz is not enough for
+ * converting the rest.
+ */
+ sofar = outpos - out;
+ outalloc = sofar + insz * 2 + 32;
+ out = xrealloc(out, outalloc);
+ outpos = out + sofar;
+ outsz = outalloc - sofar - 1;
+ }
+ else {
+ *outpos = '\0';
+ break;
+ }
+ }
+ iconv_close(conv);
+ return out;
+}
+#endif