/* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */
+typedef unsigned int ucs_char_t; /* assuming 32bit int */
+
struct interval {
int first;
int last;
};
/* auxiliary function for binary search in interval table */
-static int bisearch(wchar_t ucs, const struct interval *table, int max) {
+static int bisearch(ucs_char_t ucs, const struct interval *table, int max) {
int min = 0;
int mid;
* ISO 8859-1 and WGL4 characters, Unicode control characters,
* etc.) have a column width of 1.
*
- * This implementation assumes that wchar_t characters are encoded
+ * This implementation assumes that ucs_char_t characters are encoded
* in ISO 10646.
*/
-static int wcwidth(wchar_t ch)
+static int wcwidth(ucs_char_t ch)
{
/*
* Sorted list of non-overlapping intervals of non-spacing characters,
int utf8_width(const char **start)
{
unsigned char *s = (unsigned char *)*start;
- wchar_t ch;
+ ucs_char_t ch;
if (*s < 0x80) {
/* 0xxxxxxx */
/*
* Wrap the text, if necessary. The variable indent is the indent for the
* first line, indent2 is the indent for all other lines.
+ * If indent is negative, assume that already -indent columns have been
+ * consumed (and no extra indent is necessary for the first line).
*/
-void print_wrapped_text(const char *text, int indent, int indent2, int width)
+int print_wrapped_text(const char *text, int indent, int indent2, int width)
{
int w = indent, assume_utf8 = is_utf8(text);
const char *bol = text, *space = NULL;
+ if (indent < 0) {
+ w = -indent;
+ space = text;
+ }
+
for (;;) {
char c = *text;
if (!c || isspace(c)) {
else
print_spaces(indent);
fwrite(start, text - start, 1, stdout);
- if (!c) {
- putchar('\n');
- return;
- } else if (c == '\t')
+ if (!c)
+ return w;
+ else if (c == '\t')
w |= 0x07;
space = text;
w++;
}
else {
putchar('\n');
- text = bol = space + 1;
+ text = bol = space + isspace(*space);
space = NULL;
w = indent = indent2;
}
text++;
}
}
+ return w;
+}
+
+/*
+ * Tell whether an encoding name denotes UTF-8.
+ *
+ * A NULL name counts as UTF-8.  Otherwise the name has to be "utf-8"
+ * or "utf8", compared without regard to case.  Returns 1 for UTF-8
+ * and 0 for anything else.
+ */
+int is_encoding_utf8(const char *name)
+{
+	/* No name given: treated as UTF-8. */
+	if (name == NULL)
+		return 1;
+	return strcasecmp(name, "utf-8") == 0 || strcasecmp(name, "utf8") == 0;
+}
+
+/*
+ * Re-encode the NUL-terminated string "in" from in_encoding to
+ * out_encoding with iconv.
+ *
+ * Returns a newly allocated, NUL-terminated buffer holding the
+ * converted text; the caller owns it and must free() it.  Returns
+ * NULL when either encoding name is NULL, when iconv_open() rejects
+ * the encoding pair, or when the conversion itself fails (any iconv
+ * error other than running out of output space).
+ */
+#ifndef NO_ICONV
+#ifdef OLD_ICONV
+	typedef const char * iconv_ibp;
+#else
+	typedef char * iconv_ibp;
+#endif
+char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding)
+{
+	iconv_t conv;
+	size_t insz, outsz, outalloc;
+	char *out, *outpos;
+	iconv_ibp cp;
+	int flushing = 0;
+
+	if (!in_encoding || !out_encoding)
+		return NULL;
+	conv = iconv_open(out_encoding, in_encoding);
+	if (conv == (iconv_t) -1)
+		return NULL;
+	insz = strlen(in);
+	outsz = insz;
+	outalloc = outsz + 1; /* for terminating NUL */
+	out = xmalloc(outalloc);
+	outpos = out;
+	cp = (iconv_ibp)in;
+
+	while (1) {
+		size_t cnt;
+
+		/*
+		 * First convert the input; once it is fully consumed, call
+		 * iconv() with a NULL input so that a stateful output
+		 * encoding gets its closing shift sequence.
+		 */
+		if (!flushing)
+			cnt = iconv(conv, &cp, &insz, &outpos, &outsz);
+		else
+			cnt = iconv(conv, NULL, NULL, &outpos, &outsz);
+
+		/* iconv() reports failure as (size_t) -1, not a signed -1. */
+		if (cnt == (size_t) -1) {
+			size_t sofar;
+			if (errno != E2BIG) {
+				free(out);
+				iconv_close(conv);
+				return NULL;
+			}
+			/*
+			 * The output buffer is full.  insz holds the number
+			 * of input bytes still unconverted; the buffer was
+			 * initially sized to match the input, so make room
+			 * for twice the remaining input plus some slack.
+			 * One byte stays reserved for the terminating NUL.
+			 */
+			sofar = outpos - out;
+			outalloc = sofar + insz * 2 + 32;
+			out = xrealloc(out, outalloc);
+			outpos = out + sofar;
+			outsz = outalloc - sofar - 1;
+		}
+		else if (!flushing) {
+			/* All input converted; next pass emits the reset. */
+			flushing = 1;
+		}
+		else {
+			*outpos = '\0';
+			break;
+		}
+	}
+	iconv_close(conv);
+	return out;
}
+#endif /* NO_ICONV */