1#include "test-tool.h"
2
/*
 * Written to the output in place of any input that cannot be passed through
 * (invalid UTF-8, disallowed control characters): a numeric character
 * reference to U+FFFD REPLACEMENT CHARACTER.  It is spelled in plain ASCII so
 * that neither this source file nor the emitted replacement depends on a
 * particular character encoding.
 */
static const char *utf8_replace_character = "&#xfffd;";
4
5/*
6 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
7 * in an XML file.
8 */
9int cmd__xml_encode(int argc, const char **argv)
10{
11 unsigned char buf[1024], tmp[4], *tmp2 = NULL;
12 ssize_t cur = 0, len = 1, remaining = 0;
13 unsigned char ch;
14
15 for (;;) {
16 if (++cur == len) {
17 len = xread(0, buf, sizeof(buf));
18 if (!len)
19 return 0;
20 if (len < 0)
21 die_errno("Could not read <stdin>");
22 cur = 0;
23 }
24 ch = buf[cur];
25
26 if (tmp2) {
27 if ((ch & 0xc0) != 0x80) {
28 fputs(utf8_replace_character, stdout);
29 tmp2 = NULL;
30 cur--;
31 continue;
32 }
33 *tmp2 = ch;
34 tmp2++;
35 if (--remaining == 0) {
36 fwrite(tmp, tmp2 - tmp, 1, stdout);
37 tmp2 = NULL;
38 }
39 continue;
40 }
41
42 if (!(ch & 0x80)) {
43 /* 0xxxxxxx */
44 if (ch == '&')
45 fputs("&", stdout);
46 else if (ch == '\'')
47 fputs("'", stdout);
48 else if (ch == '"')
49 fputs(""", stdout);
50 else if (ch == '<')
51 fputs("<", stdout);
52 else if (ch == '>')
53 fputs(">", stdout);
54 else if (ch >= 0x20)
55 fputc(ch, stdout);
56 else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
57 fprintf(stdout, "&#x%02x;", ch);
58 else
59 fputs(utf8_replace_character, stdout);
60 } else if ((ch & 0xe0) == 0xc0) {
61 /* 110XXXXx 10xxxxxx */
62 tmp[0] = ch;
63 remaining = 1;
64 tmp2 = tmp + 1;
65 } else if ((ch & 0xf0) == 0xe0) {
66 /* 1110XXXX 10Xxxxxx 10xxxxxx */
67 tmp[0] = ch;
68 remaining = 2;
69 tmp2 = tmp + 1;
70 } else if ((ch & 0xf8) == 0xf0) {
71 /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
72 tmp[0] = ch;
73 remaining = 3;
74 tmp2 = tmp + 1;
75 } else
76 fputs(utf8_replace_character, stdout);
77 }
78
79 return 0;
80}