t / helper / test-xml-encode.con commit Merge branch 'bb/unicode-12' (26623f8)
   1#include "test-tool.h"
   2
   3static const char *utf8_replace_character = "�";
   4
   5/*
   6 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
   7 * in an XML file.
   8 */
   9int cmd__xml_encode(int argc, const char **argv)
  10{
  11        unsigned char buf[1024], tmp[4], *tmp2 = NULL;
  12        ssize_t cur = 0, len = 1, remaining = 0;
  13        unsigned char ch;
  14
  15        for (;;) {
  16                if (++cur == len) {
  17                        len = xread(0, buf, sizeof(buf));
  18                        if (!len)
  19                                return 0;
  20                        if (len < 0)
  21                                die_errno("Could not read <stdin>");
  22                        cur = 0;
  23                }
  24                ch = buf[cur];
  25
  26                if (tmp2) {
  27                        if ((ch & 0xc0) != 0x80) {
  28                                fputs(utf8_replace_character, stdout);
  29                                tmp2 = NULL;
  30                                cur--;
  31                                continue;
  32                        }
  33                        *tmp2 = ch;
  34                        tmp2++;
  35                        if (--remaining == 0) {
  36                                fwrite(tmp, tmp2 - tmp, 1, stdout);
  37                                tmp2 = NULL;
  38                        }
  39                        continue;
  40                }
  41
  42                if (!(ch & 0x80)) {
  43                        /* 0xxxxxxx */
  44                        if (ch == '&')
  45                                fputs("&amp;", stdout);
  46                        else if (ch == '\'')
  47                                fputs("&apos;", stdout);
  48                        else if (ch == '"')
  49                                fputs("&quot;", stdout);
  50                        else if (ch == '<')
  51                                fputs("&lt;", stdout);
  52                        else if (ch == '>')
  53                                fputs("&gt;", stdout);
  54                        else if (ch >= 0x20)
  55                                fputc(ch, stdout);
  56                        else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
  57                                fprintf(stdout, "&#x%02x;", ch);
  58                        else
  59                                fputs(utf8_replace_character, stdout);
  60                } else if ((ch & 0xe0) == 0xc0) {
  61                        /* 110XXXXx 10xxxxxx */
  62                        tmp[0] = ch;
  63                        remaining = 1;
  64                        tmp2 = tmp + 1;
  65                } else if ((ch & 0xf0) == 0xe0) {
  66                        /* 1110XXXX 10Xxxxxx 10xxxxxx */
  67                        tmp[0] = ch;
  68                        remaining = 2;
  69                        tmp2 = tmp + 1;
  70                } else if ((ch & 0xf8) == 0xf0) {
  71                        /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
  72                        tmp[0] = ch;
  73                        remaining = 3;
  74                        tmp2 = tmp + 1;
  75                } else
  76                        fputs(utf8_replace_character, stdout);
  77        }
  78
  79        return 0;
  80}