/* ws.c on commit Merge branch 'so/cherry-pick-always-allow-m1' (77fbd96) */
/*
 * Whitespace rules
 *
 * Copyright (c) 2007 Junio C Hamano
 */
   6#include "cache.h"
   7#include "attr.h"
   8
   9static struct whitespace_rule {
  10        const char *rule_name;
  11        unsigned rule_bits;
  12        unsigned loosens_error:1,
  13                exclude_default:1;
  14} whitespace_rule_names[] = {
  15        { "trailing-space", WS_TRAILING_SPACE, 0 },
  16        { "space-before-tab", WS_SPACE_BEFORE_TAB, 0 },
  17        { "indent-with-non-tab", WS_INDENT_WITH_NON_TAB, 0 },
  18        { "cr-at-eol", WS_CR_AT_EOL, 1 },
  19        { "blank-at-eol", WS_BLANK_AT_EOL, 0 },
  20        { "blank-at-eof", WS_BLANK_AT_EOF, 0 },
  21        { "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 },
  22};
  23
  24unsigned parse_whitespace_rule(const char *string)
  25{
  26        unsigned rule = WS_DEFAULT_RULE;
  27
  28        while (string) {
  29                int i;
  30                size_t len;
  31                const char *ep;
  32                int negated = 0;
  33
  34                string = string + strspn(string, ", \t\n\r");
  35                ep = strchrnul(string, ',');
  36                len = ep - string;
  37
  38                if (*string == '-') {
  39                        negated = 1;
  40                        string++;
  41                        len--;
  42                }
  43                if (!len)
  44                        break;
  45                for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++) {
  46                        if (strncmp(whitespace_rule_names[i].rule_name,
  47                                    string, len))
  48                                continue;
  49                        if (negated)
  50                                rule &= ~whitespace_rule_names[i].rule_bits;
  51                        else
  52                                rule |= whitespace_rule_names[i].rule_bits;
  53                        break;
  54                }
  55                if (strncmp(string, "tabwidth=", 9) == 0) {
  56                        unsigned tabwidth = atoi(string + 9);
  57                        if (0 < tabwidth && tabwidth < 0100) {
  58                                rule &= ~WS_TAB_WIDTH_MASK;
  59                                rule |= tabwidth;
  60                        }
  61                        else
  62                                warning("tabwidth %.*s out of range",
  63                                        (int)(len - 9), string + 9);
  64                }
  65                string = ep;
  66        }
  67
  68        if (rule & WS_TAB_IN_INDENT && rule & WS_INDENT_WITH_NON_TAB)
  69                die("cannot enforce both tab-in-indent and indent-with-non-tab");
  70        return rule;
  71}
  72
  73unsigned whitespace_rule(struct index_state *istate, const char *pathname)
  74{
  75        static struct attr_check *attr_whitespace_rule;
  76        const char *value;
  77
  78        if (!attr_whitespace_rule)
  79                attr_whitespace_rule = attr_check_initl("whitespace", NULL);
  80
  81        git_check_attr(istate, pathname, attr_whitespace_rule);
  82        value = attr_whitespace_rule->items[0].value;
  83        if (ATTR_TRUE(value)) {
  84                /* true (whitespace) */
  85                unsigned all_rule = ws_tab_width(whitespace_rule_cfg);
  86                int i;
  87                for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++)
  88                        if (!whitespace_rule_names[i].loosens_error &&
  89                            !whitespace_rule_names[i].exclude_default)
  90                                all_rule |= whitespace_rule_names[i].rule_bits;
  91                return all_rule;
  92        } else if (ATTR_FALSE(value)) {
  93                /* false (-whitespace) */
  94                return ws_tab_width(whitespace_rule_cfg);
  95        } else if (ATTR_UNSET(value)) {
  96                /* reset to default (!whitespace) */
  97                return whitespace_rule_cfg;
  98        } else {
  99                /* string */
 100                return parse_whitespace_rule(value);
 101        }
 102}
 103
 104/* The returned string should be freed by the caller. */
 105char *whitespace_error_string(unsigned ws)
 106{
 107        struct strbuf err = STRBUF_INIT;
 108        if ((ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE)
 109                strbuf_addstr(&err, "trailing whitespace");
 110        else {
 111                if (ws & WS_BLANK_AT_EOL)
 112                        strbuf_addstr(&err, "trailing whitespace");
 113                if (ws & WS_BLANK_AT_EOF) {
 114                        if (err.len)
 115                                strbuf_addstr(&err, ", ");
 116                        strbuf_addstr(&err, "new blank line at EOF");
 117                }
 118        }
 119        if (ws & WS_SPACE_BEFORE_TAB) {
 120                if (err.len)
 121                        strbuf_addstr(&err, ", ");
 122                strbuf_addstr(&err, "space before tab in indent");
 123        }
 124        if (ws & WS_INDENT_WITH_NON_TAB) {
 125                if (err.len)
 126                        strbuf_addstr(&err, ", ");
 127                strbuf_addstr(&err, "indent with spaces");
 128        }
 129        if (ws & WS_TAB_IN_INDENT) {
 130                if (err.len)
 131                        strbuf_addstr(&err, ", ");
 132                strbuf_addstr(&err, "tab in indent");
 133        }
 134        return strbuf_detach(&err, NULL);
 135}
 136
 137/* If stream is non-NULL, emits the line after checking. */
 138static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 139                                FILE *stream, const char *set,
 140                                const char *reset, const char *ws)
 141{
 142        unsigned result = 0;
 143        int written = 0;
 144        int trailing_whitespace = -1;
 145        int trailing_newline = 0;
 146        int trailing_carriage_return = 0;
 147        int i;
 148
 149        /* Logic is simpler if we temporarily ignore the trailing newline. */
 150        if (len > 0 && line[len - 1] == '\n') {
 151                trailing_newline = 1;
 152                len--;
 153        }
 154        if ((ws_rule & WS_CR_AT_EOL) &&
 155            len > 0 && line[len - 1] == '\r') {
 156                trailing_carriage_return = 1;
 157                len--;
 158        }
 159
 160        /* Check for trailing whitespace. */
 161        if (ws_rule & WS_BLANK_AT_EOL) {
 162                for (i = len - 1; i >= 0; i--) {
 163                        if (isspace(line[i])) {
 164                                trailing_whitespace = i;
 165                                result |= WS_BLANK_AT_EOL;
 166                        }
 167                        else
 168                                break;
 169                }
 170        }
 171
 172        if (trailing_whitespace == -1)
 173                trailing_whitespace = len;
 174
 175        /* Check indentation */
 176        for (i = 0; i < trailing_whitespace; i++) {
 177                if (line[i] == ' ')
 178                        continue;
 179                if (line[i] != '\t')
 180                        break;
 181                if ((ws_rule & WS_SPACE_BEFORE_TAB) && written < i) {
 182                        result |= WS_SPACE_BEFORE_TAB;
 183                        if (stream) {
 184                                fputs(ws, stream);
 185                                fwrite(line + written, i - written, 1, stream);
 186                                fputs(reset, stream);
 187                                fwrite(line + i, 1, 1, stream);
 188                        }
 189                } else if (ws_rule & WS_TAB_IN_INDENT) {
 190                        result |= WS_TAB_IN_INDENT;
 191                        if (stream) {
 192                                fwrite(line + written, i - written, 1, stream);
 193                                fputs(ws, stream);
 194                                fwrite(line + i, 1, 1, stream);
 195                                fputs(reset, stream);
 196                        }
 197                } else if (stream) {
 198                        fwrite(line + written, i - written + 1, 1, stream);
 199                }
 200                written = i + 1;
 201        }
 202
 203        /* Check for indent using non-tab. */
 204        if ((ws_rule & WS_INDENT_WITH_NON_TAB) && i - written >= ws_tab_width(ws_rule)) {
 205                result |= WS_INDENT_WITH_NON_TAB;
 206                if (stream) {
 207                        fputs(ws, stream);
 208                        fwrite(line + written, i - written, 1, stream);
 209                        fputs(reset, stream);
 210                }
 211                written = i;
 212        }
 213
 214        if (stream) {
 215                /*
 216                 * Now the rest of the line starts at "written".
 217                 * The non-highlighted part ends at "trailing_whitespace".
 218                 */
 219
 220                /* Emit non-highlighted (middle) segment. */
 221                if (trailing_whitespace - written > 0) {
 222                        fputs(set, stream);
 223                        fwrite(line + written,
 224                            trailing_whitespace - written, 1, stream);
 225                        fputs(reset, stream);
 226                }
 227
 228                /* Highlight errors in trailing whitespace. */
 229                if (trailing_whitespace != len) {
 230                        fputs(ws, stream);
 231                        fwrite(line + trailing_whitespace,
 232                            len - trailing_whitespace, 1, stream);
 233                        fputs(reset, stream);
 234                }
 235                if (trailing_carriage_return)
 236                        fputc('\r', stream);
 237                if (trailing_newline)
 238                        fputc('\n', stream);
 239        }
 240        return result;
 241}
 242
 243void ws_check_emit(const char *line, int len, unsigned ws_rule,
 244                   FILE *stream, const char *set,
 245                   const char *reset, const char *ws)
 246{
 247        (void)ws_check_emit_1(line, len, ws_rule, stream, set, reset, ws);
 248}
 249
 250unsigned ws_check(const char *line, int len, unsigned ws_rule)
 251{
 252        return ws_check_emit_1(line, len, ws_rule, NULL, NULL, NULL, NULL);
 253}
 254
 255int ws_blank_line(const char *line, int len, unsigned ws_rule)
 256{
 257        /*
 258         * We _might_ want to treat CR differently from other
 259         * whitespace characters when ws_rule has WS_CR_AT_EOL, but
 260         * for now we just use this stupid definition.
 261         */
 262        while (len-- > 0) {
 263                if (!isspace(*line))
 264                        return 0;
 265                line++;
 266        }
 267        return 1;
 268}
 269
 270/* Copy the line onto the end of the strbuf while fixing whitespaces */
 271void ws_fix_copy(struct strbuf *dst, const char *src, int len, unsigned ws_rule, int *error_count)
 272{
 273        /*
 274         * len is number of bytes to be copied from src, starting
 275         * at src.  Typically src[len-1] is '\n', unless this is
 276         * the incomplete last line.
 277         */
 278        int i;
 279        int add_nl_to_tail = 0;
 280        int add_cr_to_tail = 0;
 281        int fixed = 0;
 282        int last_tab_in_indent = -1;
 283        int last_space_in_indent = -1;
 284        int need_fix_leading_space = 0;
 285
 286        /*
 287         * Strip trailing whitespace
 288         */
 289        if (ws_rule & WS_BLANK_AT_EOL) {
 290                if (0 < len && src[len - 1] == '\n') {
 291                        add_nl_to_tail = 1;
 292                        len--;
 293                        if (0 < len && src[len - 1] == '\r') {
 294                                add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL);
 295                                len--;
 296                        }
 297                }
 298                if (0 < len && isspace(src[len - 1])) {
 299                        while (0 < len && isspace(src[len-1]))
 300                                len--;
 301                        fixed = 1;
 302                }
 303        }
 304
 305        /*
 306         * Check leading whitespaces (indent)
 307         */
 308        for (i = 0; i < len; i++) {
 309                char ch = src[i];
 310                if (ch == '\t') {
 311                        last_tab_in_indent = i;
 312                        if ((ws_rule & WS_SPACE_BEFORE_TAB) &&
 313                            0 <= last_space_in_indent)
 314                            need_fix_leading_space = 1;
 315                } else if (ch == ' ') {
 316                        last_space_in_indent = i;
 317                        if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
 318                            ws_tab_width(ws_rule) <= i - last_tab_in_indent)
 319                                need_fix_leading_space = 1;
 320                } else
 321                        break;
 322        }
 323
 324        if (need_fix_leading_space) {
 325                /* Process indent ourselves */
 326                int consecutive_spaces = 0;
 327                int last = last_tab_in_indent + 1;
 328
 329                if (ws_rule & WS_INDENT_WITH_NON_TAB) {
 330                        /* have "last" point at one past the indent */
 331                        if (last_tab_in_indent < last_space_in_indent)
 332                                last = last_space_in_indent + 1;
 333                        else
 334                                last = last_tab_in_indent + 1;
 335                }
 336
 337                /*
 338                 * between src[0..last-1], strip the funny spaces,
 339                 * updating them to tab as needed.
 340                 */
 341                for (i = 0; i < last; i++) {
 342                        char ch = src[i];
 343                        if (ch != ' ') {
 344                                consecutive_spaces = 0;
 345                                strbuf_addch(dst, ch);
 346                        } else {
 347                                consecutive_spaces++;
 348                                if (consecutive_spaces == ws_tab_width(ws_rule)) {
 349                                        strbuf_addch(dst, '\t');
 350                                        consecutive_spaces = 0;
 351                                }
 352                        }
 353                }
 354                while (0 < consecutive_spaces--)
 355                        strbuf_addch(dst, ' ');
 356                len -= last;
 357                src += last;
 358                fixed = 1;
 359        } else if ((ws_rule & WS_TAB_IN_INDENT) && last_tab_in_indent >= 0) {
 360                /* Expand tabs into spaces */
 361                int start = dst->len;
 362                int last = last_tab_in_indent + 1;
 363                for (i = 0; i < last; i++) {
 364                        if (src[i] == '\t')
 365                                do {
 366                                        strbuf_addch(dst, ' ');
 367                                } while ((dst->len - start) % ws_tab_width(ws_rule));
 368                        else
 369                                strbuf_addch(dst, src[i]);
 370                }
 371                len -= last;
 372                src += last;
 373                fixed = 1;
 374        }
 375
 376        strbuf_add(dst, src, len);
 377        if (add_cr_to_tail)
 378                strbuf_addch(dst, '\r');
 379        if (add_nl_to_tail)
 380                strbuf_addch(dst, '\n');
 381        if (fixed && error_count)
 382                (*error_count)++;
 383}