grep.con commit Merge branch 'rs/ctype' (d9fde06)
   1#include "cache.h"
   2#include "grep.h"
   3#include "xdiff-interface.h"
   4
   5void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
   6{
   7        struct grep_pat *p = xcalloc(1, sizeof(*p));
   8        p->pattern = pat;
   9        p->origin = "header";
  10        p->no = 0;
  11        p->token = GREP_PATTERN_HEAD;
  12        p->field = field;
  13        *opt->pattern_tail = p;
  14        opt->pattern_tail = &p->next;
  15        p->next = NULL;
  16}
  17
  18void append_grep_pattern(struct grep_opt *opt, const char *pat,
  19                         const char *origin, int no, enum grep_pat_token t)
  20{
  21        struct grep_pat *p = xcalloc(1, sizeof(*p));
  22        p->pattern = pat;
  23        p->origin = origin;
  24        p->no = no;
  25        p->token = t;
  26        *opt->pattern_tail = p;
  27        opt->pattern_tail = &p->next;
  28        p->next = NULL;
  29}
  30
  31static int is_fixed(const char *s)
  32{
  33        while (*s && !is_regex_special(*s))
  34                s++;
  35        return !*s;
  36}
  37
  38static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  39{
  40        int err;
  41
  42        if (opt->fixed || is_fixed(p->pattern))
  43                p->fixed = 1;
  44        if (opt->regflags & REG_ICASE)
  45                p->fixed = 0;
  46        if (p->fixed)
  47                return;
  48
  49        err = regcomp(&p->regexp, p->pattern, opt->regflags);
  50        if (err) {
  51                char errbuf[1024];
  52                char where[1024];
  53                if (p->no)
  54                        sprintf(where, "In '%s' at %d, ",
  55                                p->origin, p->no);
  56                else if (p->origin)
  57                        sprintf(where, "%s, ", p->origin);
  58                else
  59                        where[0] = 0;
  60                regerror(err, &p->regexp, errbuf, 1024);
  61                regfree(&p->regexp);
  62                die("%s'%s': %s", where, p->pattern, errbuf);
  63        }
  64}
  65
  66static struct grep_expr *compile_pattern_or(struct grep_pat **);
  67static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
  68{
  69        struct grep_pat *p;
  70        struct grep_expr *x;
  71
  72        p = *list;
  73        switch (p->token) {
  74        case GREP_PATTERN: /* atom */
  75        case GREP_PATTERN_HEAD:
  76        case GREP_PATTERN_BODY:
  77                x = xcalloc(1, sizeof (struct grep_expr));
  78                x->node = GREP_NODE_ATOM;
  79                x->u.atom = p;
  80                *list = p->next;
  81                return x;
  82        case GREP_OPEN_PAREN:
  83                *list = p->next;
  84                x = compile_pattern_or(list);
  85                if (!x)
  86                        return NULL;
  87                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
  88                        die("unmatched parenthesis");
  89                *list = (*list)->next;
  90                return x;
  91        default:
  92                return NULL;
  93        }
  94}
  95
  96static struct grep_expr *compile_pattern_not(struct grep_pat **list)
  97{
  98        struct grep_pat *p;
  99        struct grep_expr *x;
 100
 101        p = *list;
 102        switch (p->token) {
 103        case GREP_NOT:
 104                if (!p->next)
 105                        die("--not not followed by pattern expression");
 106                *list = p->next;
 107                x = xcalloc(1, sizeof (struct grep_expr));
 108                x->node = GREP_NODE_NOT;
 109                x->u.unary = compile_pattern_not(list);
 110                if (!x->u.unary)
 111                        die("--not followed by non pattern expression");
 112                return x;
 113        default:
 114                return compile_pattern_atom(list);
 115        }
 116}
 117
 118static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 119{
 120        struct grep_pat *p;
 121        struct grep_expr *x, *y, *z;
 122
 123        x = compile_pattern_not(list);
 124        p = *list;
 125        if (p && p->token == GREP_AND) {
 126                if (!p->next)
 127                        die("--and not followed by pattern expression");
 128                *list = p->next;
 129                y = compile_pattern_and(list);
 130                if (!y)
 131                        die("--and not followed by pattern expression");
 132                z = xcalloc(1, sizeof (struct grep_expr));
 133                z->node = GREP_NODE_AND;
 134                z->u.binary.left = x;
 135                z->u.binary.right = y;
 136                return z;
 137        }
 138        return x;
 139}
 140
 141static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 142{
 143        struct grep_pat *p;
 144        struct grep_expr *x, *y, *z;
 145
 146        x = compile_pattern_and(list);
 147        p = *list;
 148        if (x && p && p->token != GREP_CLOSE_PAREN) {
 149                y = compile_pattern_or(list);
 150                if (!y)
 151                        die("not a pattern expression %s", p->pattern);
 152                z = xcalloc(1, sizeof (struct grep_expr));
 153                z->node = GREP_NODE_OR;
 154                z->u.binary.left = x;
 155                z->u.binary.right = y;
 156                return z;
 157        }
 158        return x;
 159}
 160
 161static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 162{
 163        return compile_pattern_or(list);
 164}
 165
 166void compile_grep_patterns(struct grep_opt *opt)
 167{
 168        struct grep_pat *p;
 169
 170        if (opt->all_match)
 171                opt->extended = 1;
 172
 173        for (p = opt->pattern_list; p; p = p->next) {
 174                switch (p->token) {
 175                case GREP_PATTERN: /* atom */
 176                case GREP_PATTERN_HEAD:
 177                case GREP_PATTERN_BODY:
 178                        compile_regexp(p, opt);
 179                        break;
 180                default:
 181                        opt->extended = 1;
 182                        break;
 183                }
 184        }
 185
 186        if (!opt->extended)
 187                return;
 188
 189        /* Then bundle them up in an expression.
 190         * A classic recursive descent parser would do.
 191         */
 192        p = opt->pattern_list;
 193        opt->pattern_expression = compile_pattern_expr(&p);
 194        if (p)
 195                die("incomplete pattern expression: %s", p->pattern);
 196}
 197
 198static void free_pattern_expr(struct grep_expr *x)
 199{
 200        switch (x->node) {
 201        case GREP_NODE_ATOM:
 202                break;
 203        case GREP_NODE_NOT:
 204                free_pattern_expr(x->u.unary);
 205                break;
 206        case GREP_NODE_AND:
 207        case GREP_NODE_OR:
 208                free_pattern_expr(x->u.binary.left);
 209                free_pattern_expr(x->u.binary.right);
 210                break;
 211        }
 212        free(x);
 213}
 214
 215void free_grep_patterns(struct grep_opt *opt)
 216{
 217        struct grep_pat *p, *n;
 218
 219        for (p = opt->pattern_list; p; p = n) {
 220                n = p->next;
 221                switch (p->token) {
 222                case GREP_PATTERN: /* atom */
 223                case GREP_PATTERN_HEAD:
 224                case GREP_PATTERN_BODY:
 225                        regfree(&p->regexp);
 226                        break;
 227                default:
 228                        break;
 229                }
 230                free(p);
 231        }
 232
 233        if (!opt->extended)
 234                return;
 235        free_pattern_expr(opt->pattern_expression);
 236}
 237
 238static char *end_of_line(char *cp, unsigned long *left)
 239{
 240        unsigned long l = *left;
 241        while (l && *cp != '\n') {
 242                l--;
 243                cp++;
 244        }
 245        *left = l;
 246        return cp;
 247}
 248
 249static int word_char(char ch)
 250{
 251        return isalnum(ch) || ch == '_';
 252}
 253
 254static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 255                      const char *name, unsigned lno, char sign)
 256{
 257        if (opt->null_following_name)
 258                sign = '\0';
 259        if (opt->pathname)
 260                printf("%s%c", name, sign);
 261        if (opt->linenum)
 262                printf("%d%c", lno, sign);
 263        printf("%.*s\n", (int)(eol-bol), bol);
 264}
 265
 266static void show_name(struct grep_opt *opt, const char *name)
 267{
 268        printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
 269}
 270
 271static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 272{
 273        char *hit = strstr(line, pattern);
 274        if (!hit) {
 275                match->rm_so = match->rm_eo = -1;
 276                return REG_NOMATCH;
 277        }
 278        else {
 279                match->rm_so = hit - line;
 280                match->rm_eo = match->rm_so + strlen(pattern);
 281                return 0;
 282        }
 283}
 284
 285static int strip_timestamp(char *bol, char **eol_p)
 286{
 287        char *eol = *eol_p;
 288        int ch;
 289
 290        while (bol < --eol) {
 291                if (*eol != '>')
 292                        continue;
 293                *eol_p = ++eol;
 294                ch = *eol;
 295                *eol = '\0';
 296                return ch;
 297        }
 298        return 0;
 299}
 300
 301static struct {
 302        const char *field;
 303        size_t len;
 304} header_field[] = {
 305        { "author ", 7 },
 306        { "committer ", 10 },
 307};
 308
 309static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
 310{
 311        int hit = 0;
 312        int saved_ch = 0;
 313        regmatch_t pmatch[10];
 314
 315        if ((p->token != GREP_PATTERN) &&
 316            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 317                return 0;
 318
 319        if (p->token == GREP_PATTERN_HEAD) {
 320                const char *field;
 321                size_t len;
 322                assert(p->field < ARRAY_SIZE(header_field));
 323                field = header_field[p->field].field;
 324                len = header_field[p->field].len;
 325                if (strncmp(bol, field, len))
 326                        return 0;
 327                bol += len;
 328                saved_ch = strip_timestamp(bol, &eol);
 329        }
 330
 331 again:
 332        if (!p->fixed) {
 333                regex_t *exp = &p->regexp;
 334                hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
 335                               pmatch, 0);
 336        }
 337        else {
 338                hit = !fixmatch(p->pattern, bol, pmatch);
 339        }
 340
 341        if (hit && opt->word_regexp) {
 342                if ((pmatch[0].rm_so < 0) ||
 343                    (eol - bol) <= pmatch[0].rm_so ||
 344                    (pmatch[0].rm_eo < 0) ||
 345                    (eol - bol) < pmatch[0].rm_eo)
 346                        die("regexp returned nonsense");
 347
 348                /* Match beginning must be either beginning of the
 349                 * line, or at word boundary (i.e. the last char must
 350                 * not be a word char).  Similarly, match end must be
 351                 * either end of the line, or at word boundary
 352                 * (i.e. the next char must not be a word char).
 353                 */
 354                if ( ((pmatch[0].rm_so == 0) ||
 355                      !word_char(bol[pmatch[0].rm_so-1])) &&
 356                     ((pmatch[0].rm_eo == (eol-bol)) ||
 357                      !word_char(bol[pmatch[0].rm_eo])) )
 358                        ;
 359                else
 360                        hit = 0;
 361
 362                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 363                        /* There could be more than one match on the
 364                         * line, and the first match might not be
 365                         * strict word match.  But later ones could be!
 366                         * Forward to the next possible start, i.e. the
 367                         * next position following a non-word char.
 368                         */
 369                        bol = pmatch[0].rm_so + bol + 1;
 370                        while (word_char(bol[-1]) && bol < eol)
 371                                bol++;
 372                        if (bol < eol)
 373                                goto again;
 374                }
 375        }
 376        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 377                *eol = saved_ch;
 378        return hit;
 379}
 380
 381static int match_expr_eval(struct grep_opt *o,
 382                           struct grep_expr *x,
 383                           char *bol, char *eol,
 384                           enum grep_context ctx,
 385                           int collect_hits)
 386{
 387        int h = 0;
 388
 389        switch (x->node) {
 390        case GREP_NODE_ATOM:
 391                h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
 392                break;
 393        case GREP_NODE_NOT:
 394                h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
 395                break;
 396        case GREP_NODE_AND:
 397                if (!collect_hits)
 398                        return (match_expr_eval(o, x->u.binary.left,
 399                                                bol, eol, ctx, 0) &&
 400                                match_expr_eval(o, x->u.binary.right,
 401                                                bol, eol, ctx, 0));
 402                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 403                h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
 404                break;
 405        case GREP_NODE_OR:
 406                if (!collect_hits)
 407                        return (match_expr_eval(o, x->u.binary.left,
 408                                                bol, eol, ctx, 0) ||
 409                                match_expr_eval(o, x->u.binary.right,
 410                                                bol, eol, ctx, 0));
 411                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 412                x->u.binary.left->hit |= h;
 413                h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
 414                break;
 415        default:
 416                die("Unexpected node type (internal error) %d", x->node);
 417        }
 418        if (collect_hits)
 419                x->hit |= h;
 420        return h;
 421}
 422
 423static int match_expr(struct grep_opt *opt, char *bol, char *eol,
 424                      enum grep_context ctx, int collect_hits)
 425{
 426        struct grep_expr *x = opt->pattern_expression;
 427        return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
 428}
 429
 430static int match_line(struct grep_opt *opt, char *bol, char *eol,
 431                      enum grep_context ctx, int collect_hits)
 432{
 433        struct grep_pat *p;
 434        if (opt->extended)
 435                return match_expr(opt, bol, eol, ctx, collect_hits);
 436
 437        /* we do not call with collect_hits without being extended */
 438        for (p = opt->pattern_list; p; p = p->next) {
 439                if (match_one_pattern(opt, p, bol, eol, ctx))
 440                        return 1;
 441        }
 442        return 0;
 443}
 444
 445static int grep_buffer_1(struct grep_opt *opt, const char *name,
 446                         char *buf, unsigned long size, int collect_hits)
 447{
 448        char *bol = buf;
 449        unsigned long left = size;
 450        unsigned lno = 1;
 451        struct pre_context_line {
 452                char *bol;
 453                char *eol;
 454        } *prev = NULL, *pcl;
 455        unsigned last_hit = 0;
 456        unsigned last_shown = 0;
 457        int binary_match_only = 0;
 458        const char *hunk_mark = "";
 459        unsigned count = 0;
 460        enum grep_context ctx = GREP_CONTEXT_HEAD;
 461
 462        if (buffer_is_binary(buf, size)) {
 463                switch (opt->binary) {
 464                case GREP_BINARY_DEFAULT:
 465                        binary_match_only = 1;
 466                        break;
 467                case GREP_BINARY_NOMATCH:
 468                        return 0; /* Assume unmatch */
 469                        break;
 470                default:
 471                        break;
 472                }
 473        }
 474
 475        if (opt->pre_context)
 476                prev = xcalloc(opt->pre_context, sizeof(*prev));
 477        if (opt->pre_context || opt->post_context)
 478                hunk_mark = "--\n";
 479
 480        while (left) {
 481                char *eol, ch;
 482                int hit;
 483
 484                eol = end_of_line(bol, &left);
 485                ch = *eol;
 486                *eol = 0;
 487
 488                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 489                        ctx = GREP_CONTEXT_BODY;
 490
 491                hit = match_line(opt, bol, eol, ctx, collect_hits);
 492                *eol = ch;
 493
 494                if (collect_hits)
 495                        goto next_line;
 496
 497                /* "grep -v -e foo -e bla" should list lines
 498                 * that do not have either, so inversion should
 499                 * be done outside.
 500                 */
 501                if (opt->invert)
 502                        hit = !hit;
 503                if (opt->unmatch_name_only) {
 504                        if (hit)
 505                                return 0;
 506                        goto next_line;
 507                }
 508                if (hit) {
 509                        count++;
 510                        if (opt->status_only)
 511                                return 1;
 512                        if (binary_match_only) {
 513                                printf("Binary file %s matches\n", name);
 514                                return 1;
 515                        }
 516                        if (opt->name_only) {
 517                                show_name(opt, name);
 518                                return 1;
 519                        }
 520                        /* Hit at this line.  If we haven't shown the
 521                         * pre-context lines, we would need to show them.
 522                         * When asked to do "count", this still show
 523                         * the context which is nonsense, but the user
 524                         * deserves to get that ;-).
 525                         */
 526                        if (opt->pre_context) {
 527                                unsigned from;
 528                                if (opt->pre_context < lno)
 529                                        from = lno - opt->pre_context;
 530                                else
 531                                        from = 1;
 532                                if (from <= last_shown)
 533                                        from = last_shown + 1;
 534                                if (last_shown && from != last_shown + 1)
 535                                        fputs(hunk_mark, stdout);
 536                                while (from < lno) {
 537                                        pcl = &prev[lno-from-1];
 538                                        show_line(opt, pcl->bol, pcl->eol,
 539                                                  name, from, '-');
 540                                        from++;
 541                                }
 542                                last_shown = lno-1;
 543                        }
 544                        if (last_shown && lno != last_shown + 1)
 545                                fputs(hunk_mark, stdout);
 546                        if (!opt->count)
 547                                show_line(opt, bol, eol, name, lno, ':');
 548                        last_shown = last_hit = lno;
 549                }
 550                else if (last_hit &&
 551                         lno <= last_hit + opt->post_context) {
 552                        /* If the last hit is within the post context,
 553                         * we need to show this line.
 554                         */
 555                        if (last_shown && lno != last_shown + 1)
 556                                fputs(hunk_mark, stdout);
 557                        show_line(opt, bol, eol, name, lno, '-');
 558                        last_shown = lno;
 559                }
 560                if (opt->pre_context) {
 561                        memmove(prev+1, prev,
 562                                (opt->pre_context-1) * sizeof(*prev));
 563                        prev->bol = bol;
 564                        prev->eol = eol;
 565                }
 566
 567        next_line:
 568                bol = eol + 1;
 569                if (!left)
 570                        break;
 571                left--;
 572                lno++;
 573        }
 574
 575        free(prev);
 576        if (collect_hits)
 577                return 0;
 578
 579        if (opt->status_only)
 580                return 0;
 581        if (opt->unmatch_name_only) {
 582                /* We did not see any hit, so we want to show this */
 583                show_name(opt, name);
 584                return 1;
 585        }
 586
 587        /* NEEDSWORK:
 588         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 589         * which feels mostly useless but sometimes useful.  Maybe
 590         * make it another option?  For now suppress them.
 591         */
 592        if (opt->count && count)
 593                printf("%s%c%u\n", name,
 594                       opt->null_following_name ? '\0' : ':', count);
 595        return !!last_hit;
 596}
 597
 598static void clr_hit_marker(struct grep_expr *x)
 599{
 600        /* All-hit markers are meaningful only at the very top level
 601         * OR node.
 602         */
 603        while (1) {
 604                x->hit = 0;
 605                if (x->node != GREP_NODE_OR)
 606                        return;
 607                x->u.binary.left->hit = 0;
 608                x = x->u.binary.right;
 609        }
 610}
 611
 612static int chk_hit_marker(struct grep_expr *x)
 613{
 614        /* Top level nodes have hit markers.  See if they all are hits */
 615        while (1) {
 616                if (x->node != GREP_NODE_OR)
 617                        return x->hit;
 618                if (!x->u.binary.left->hit)
 619                        return 0;
 620                x = x->u.binary.right;
 621        }
 622}
 623
 624int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
 625{
 626        /*
 627         * we do not have to do the two-pass grep when we do not check
 628         * buffer-wide "all-match".
 629         */
 630        if (!opt->all_match)
 631                return grep_buffer_1(opt, name, buf, size, 0);
 632
 633        /* Otherwise the toplevel "or" terms hit a bit differently.
 634         * We first clear hit markers from them.
 635         */
 636        clr_hit_marker(opt->pattern_expression);
 637        grep_buffer_1(opt, name, buf, size, 1);
 638
 639        if (!chk_hit_marker(opt->pattern_expression))
 640                return 0;
 641
 642        return grep_buffer_1(opt, name, buf, size, 0);
 643}