f9a45258aa0485df6f6205aa8121bdad07a480b0
   1#include "cache.h"
   2#include "grep.h"
   3#include "xdiff-interface.h"
   4
   5void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
   6{
   7        struct grep_pat *p = xcalloc(1, sizeof(*p));
   8        p->pattern = pat;
   9        p->origin = "header";
  10        p->no = 0;
  11        p->token = GREP_PATTERN_HEAD;
  12        p->field = field;
  13        *opt->pattern_tail = p;
  14        opt->pattern_tail = &p->next;
  15        p->next = NULL;
  16}
  17
  18void append_grep_pattern(struct grep_opt *opt, const char *pat,
  19                         const char *origin, int no, enum grep_pat_token t)
  20{
  21        struct grep_pat *p = xcalloc(1, sizeof(*p));
  22        p->pattern = pat;
  23        p->origin = origin;
  24        p->no = no;
  25        p->token = t;
  26        *opt->pattern_tail = p;
  27        opt->pattern_tail = &p->next;
  28        p->next = NULL;
  29}
  30
  31static int isregexspecial(int c)
  32{
  33        return c == '\0' || is_glob_special(c) ||
  34                c == '$' || c == '(' || c == ')' || c == '+' ||
  35                c == '.' || c == '^' || c == '{' || c == '|';
  36}
  37
  38static int is_fixed(const char *s)
  39{
  40        while (!isregexspecial(*s))
  41                s++;
  42        return !*s;
  43}
  44
  45static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  46{
  47        int err;
  48
  49        if (opt->fixed || is_fixed(p->pattern))
  50                p->fixed = 1;
  51        if (opt->regflags & REG_ICASE)
  52                p->fixed = 0;
  53        if (p->fixed)
  54                return;
  55
  56        err = regcomp(&p->regexp, p->pattern, opt->regflags);
  57        if (err) {
  58                char errbuf[1024];
  59                char where[1024];
  60                if (p->no)
  61                        sprintf(where, "In '%s' at %d, ",
  62                                p->origin, p->no);
  63                else if (p->origin)
  64                        sprintf(where, "%s, ", p->origin);
  65                else
  66                        where[0] = 0;
  67                regerror(err, &p->regexp, errbuf, 1024);
  68                regfree(&p->regexp);
  69                die("%s'%s': %s", where, p->pattern, errbuf);
  70        }
  71}
  72
  73static struct grep_expr *compile_pattern_or(struct grep_pat **);
  74static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
  75{
  76        struct grep_pat *p;
  77        struct grep_expr *x;
  78
  79        p = *list;
  80        switch (p->token) {
  81        case GREP_PATTERN: /* atom */
  82        case GREP_PATTERN_HEAD:
  83        case GREP_PATTERN_BODY:
  84                x = xcalloc(1, sizeof (struct grep_expr));
  85                x->node = GREP_NODE_ATOM;
  86                x->u.atom = p;
  87                *list = p->next;
  88                return x;
  89        case GREP_OPEN_PAREN:
  90                *list = p->next;
  91                x = compile_pattern_or(list);
  92                if (!x)
  93                        return NULL;
  94                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
  95                        die("unmatched parenthesis");
  96                *list = (*list)->next;
  97                return x;
  98        default:
  99                return NULL;
 100        }
 101}
 102
 103static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 104{
 105        struct grep_pat *p;
 106        struct grep_expr *x;
 107
 108        p = *list;
 109        switch (p->token) {
 110        case GREP_NOT:
 111                if (!p->next)
 112                        die("--not not followed by pattern expression");
 113                *list = p->next;
 114                x = xcalloc(1, sizeof (struct grep_expr));
 115                x->node = GREP_NODE_NOT;
 116                x->u.unary = compile_pattern_not(list);
 117                if (!x->u.unary)
 118                        die("--not followed by non pattern expression");
 119                return x;
 120        default:
 121                return compile_pattern_atom(list);
 122        }
 123}
 124
 125static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 126{
 127        struct grep_pat *p;
 128        struct grep_expr *x, *y, *z;
 129
 130        x = compile_pattern_not(list);
 131        p = *list;
 132        if (p && p->token == GREP_AND) {
 133                if (!p->next)
 134                        die("--and not followed by pattern expression");
 135                *list = p->next;
 136                y = compile_pattern_and(list);
 137                if (!y)
 138                        die("--and not followed by pattern expression");
 139                z = xcalloc(1, sizeof (struct grep_expr));
 140                z->node = GREP_NODE_AND;
 141                z->u.binary.left = x;
 142                z->u.binary.right = y;
 143                return z;
 144        }
 145        return x;
 146}
 147
 148static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 149{
 150        struct grep_pat *p;
 151        struct grep_expr *x, *y, *z;
 152
 153        x = compile_pattern_and(list);
 154        p = *list;
 155        if (x && p && p->token != GREP_CLOSE_PAREN) {
 156                y = compile_pattern_or(list);
 157                if (!y)
 158                        die("not a pattern expression %s", p->pattern);
 159                z = xcalloc(1, sizeof (struct grep_expr));
 160                z->node = GREP_NODE_OR;
 161                z->u.binary.left = x;
 162                z->u.binary.right = y;
 163                return z;
 164        }
 165        return x;
 166}
 167
 168static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 169{
 170        return compile_pattern_or(list);
 171}
 172
 173void compile_grep_patterns(struct grep_opt *opt)
 174{
 175        struct grep_pat *p;
 176
 177        if (opt->all_match)
 178                opt->extended = 1;
 179
 180        for (p = opt->pattern_list; p; p = p->next) {
 181                switch (p->token) {
 182                case GREP_PATTERN: /* atom */
 183                case GREP_PATTERN_HEAD:
 184                case GREP_PATTERN_BODY:
 185                        compile_regexp(p, opt);
 186                        break;
 187                default:
 188                        opt->extended = 1;
 189                        break;
 190                }
 191        }
 192
 193        if (!opt->extended)
 194                return;
 195
 196        /* Then bundle them up in an expression.
 197         * A classic recursive descent parser would do.
 198         */
 199        p = opt->pattern_list;
 200        opt->pattern_expression = compile_pattern_expr(&p);
 201        if (p)
 202                die("incomplete pattern expression: %s", p->pattern);
 203}
 204
 205static void free_pattern_expr(struct grep_expr *x)
 206{
 207        switch (x->node) {
 208        case GREP_NODE_ATOM:
 209                break;
 210        case GREP_NODE_NOT:
 211                free_pattern_expr(x->u.unary);
 212                break;
 213        case GREP_NODE_AND:
 214        case GREP_NODE_OR:
 215                free_pattern_expr(x->u.binary.left);
 216                free_pattern_expr(x->u.binary.right);
 217                break;
 218        }
 219        free(x);
 220}
 221
 222void free_grep_patterns(struct grep_opt *opt)
 223{
 224        struct grep_pat *p, *n;
 225
 226        for (p = opt->pattern_list; p; p = n) {
 227                n = p->next;
 228                switch (p->token) {
 229                case GREP_PATTERN: /* atom */
 230                case GREP_PATTERN_HEAD:
 231                case GREP_PATTERN_BODY:
 232                        regfree(&p->regexp);
 233                        break;
 234                default:
 235                        break;
 236                }
 237                free(p);
 238        }
 239
 240        if (!opt->extended)
 241                return;
 242        free_pattern_expr(opt->pattern_expression);
 243}
 244
 245static char *end_of_line(char *cp, unsigned long *left)
 246{
 247        unsigned long l = *left;
 248        while (l && *cp != '\n') {
 249                l--;
 250                cp++;
 251        }
 252        *left = l;
 253        return cp;
 254}
 255
 256static int word_char(char ch)
 257{
 258        return isalnum(ch) || ch == '_';
 259}
 260
 261static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 262                      const char *name, unsigned lno, char sign)
 263{
 264        if (opt->null_following_name)
 265                sign = '\0';
 266        if (opt->pathname)
 267                printf("%s%c", name, sign);
 268        if (opt->linenum)
 269                printf("%d%c", lno, sign);
 270        printf("%.*s\n", (int)(eol-bol), bol);
 271}
 272
 273static void show_name(struct grep_opt *opt, const char *name)
 274{
 275        printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
 276}
 277
 278static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 279{
 280        char *hit = strstr(line, pattern);
 281        if (!hit) {
 282                match->rm_so = match->rm_eo = -1;
 283                return REG_NOMATCH;
 284        }
 285        else {
 286                match->rm_so = hit - line;
 287                match->rm_eo = match->rm_so + strlen(pattern);
 288                return 0;
 289        }
 290}
 291
 292static int strip_timestamp(char *bol, char **eol_p)
 293{
 294        char *eol = *eol_p;
 295        int ch;
 296
 297        while (bol < --eol) {
 298                if (*eol != '>')
 299                        continue;
 300                *eol_p = ++eol;
 301                ch = *eol;
 302                *eol = '\0';
 303                return ch;
 304        }
 305        return 0;
 306}
 307
 308static struct {
 309        const char *field;
 310        size_t len;
 311} header_field[] = {
 312        { "author ", 7 },
 313        { "committer ", 10 },
 314};
 315
 316static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
 317{
 318        int hit = 0;
 319        int saved_ch = 0;
 320        regmatch_t pmatch[10];
 321
 322        if ((p->token != GREP_PATTERN) &&
 323            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 324                return 0;
 325
 326        if (p->token == GREP_PATTERN_HEAD) {
 327                const char *field;
 328                size_t len;
 329                assert(p->field < ARRAY_SIZE(header_field));
 330                field = header_field[p->field].field;
 331                len = header_field[p->field].len;
 332                if (strncmp(bol, field, len))
 333                        return 0;
 334                bol += len;
 335                saved_ch = strip_timestamp(bol, &eol);
 336        }
 337
 338 again:
 339        if (!p->fixed) {
 340                regex_t *exp = &p->regexp;
 341                hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
 342                               pmatch, 0);
 343        }
 344        else {
 345                hit = !fixmatch(p->pattern, bol, pmatch);
 346        }
 347
 348        if (hit && opt->word_regexp) {
 349                if ((pmatch[0].rm_so < 0) ||
 350                    (eol - bol) <= pmatch[0].rm_so ||
 351                    (pmatch[0].rm_eo < 0) ||
 352                    (eol - bol) < pmatch[0].rm_eo)
 353                        die("regexp returned nonsense");
 354
 355                /* Match beginning must be either beginning of the
 356                 * line, or at word boundary (i.e. the last char must
 357                 * not be a word char).  Similarly, match end must be
 358                 * either end of the line, or at word boundary
 359                 * (i.e. the next char must not be a word char).
 360                 */
 361                if ( ((pmatch[0].rm_so == 0) ||
 362                      !word_char(bol[pmatch[0].rm_so-1])) &&
 363                     ((pmatch[0].rm_eo == (eol-bol)) ||
 364                      !word_char(bol[pmatch[0].rm_eo])) )
 365                        ;
 366                else
 367                        hit = 0;
 368
 369                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 370                        /* There could be more than one match on the
 371                         * line, and the first match might not be
 372                         * strict word match.  But later ones could be!
 373                         * Forward to the next possible start, i.e. the
 374                         * next position following a non-word char.
 375                         */
 376                        bol = pmatch[0].rm_so + bol + 1;
 377                        while (word_char(bol[-1]) && bol < eol)
 378                                bol++;
 379                        if (bol < eol)
 380                                goto again;
 381                }
 382        }
 383        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 384                *eol = saved_ch;
 385        return hit;
 386}
 387
 388static int match_expr_eval(struct grep_opt *o,
 389                           struct grep_expr *x,
 390                           char *bol, char *eol,
 391                           enum grep_context ctx,
 392                           int collect_hits)
 393{
 394        int h = 0;
 395
 396        switch (x->node) {
 397        case GREP_NODE_ATOM:
 398                h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
 399                break;
 400        case GREP_NODE_NOT:
 401                h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
 402                break;
 403        case GREP_NODE_AND:
 404                if (!collect_hits)
 405                        return (match_expr_eval(o, x->u.binary.left,
 406                                                bol, eol, ctx, 0) &&
 407                                match_expr_eval(o, x->u.binary.right,
 408                                                bol, eol, ctx, 0));
 409                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 410                h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
 411                break;
 412        case GREP_NODE_OR:
 413                if (!collect_hits)
 414                        return (match_expr_eval(o, x->u.binary.left,
 415                                                bol, eol, ctx, 0) ||
 416                                match_expr_eval(o, x->u.binary.right,
 417                                                bol, eol, ctx, 0));
 418                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 419                x->u.binary.left->hit |= h;
 420                h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
 421                break;
 422        default:
 423                die("Unexpected node type (internal error) %d", x->node);
 424        }
 425        if (collect_hits)
 426                x->hit |= h;
 427        return h;
 428}
 429
 430static int match_expr(struct grep_opt *opt, char *bol, char *eol,
 431                      enum grep_context ctx, int collect_hits)
 432{
 433        struct grep_expr *x = opt->pattern_expression;
 434        return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
 435}
 436
 437static int match_line(struct grep_opt *opt, char *bol, char *eol,
 438                      enum grep_context ctx, int collect_hits)
 439{
 440        struct grep_pat *p;
 441        if (opt->extended)
 442                return match_expr(opt, bol, eol, ctx, collect_hits);
 443
 444        /* we do not call with collect_hits without being extended */
 445        for (p = opt->pattern_list; p; p = p->next) {
 446                if (match_one_pattern(opt, p, bol, eol, ctx))
 447                        return 1;
 448        }
 449        return 0;
 450}
 451
 452static int grep_buffer_1(struct grep_opt *opt, const char *name,
 453                         char *buf, unsigned long size, int collect_hits)
 454{
 455        char *bol = buf;
 456        unsigned long left = size;
 457        unsigned lno = 1;
 458        struct pre_context_line {
 459                char *bol;
 460                char *eol;
 461        } *prev = NULL, *pcl;
 462        unsigned last_hit = 0;
 463        unsigned last_shown = 0;
 464        int binary_match_only = 0;
 465        const char *hunk_mark = "";
 466        unsigned count = 0;
 467        enum grep_context ctx = GREP_CONTEXT_HEAD;
 468
 469        if (buffer_is_binary(buf, size)) {
 470                switch (opt->binary) {
 471                case GREP_BINARY_DEFAULT:
 472                        binary_match_only = 1;
 473                        break;
 474                case GREP_BINARY_NOMATCH:
 475                        return 0; /* Assume unmatch */
 476                        break;
 477                default:
 478                        break;
 479                }
 480        }
 481
 482        if (opt->pre_context)
 483                prev = xcalloc(opt->pre_context, sizeof(*prev));
 484        if (opt->pre_context || opt->post_context)
 485                hunk_mark = "--\n";
 486
 487        while (left) {
 488                char *eol, ch;
 489                int hit;
 490
 491                eol = end_of_line(bol, &left);
 492                ch = *eol;
 493                *eol = 0;
 494
 495                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 496                        ctx = GREP_CONTEXT_BODY;
 497
 498                hit = match_line(opt, bol, eol, ctx, collect_hits);
 499                *eol = ch;
 500
 501                if (collect_hits)
 502                        goto next_line;
 503
 504                /* "grep -v -e foo -e bla" should list lines
 505                 * that do not have either, so inversion should
 506                 * be done outside.
 507                 */
 508                if (opt->invert)
 509                        hit = !hit;
 510                if (opt->unmatch_name_only) {
 511                        if (hit)
 512                                return 0;
 513                        goto next_line;
 514                }
 515                if (hit) {
 516                        count++;
 517                        if (opt->status_only)
 518                                return 1;
 519                        if (binary_match_only) {
 520                                printf("Binary file %s matches\n", name);
 521                                return 1;
 522                        }
 523                        if (opt->name_only) {
 524                                show_name(opt, name);
 525                                return 1;
 526                        }
 527                        /* Hit at this line.  If we haven't shown the
 528                         * pre-context lines, we would need to show them.
 529                         * When asked to do "count", this still show
 530                         * the context which is nonsense, but the user
 531                         * deserves to get that ;-).
 532                         */
 533                        if (opt->pre_context) {
 534                                unsigned from;
 535                                if (opt->pre_context < lno)
 536                                        from = lno - opt->pre_context;
 537                                else
 538                                        from = 1;
 539                                if (from <= last_shown)
 540                                        from = last_shown + 1;
 541                                if (last_shown && from != last_shown + 1)
 542                                        fputs(hunk_mark, stdout);
 543                                while (from < lno) {
 544                                        pcl = &prev[lno-from-1];
 545                                        show_line(opt, pcl->bol, pcl->eol,
 546                                                  name, from, '-');
 547                                        from++;
 548                                }
 549                                last_shown = lno-1;
 550                        }
 551                        if (last_shown && lno != last_shown + 1)
 552                                fputs(hunk_mark, stdout);
 553                        if (!opt->count)
 554                                show_line(opt, bol, eol, name, lno, ':');
 555                        last_shown = last_hit = lno;
 556                }
 557                else if (last_hit &&
 558                         lno <= last_hit + opt->post_context) {
 559                        /* If the last hit is within the post context,
 560                         * we need to show this line.
 561                         */
 562                        if (last_shown && lno != last_shown + 1)
 563                                fputs(hunk_mark, stdout);
 564                        show_line(opt, bol, eol, name, lno, '-');
 565                        last_shown = lno;
 566                }
 567                if (opt->pre_context) {
 568                        memmove(prev+1, prev,
 569                                (opt->pre_context-1) * sizeof(*prev));
 570                        prev->bol = bol;
 571                        prev->eol = eol;
 572                }
 573
 574        next_line:
 575                bol = eol + 1;
 576                if (!left)
 577                        break;
 578                left--;
 579                lno++;
 580        }
 581
 582        free(prev);
 583        if (collect_hits)
 584                return 0;
 585
 586        if (opt->status_only)
 587                return 0;
 588        if (opt->unmatch_name_only) {
 589                /* We did not see any hit, so we want to show this */
 590                show_name(opt, name);
 591                return 1;
 592        }
 593
 594        /* NEEDSWORK:
 595         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 596         * which feels mostly useless but sometimes useful.  Maybe
 597         * make it another option?  For now suppress them.
 598         */
 599        if (opt->count && count)
 600                printf("%s%c%u\n", name,
 601                       opt->null_following_name ? '\0' : ':', count);
 602        return !!last_hit;
 603}
 604
 605static void clr_hit_marker(struct grep_expr *x)
 606{
 607        /* All-hit markers are meaningful only at the very top level
 608         * OR node.
 609         */
 610        while (1) {
 611                x->hit = 0;
 612                if (x->node != GREP_NODE_OR)
 613                        return;
 614                x->u.binary.left->hit = 0;
 615                x = x->u.binary.right;
 616        }
 617}
 618
 619static int chk_hit_marker(struct grep_expr *x)
 620{
 621        /* Top level nodes have hit markers.  See if they all are hits */
 622        while (1) {
 623                if (x->node != GREP_NODE_OR)
 624                        return x->hit;
 625                if (!x->u.binary.left->hit)
 626                        return 0;
 627                x = x->u.binary.right;
 628        }
 629}
 630
 631int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
 632{
 633        /*
 634         * we do not have to do the two-pass grep when we do not check
 635         * buffer-wide "all-match".
 636         */
 637        if (!opt->all_match)
 638                return grep_buffer_1(opt, name, buf, size, 0);
 639
 640        /* Otherwise the toplevel "or" terms hit a bit differently.
 641         * We first clear hit markers from them.
 642         */
 643        clr_hit_marker(opt->pattern_expression);
 644        grep_buffer_1(opt, name, buf, size, 1);
 645
 646        if (!chk_hit_marker(opt->pattern_expression))
 647                return 0;
 648
 649        return grep_buffer_1(opt, name, buf, size, 0);
 650}