grep.con commit Merge branch 'maint' (15dc66a)
   1#include "cache.h"
   2#include "grep.h"
   3#include "xdiff-interface.h"
   4
   5void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
   6{
   7        struct grep_pat *p = xcalloc(1, sizeof(*p));
   8        p->pattern = pat;
   9        p->origin = "header";
  10        p->no = 0;
  11        p->token = GREP_PATTERN_HEAD;
  12        p->field = field;
  13        *opt->pattern_tail = p;
  14        opt->pattern_tail = &p->next;
  15        p->next = NULL;
  16}
  17
  18void append_grep_pattern(struct grep_opt *opt, const char *pat,
  19                         const char *origin, int no, enum grep_pat_token t)
  20{
  21        struct grep_pat *p = xcalloc(1, sizeof(*p));
  22        p->pattern = pat;
  23        p->origin = origin;
  24        p->no = no;
  25        p->token = t;
  26        *opt->pattern_tail = p;
  27        opt->pattern_tail = &p->next;
  28        p->next = NULL;
  29}
  30
  31static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  32{
  33        int err = regcomp(&p->regexp, p->pattern, opt->regflags);
  34        if (err) {
  35                char errbuf[1024];
  36                char where[1024];
  37                if (p->no)
  38                        sprintf(where, "In '%s' at %d, ",
  39                                p->origin, p->no);
  40                else if (p->origin)
  41                        sprintf(where, "%s, ", p->origin);
  42                else
  43                        where[0] = 0;
  44                regerror(err, &p->regexp, errbuf, 1024);
  45                regfree(&p->regexp);
  46                die("%s'%s': %s", where, p->pattern, errbuf);
  47        }
  48}
  49
  50static struct grep_expr *compile_pattern_or(struct grep_pat **);
  51static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
  52{
  53        struct grep_pat *p;
  54        struct grep_expr *x;
  55
  56        p = *list;
  57        switch (p->token) {
  58        case GREP_PATTERN: /* atom */
  59        case GREP_PATTERN_HEAD:
  60        case GREP_PATTERN_BODY:
  61                x = xcalloc(1, sizeof (struct grep_expr));
  62                x->node = GREP_NODE_ATOM;
  63                x->u.atom = p;
  64                *list = p->next;
  65                return x;
  66        case GREP_OPEN_PAREN:
  67                *list = p->next;
  68                x = compile_pattern_or(list);
  69                if (!x)
  70                        return NULL;
  71                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
  72                        die("unmatched parenthesis");
  73                *list = (*list)->next;
  74                return x;
  75        default:
  76                return NULL;
  77        }
  78}
  79
  80static struct grep_expr *compile_pattern_not(struct grep_pat **list)
  81{
  82        struct grep_pat *p;
  83        struct grep_expr *x;
  84
  85        p = *list;
  86        switch (p->token) {
  87        case GREP_NOT:
  88                if (!p->next)
  89                        die("--not not followed by pattern expression");
  90                *list = p->next;
  91                x = xcalloc(1, sizeof (struct grep_expr));
  92                x->node = GREP_NODE_NOT;
  93                x->u.unary = compile_pattern_not(list);
  94                if (!x->u.unary)
  95                        die("--not followed by non pattern expression");
  96                return x;
  97        default:
  98                return compile_pattern_atom(list);
  99        }
 100}
 101
 102static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 103{
 104        struct grep_pat *p;
 105        struct grep_expr *x, *y, *z;
 106
 107        x = compile_pattern_not(list);
 108        p = *list;
 109        if (p && p->token == GREP_AND) {
 110                if (!p->next)
 111                        die("--and not followed by pattern expression");
 112                *list = p->next;
 113                y = compile_pattern_and(list);
 114                if (!y)
 115                        die("--and not followed by pattern expression");
 116                z = xcalloc(1, sizeof (struct grep_expr));
 117                z->node = GREP_NODE_AND;
 118                z->u.binary.left = x;
 119                z->u.binary.right = y;
 120                return z;
 121        }
 122        return x;
 123}
 124
 125static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 126{
 127        struct grep_pat *p;
 128        struct grep_expr *x, *y, *z;
 129
 130        x = compile_pattern_and(list);
 131        p = *list;
 132        if (x && p && p->token != GREP_CLOSE_PAREN) {
 133                y = compile_pattern_or(list);
 134                if (!y)
 135                        die("not a pattern expression %s", p->pattern);
 136                z = xcalloc(1, sizeof (struct grep_expr));
 137                z->node = GREP_NODE_OR;
 138                z->u.binary.left = x;
 139                z->u.binary.right = y;
 140                return z;
 141        }
 142        return x;
 143}
 144
 145static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 146{
 147        return compile_pattern_or(list);
 148}
 149
 150void compile_grep_patterns(struct grep_opt *opt)
 151{
 152        struct grep_pat *p;
 153
 154        if (opt->all_match)
 155                opt->extended = 1;
 156
 157        for (p = opt->pattern_list; p; p = p->next) {
 158                switch (p->token) {
 159                case GREP_PATTERN: /* atom */
 160                case GREP_PATTERN_HEAD:
 161                case GREP_PATTERN_BODY:
 162                        if (!opt->fixed)
 163                                compile_regexp(p, opt);
 164                        break;
 165                default:
 166                        opt->extended = 1;
 167                        break;
 168                }
 169        }
 170
 171        if (!opt->extended)
 172                return;
 173
 174        /* Then bundle them up in an expression.
 175         * A classic recursive descent parser would do.
 176         */
 177        p = opt->pattern_list;
 178        opt->pattern_expression = compile_pattern_expr(&p);
 179        if (p)
 180                die("incomplete pattern expression: %s", p->pattern);
 181}
 182
 183static void free_pattern_expr(struct grep_expr *x)
 184{
 185        switch (x->node) {
 186        case GREP_NODE_ATOM:
 187                break;
 188        case GREP_NODE_NOT:
 189                free_pattern_expr(x->u.unary);
 190                break;
 191        case GREP_NODE_AND:
 192        case GREP_NODE_OR:
 193                free_pattern_expr(x->u.binary.left);
 194                free_pattern_expr(x->u.binary.right);
 195                break;
 196        }
 197        free(x);
 198}
 199
 200void free_grep_patterns(struct grep_opt *opt)
 201{
 202        struct grep_pat *p, *n;
 203
 204        for (p = opt->pattern_list; p; p = n) {
 205                n = p->next;
 206                switch (p->token) {
 207                case GREP_PATTERN: /* atom */
 208                case GREP_PATTERN_HEAD:
 209                case GREP_PATTERN_BODY:
 210                        regfree(&p->regexp);
 211                        break;
 212                default:
 213                        break;
 214                }
 215                free(p);
 216        }
 217
 218        if (!opt->extended)
 219                return;
 220        free_pattern_expr(opt->pattern_expression);
 221}
 222
 223static char *end_of_line(char *cp, unsigned long *left)
 224{
 225        unsigned long l = *left;
 226        while (l && *cp != '\n') {
 227                l--;
 228                cp++;
 229        }
 230        *left = l;
 231        return cp;
 232}
 233
 234static int word_char(char ch)
 235{
 236        return isalnum(ch) || ch == '_';
 237}
 238
 239static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 240                      const char *name, unsigned lno, char sign)
 241{
 242        if (opt->pathname)
 243                printf("%s%c", name, sign);
 244        if (opt->linenum)
 245                printf("%d%c", lno, sign);
 246        printf("%.*s\n", (int)(eol-bol), bol);
 247}
 248
 249static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 250{
 251        char *hit = strstr(line, pattern);
 252        if (!hit) {
 253                match->rm_so = match->rm_eo = -1;
 254                return REG_NOMATCH;
 255        }
 256        else {
 257                match->rm_so = hit - line;
 258                match->rm_eo = match->rm_so + strlen(pattern);
 259                return 0;
 260        }
 261}
 262
 263static int strip_timestamp(char *bol, char **eol_p)
 264{
 265        char *eol = *eol_p;
 266        int ch;
 267
 268        while (bol < --eol) {
 269                if (*eol != '>')
 270                        continue;
 271                *eol_p = ++eol;
 272                ch = *eol;
 273                *eol = '\0';
 274                return ch;
 275        }
 276        return 0;
 277}
 278
 279static struct {
 280        const char *field;
 281        size_t len;
 282} header_field[] = {
 283        { "author ", 7 },
 284        { "committer ", 10 },
 285};
 286
 287static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
 288{
 289        int hit = 0;
 290        int at_true_bol = 1;
 291        int saved_ch = 0;
 292        regmatch_t pmatch[10];
 293
 294        if ((p->token != GREP_PATTERN) &&
 295            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 296                return 0;
 297
 298        if (p->token == GREP_PATTERN_HEAD) {
 299                const char *field;
 300                size_t len;
 301                assert(p->field < ARRAY_SIZE(header_field));
 302                field = header_field[p->field].field;
 303                len = header_field[p->field].len;
 304                if (strncmp(bol, field, len))
 305                        return 0;
 306                bol += len;
 307                saved_ch = strip_timestamp(bol, &eol);
 308        }
 309
 310 again:
 311        if (!opt->fixed) {
 312                regex_t *exp = &p->regexp;
 313                hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
 314                               pmatch, 0);
 315        }
 316        else {
 317                hit = !fixmatch(p->pattern, bol, pmatch);
 318        }
 319
 320        if (hit && opt->word_regexp) {
 321                if ((pmatch[0].rm_so < 0) ||
 322                    (eol - bol) <= pmatch[0].rm_so ||
 323                    (pmatch[0].rm_eo < 0) ||
 324                    (eol - bol) < pmatch[0].rm_eo)
 325                        die("regexp returned nonsense");
 326
 327                /* Match beginning must be either beginning of the
 328                 * line, or at word boundary (i.e. the last char must
 329                 * not be a word char).  Similarly, match end must be
 330                 * either end of the line, or at word boundary
 331                 * (i.e. the next char must not be a word char).
 332                 */
 333                if ( ((pmatch[0].rm_so == 0 && at_true_bol) ||
 334                      !word_char(bol[pmatch[0].rm_so-1])) &&
 335                     ((pmatch[0].rm_eo == (eol-bol)) ||
 336                      !word_char(bol[pmatch[0].rm_eo])) )
 337                        ;
 338                else
 339                        hit = 0;
 340
 341                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 342                        /* There could be more than one match on the
 343                         * line, and the first match might not be
 344                         * strict word match.  But later ones could be!
 345                         */
 346                        bol = pmatch[0].rm_so + bol + 1;
 347                        at_true_bol = 0;
 348                        goto again;
 349                }
 350        }
 351        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 352                *eol = saved_ch;
 353        return hit;
 354}
 355
 356static int match_expr_eval(struct grep_opt *o,
 357                           struct grep_expr *x,
 358                           char *bol, char *eol,
 359                           enum grep_context ctx,
 360                           int collect_hits)
 361{
 362        int h = 0;
 363
 364        switch (x->node) {
 365        case GREP_NODE_ATOM:
 366                h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
 367                break;
 368        case GREP_NODE_NOT:
 369                h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
 370                break;
 371        case GREP_NODE_AND:
 372                if (!collect_hits)
 373                        return (match_expr_eval(o, x->u.binary.left,
 374                                                bol, eol, ctx, 0) &&
 375                                match_expr_eval(o, x->u.binary.right,
 376                                                bol, eol, ctx, 0));
 377                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 378                h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
 379                break;
 380        case GREP_NODE_OR:
 381                if (!collect_hits)
 382                        return (match_expr_eval(o, x->u.binary.left,
 383                                                bol, eol, ctx, 0) ||
 384                                match_expr_eval(o, x->u.binary.right,
 385                                                bol, eol, ctx, 0));
 386                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 387                x->u.binary.left->hit |= h;
 388                h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
 389                break;
 390        default:
 391                die("Unexpected node type (internal error) %d\n", x->node);
 392        }
 393        if (collect_hits)
 394                x->hit |= h;
 395        return h;
 396}
 397
 398static int match_expr(struct grep_opt *opt, char *bol, char *eol,
 399                      enum grep_context ctx, int collect_hits)
 400{
 401        struct grep_expr *x = opt->pattern_expression;
 402        return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
 403}
 404
 405static int match_line(struct grep_opt *opt, char *bol, char *eol,
 406                      enum grep_context ctx, int collect_hits)
 407{
 408        struct grep_pat *p;
 409        if (opt->extended)
 410                return match_expr(opt, bol, eol, ctx, collect_hits);
 411
 412        /* we do not call with collect_hits without being extended */
 413        for (p = opt->pattern_list; p; p = p->next) {
 414                if (match_one_pattern(opt, p, bol, eol, ctx))
 415                        return 1;
 416        }
 417        return 0;
 418}
 419
 420static int grep_buffer_1(struct grep_opt *opt, const char *name,
 421                         char *buf, unsigned long size, int collect_hits)
 422{
 423        char *bol = buf;
 424        unsigned long left = size;
 425        unsigned lno = 1;
 426        struct pre_context_line {
 427                char *bol;
 428                char *eol;
 429        } *prev = NULL, *pcl;
 430        unsigned last_hit = 0;
 431        unsigned last_shown = 0;
 432        int binary_match_only = 0;
 433        const char *hunk_mark = "";
 434        unsigned count = 0;
 435        enum grep_context ctx = GREP_CONTEXT_HEAD;
 436
 437        if (buffer_is_binary(buf, size)) {
 438                switch (opt->binary) {
 439                case GREP_BINARY_DEFAULT:
 440                        binary_match_only = 1;
 441                        break;
 442                case GREP_BINARY_NOMATCH:
 443                        return 0; /* Assume unmatch */
 444                        break;
 445                default:
 446                        break;
 447                }
 448        }
 449
 450        if (opt->pre_context)
 451                prev = xcalloc(opt->pre_context, sizeof(*prev));
 452        if (opt->pre_context || opt->post_context)
 453                hunk_mark = "--\n";
 454
 455        while (left) {
 456                char *eol, ch;
 457                int hit;
 458
 459                eol = end_of_line(bol, &left);
 460                ch = *eol;
 461                *eol = 0;
 462
 463                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 464                        ctx = GREP_CONTEXT_BODY;
 465
 466                hit = match_line(opt, bol, eol, ctx, collect_hits);
 467                *eol = ch;
 468
 469                if (collect_hits)
 470                        goto next_line;
 471
 472                /* "grep -v -e foo -e bla" should list lines
 473                 * that do not have either, so inversion should
 474                 * be done outside.
 475                 */
 476                if (opt->invert)
 477                        hit = !hit;
 478                if (opt->unmatch_name_only) {
 479                        if (hit)
 480                                return 0;
 481                        goto next_line;
 482                }
 483                if (hit) {
 484                        count++;
 485                        if (opt->status_only)
 486                                return 1;
 487                        if (binary_match_only) {
 488                                printf("Binary file %s matches\n", name);
 489                                return 1;
 490                        }
 491                        if (opt->name_only) {
 492                                printf("%s\n", name);
 493                                return 1;
 494                        }
 495                        /* Hit at this line.  If we haven't shown the
 496                         * pre-context lines, we would need to show them.
 497                         * When asked to do "count", this still show
 498                         * the context which is nonsense, but the user
 499                         * deserves to get that ;-).
 500                         */
 501                        if (opt->pre_context) {
 502                                unsigned from;
 503                                if (opt->pre_context < lno)
 504                                        from = lno - opt->pre_context;
 505                                else
 506                                        from = 1;
 507                                if (from <= last_shown)
 508                                        from = last_shown + 1;
 509                                if (last_shown && from != last_shown + 1)
 510                                        printf(hunk_mark);
 511                                while (from < lno) {
 512                                        pcl = &prev[lno-from-1];
 513                                        show_line(opt, pcl->bol, pcl->eol,
 514                                                  name, from, '-');
 515                                        from++;
 516                                }
 517                                last_shown = lno-1;
 518                        }
 519                        if (last_shown && lno != last_shown + 1)
 520                                printf(hunk_mark);
 521                        if (!opt->count)
 522                                show_line(opt, bol, eol, name, lno, ':');
 523                        last_shown = last_hit = lno;
 524                }
 525                else if (last_hit &&
 526                         lno <= last_hit + opt->post_context) {
 527                        /* If the last hit is within the post context,
 528                         * we need to show this line.
 529                         */
 530                        if (last_shown && lno != last_shown + 1)
 531                                printf(hunk_mark);
 532                        show_line(opt, bol, eol, name, lno, '-');
 533                        last_shown = lno;
 534                }
 535                if (opt->pre_context) {
 536                        memmove(prev+1, prev,
 537                                (opt->pre_context-1) * sizeof(*prev));
 538                        prev->bol = bol;
 539                        prev->eol = eol;
 540                }
 541
 542        next_line:
 543                bol = eol + 1;
 544                if (!left)
 545                        break;
 546                left--;
 547                lno++;
 548        }
 549
 550        free(prev);
 551        if (collect_hits)
 552                return 0;
 553
 554        if (opt->status_only)
 555                return 0;
 556        if (opt->unmatch_name_only) {
 557                /* We did not see any hit, so we want to show this */
 558                printf("%s\n", name);
 559                return 1;
 560        }
 561
 562        /* NEEDSWORK:
 563         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 564         * which feels mostly useless but sometimes useful.  Maybe
 565         * make it another option?  For now suppress them.
 566         */
 567        if (opt->count && count)
 568                printf("%s:%u\n", name, count);
 569        return !!last_hit;
 570}
 571
 572static void clr_hit_marker(struct grep_expr *x)
 573{
 574        /* All-hit markers are meaningful only at the very top level
 575         * OR node.
 576         */
 577        while (1) {
 578                x->hit = 0;
 579                if (x->node != GREP_NODE_OR)
 580                        return;
 581                x->u.binary.left->hit = 0;
 582                x = x->u.binary.right;
 583        }
 584}
 585
 586static int chk_hit_marker(struct grep_expr *x)
 587{
 588        /* Top level nodes have hit markers.  See if they all are hits */
 589        while (1) {
 590                if (x->node != GREP_NODE_OR)
 591                        return x->hit;
 592                if (!x->u.binary.left->hit)
 593                        return 0;
 594                x = x->u.binary.right;
 595        }
 596}
 597
 598int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
 599{
 600        /*
 601         * we do not have to do the two-pass grep when we do not check
 602         * buffer-wide "all-match".
 603         */
 604        if (!opt->all_match)
 605                return grep_buffer_1(opt, name, buf, size, 0);
 606
 607        /* Otherwise the toplevel "or" terms hit a bit differently.
 608         * We first clear hit markers from them.
 609         */
 610        clr_hit_marker(opt->pattern_expression);
 611        grep_buffer_1(opt, name, buf, size, 1);
 612
 613        if (!chk_hit_marker(opt->pattern_expression))
 614                return 0;
 615
 616        return grep_buffer_1(opt, name, buf, size, 0);
 617}