22a56b5d543d7924d4bd76d3d220b026b23ed3a9
   1#include "cache.h"
   2#include "grep.h"
   3#include "xdiff-interface.h"
   4
   5void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
   6{
   7        struct grep_pat *p = xcalloc(1, sizeof(*p));
   8        p->pattern = pat;
   9        p->origin = "header";
  10        p->no = 0;
  11        p->token = GREP_PATTERN_HEAD;
  12        p->field = field;
  13        *opt->pattern_tail = p;
  14        opt->pattern_tail = &p->next;
  15        p->next = NULL;
  16}
  17
  18void append_grep_pattern(struct grep_opt *opt, const char *pat,
  19                         const char *origin, int no, enum grep_pat_token t)
  20{
  21        struct grep_pat *p = xcalloc(1, sizeof(*p));
  22        p->pattern = pat;
  23        p->origin = origin;
  24        p->no = no;
  25        p->token = t;
  26        *opt->pattern_tail = p;
  27        opt->pattern_tail = &p->next;
  28        p->next = NULL;
  29}
  30
  31static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  32{
  33        int err = regcomp(&p->regexp, p->pattern, opt->regflags);
  34        if (err) {
  35                char errbuf[1024];
  36                char where[1024];
  37                if (p->no)
  38                        sprintf(where, "In '%s' at %d, ",
  39                                p->origin, p->no);
  40                else if (p->origin)
  41                        sprintf(where, "%s, ", p->origin);
  42                else
  43                        where[0] = 0;
  44                regerror(err, &p->regexp, errbuf, 1024);
  45                regfree(&p->regexp);
  46                die("%s'%s': %s", where, p->pattern, errbuf);
  47        }
  48}
  49
  50static struct grep_expr *compile_pattern_or(struct grep_pat **);
  51static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
  52{
  53        struct grep_pat *p;
  54        struct grep_expr *x;
  55
  56        p = *list;
  57        switch (p->token) {
  58        case GREP_PATTERN: /* atom */
  59        case GREP_PATTERN_HEAD:
  60        case GREP_PATTERN_BODY:
  61                x = xcalloc(1, sizeof (struct grep_expr));
  62                x->node = GREP_NODE_ATOM;
  63                x->u.atom = p;
  64                *list = p->next;
  65                return x;
  66        case GREP_OPEN_PAREN:
  67                *list = p->next;
  68                x = compile_pattern_or(list);
  69                if (!x)
  70                        return NULL;
  71                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
  72                        die("unmatched parenthesis");
  73                *list = (*list)->next;
  74                return x;
  75        default:
  76                return NULL;
  77        }
  78}
  79
  80static struct grep_expr *compile_pattern_not(struct grep_pat **list)
  81{
  82        struct grep_pat *p;
  83        struct grep_expr *x;
  84
  85        p = *list;
  86        switch (p->token) {
  87        case GREP_NOT:
  88                if (!p->next)
  89                        die("--not not followed by pattern expression");
  90                *list = p->next;
  91                x = xcalloc(1, sizeof (struct grep_expr));
  92                x->node = GREP_NODE_NOT;
  93                x->u.unary = compile_pattern_not(list);
  94                if (!x->u.unary)
  95                        die("--not followed by non pattern expression");
  96                return x;
  97        default:
  98                return compile_pattern_atom(list);
  99        }
 100}
 101
 102static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 103{
 104        struct grep_pat *p;
 105        struct grep_expr *x, *y, *z;
 106
 107        x = compile_pattern_not(list);
 108        p = *list;
 109        if (p && p->token == GREP_AND) {
 110                if (!p->next)
 111                        die("--and not followed by pattern expression");
 112                *list = p->next;
 113                y = compile_pattern_and(list);
 114                if (!y)
 115                        die("--and not followed by pattern expression");
 116                z = xcalloc(1, sizeof (struct grep_expr));
 117                z->node = GREP_NODE_AND;
 118                z->u.binary.left = x;
 119                z->u.binary.right = y;
 120                return z;
 121        }
 122        return x;
 123}
 124
 125static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 126{
 127        struct grep_pat *p;
 128        struct grep_expr *x, *y, *z;
 129
 130        x = compile_pattern_and(list);
 131        p = *list;
 132        if (x && p && p->token != GREP_CLOSE_PAREN) {
 133                y = compile_pattern_or(list);
 134                if (!y)
 135                        die("not a pattern expression %s", p->pattern);
 136                z = xcalloc(1, sizeof (struct grep_expr));
 137                z->node = GREP_NODE_OR;
 138                z->u.binary.left = x;
 139                z->u.binary.right = y;
 140                return z;
 141        }
 142        return x;
 143}
 144
 145static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 146{
 147        return compile_pattern_or(list);
 148}
 149
 150void compile_grep_patterns(struct grep_opt *opt)
 151{
 152        struct grep_pat *p;
 153
 154        if (opt->all_match)
 155                opt->extended = 1;
 156
 157        for (p = opt->pattern_list; p; p = p->next) {
 158                switch (p->token) {
 159                case GREP_PATTERN: /* atom */
 160                case GREP_PATTERN_HEAD:
 161                case GREP_PATTERN_BODY:
 162                        if (!opt->fixed)
 163                                compile_regexp(p, opt);
 164                        break;
 165                default:
 166                        opt->extended = 1;
 167                        break;
 168                }
 169        }
 170
 171        if (!opt->extended)
 172                return;
 173
 174        /* Then bundle them up in an expression.
 175         * A classic recursive descent parser would do.
 176         */
 177        p = opt->pattern_list;
 178        opt->pattern_expression = compile_pattern_expr(&p);
 179        if (p)
 180                die("incomplete pattern expression: %s", p->pattern);
 181}
 182
 183static void free_pattern_expr(struct grep_expr *x)
 184{
 185        switch (x->node) {
 186        case GREP_NODE_ATOM:
 187                break;
 188        case GREP_NODE_NOT:
 189                free_pattern_expr(x->u.unary);
 190                break;
 191        case GREP_NODE_AND:
 192        case GREP_NODE_OR:
 193                free_pattern_expr(x->u.binary.left);
 194                free_pattern_expr(x->u.binary.right);
 195                break;
 196        }
 197        free(x);
 198}
 199
 200void free_grep_patterns(struct grep_opt *opt)
 201{
 202        struct grep_pat *p, *n;
 203
 204        for (p = opt->pattern_list; p; p = n) {
 205                n = p->next;
 206                switch (p->token) {
 207                case GREP_PATTERN: /* atom */
 208                case GREP_PATTERN_HEAD:
 209                case GREP_PATTERN_BODY:
 210                        regfree(&p->regexp);
 211                        break;
 212                default:
 213                        break;
 214                }
 215                free(p);
 216        }
 217
 218        if (!opt->extended)
 219                return;
 220        free_pattern_expr(opt->pattern_expression);
 221}
 222
 223static char *end_of_line(char *cp, unsigned long *left)
 224{
 225        unsigned long l = *left;
 226        while (l && *cp != '\n') {
 227                l--;
 228                cp++;
 229        }
 230        *left = l;
 231        return cp;
 232}
 233
 234static int word_char(char ch)
 235{
 236        return isalnum(ch) || ch == '_';
 237}
 238
 239static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 240                      const char *name, unsigned lno, char sign)
 241{
 242        if (opt->null_following_name)
 243                sign = '\0';
 244        if (opt->pathname)
 245                printf("%s%c", name, sign);
 246        if (opt->linenum)
 247                printf("%d%c", lno, sign);
 248        printf("%.*s\n", (int)(eol-bol), bol);
 249}
 250
 251static void show_name(struct grep_opt *opt, const char *name)
 252{
 253        printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
 254}
 255
 256static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 257{
 258        char *hit = strstr(line, pattern);
 259        if (!hit) {
 260                match->rm_so = match->rm_eo = -1;
 261                return REG_NOMATCH;
 262        }
 263        else {
 264                match->rm_so = hit - line;
 265                match->rm_eo = match->rm_so + strlen(pattern);
 266                return 0;
 267        }
 268}
 269
 270static int strip_timestamp(char *bol, char **eol_p)
 271{
 272        char *eol = *eol_p;
 273        int ch;
 274
 275        while (bol < --eol) {
 276                if (*eol != '>')
 277                        continue;
 278                *eol_p = ++eol;
 279                ch = *eol;
 280                *eol = '\0';
 281                return ch;
 282        }
 283        return 0;
 284}
 285
 286static struct {
 287        const char *field;
 288        size_t len;
 289} header_field[] = {
 290        { "author ", 7 },
 291        { "committer ", 10 },
 292};
 293
 294static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
 295{
 296        int hit = 0;
 297        int saved_ch = 0;
 298        regmatch_t pmatch[10];
 299
 300        if ((p->token != GREP_PATTERN) &&
 301            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 302                return 0;
 303
 304        if (p->token == GREP_PATTERN_HEAD) {
 305                const char *field;
 306                size_t len;
 307                assert(p->field < ARRAY_SIZE(header_field));
 308                field = header_field[p->field].field;
 309                len = header_field[p->field].len;
 310                if (strncmp(bol, field, len))
 311                        return 0;
 312                bol += len;
 313                saved_ch = strip_timestamp(bol, &eol);
 314        }
 315
 316 again:
 317        if (!opt->fixed) {
 318                regex_t *exp = &p->regexp;
 319                hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
 320                               pmatch, 0);
 321        }
 322        else {
 323                hit = !fixmatch(p->pattern, bol, pmatch);
 324        }
 325
 326        if (hit && opt->word_regexp) {
 327                if ((pmatch[0].rm_so < 0) ||
 328                    (eol - bol) <= pmatch[0].rm_so ||
 329                    (pmatch[0].rm_eo < 0) ||
 330                    (eol - bol) < pmatch[0].rm_eo)
 331                        die("regexp returned nonsense");
 332
 333                /* Match beginning must be either beginning of the
 334                 * line, or at word boundary (i.e. the last char must
 335                 * not be a word char).  Similarly, match end must be
 336                 * either end of the line, or at word boundary
 337                 * (i.e. the next char must not be a word char).
 338                 */
 339                if ( ((pmatch[0].rm_so == 0) ||
 340                      !word_char(bol[pmatch[0].rm_so-1])) &&
 341                     ((pmatch[0].rm_eo == (eol-bol)) ||
 342                      !word_char(bol[pmatch[0].rm_eo])) )
 343                        ;
 344                else
 345                        hit = 0;
 346
 347                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 348                        /* There could be more than one match on the
 349                         * line, and the first match might not be
 350                         * strict word match.  But later ones could be!
 351                         * Forward to the next possible start, i.e. the
 352                         * next position following a non-word char.
 353                         */
 354                        bol = pmatch[0].rm_so + bol + 1;
 355                        while (word_char(bol[-1]) && bol < eol)
 356                                bol++;
 357                        if (bol < eol)
 358                                goto again;
 359                }
 360        }
 361        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 362                *eol = saved_ch;
 363        return hit;
 364}
 365
 366static int match_expr_eval(struct grep_opt *o,
 367                           struct grep_expr *x,
 368                           char *bol, char *eol,
 369                           enum grep_context ctx,
 370                           int collect_hits)
 371{
 372        int h = 0;
 373
 374        switch (x->node) {
 375        case GREP_NODE_ATOM:
 376                h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
 377                break;
 378        case GREP_NODE_NOT:
 379                h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
 380                break;
 381        case GREP_NODE_AND:
 382                if (!collect_hits)
 383                        return (match_expr_eval(o, x->u.binary.left,
 384                                                bol, eol, ctx, 0) &&
 385                                match_expr_eval(o, x->u.binary.right,
 386                                                bol, eol, ctx, 0));
 387                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 388                h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
 389                break;
 390        case GREP_NODE_OR:
 391                if (!collect_hits)
 392                        return (match_expr_eval(o, x->u.binary.left,
 393                                                bol, eol, ctx, 0) ||
 394                                match_expr_eval(o, x->u.binary.right,
 395                                                bol, eol, ctx, 0));
 396                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 397                x->u.binary.left->hit |= h;
 398                h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
 399                break;
 400        default:
 401                die("Unexpected node type (internal error) %d", x->node);
 402        }
 403        if (collect_hits)
 404                x->hit |= h;
 405        return h;
 406}
 407
 408static int match_expr(struct grep_opt *opt, char *bol, char *eol,
 409                      enum grep_context ctx, int collect_hits)
 410{
 411        struct grep_expr *x = opt->pattern_expression;
 412        return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
 413}
 414
 415static int match_line(struct grep_opt *opt, char *bol, char *eol,
 416                      enum grep_context ctx, int collect_hits)
 417{
 418        struct grep_pat *p;
 419        if (opt->extended)
 420                return match_expr(opt, bol, eol, ctx, collect_hits);
 421
 422        /* we do not call with collect_hits without being extended */
 423        for (p = opt->pattern_list; p; p = p->next) {
 424                if (match_one_pattern(opt, p, bol, eol, ctx))
 425                        return 1;
 426        }
 427        return 0;
 428}
 429
 430static int grep_buffer_1(struct grep_opt *opt, const char *name,
 431                         char *buf, unsigned long size, int collect_hits)
 432{
 433        char *bol = buf;
 434        unsigned long left = size;
 435        unsigned lno = 1;
 436        struct pre_context_line {
 437                char *bol;
 438                char *eol;
 439        } *prev = NULL, *pcl;
 440        unsigned last_hit = 0;
 441        unsigned last_shown = 0;
 442        int binary_match_only = 0;
 443        const char *hunk_mark = "";
 444        unsigned count = 0;
 445        enum grep_context ctx = GREP_CONTEXT_HEAD;
 446
 447        if (buffer_is_binary(buf, size)) {
 448                switch (opt->binary) {
 449                case GREP_BINARY_DEFAULT:
 450                        binary_match_only = 1;
 451                        break;
 452                case GREP_BINARY_NOMATCH:
 453                        return 0; /* Assume unmatch */
 454                        break;
 455                default:
 456                        break;
 457                }
 458        }
 459
 460        if (opt->pre_context)
 461                prev = xcalloc(opt->pre_context, sizeof(*prev));
 462        if (opt->pre_context || opt->post_context)
 463                hunk_mark = "--\n";
 464
 465        while (left) {
 466                char *eol, ch;
 467                int hit;
 468
 469                eol = end_of_line(bol, &left);
 470                ch = *eol;
 471                *eol = 0;
 472
 473                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 474                        ctx = GREP_CONTEXT_BODY;
 475
 476                hit = match_line(opt, bol, eol, ctx, collect_hits);
 477                *eol = ch;
 478
 479                if (collect_hits)
 480                        goto next_line;
 481
 482                /* "grep -v -e foo -e bla" should list lines
 483                 * that do not have either, so inversion should
 484                 * be done outside.
 485                 */
 486                if (opt->invert)
 487                        hit = !hit;
 488                if (opt->unmatch_name_only) {
 489                        if (hit)
 490                                return 0;
 491                        goto next_line;
 492                }
 493                if (hit) {
 494                        count++;
 495                        if (opt->status_only)
 496                                return 1;
 497                        if (binary_match_only) {
 498                                printf("Binary file %s matches\n", name);
 499                                return 1;
 500                        }
 501                        if (opt->name_only) {
 502                                show_name(opt, name);
 503                                return 1;
 504                        }
 505                        /* Hit at this line.  If we haven't shown the
 506                         * pre-context lines, we would need to show them.
 507                         * When asked to do "count", this still show
 508                         * the context which is nonsense, but the user
 509                         * deserves to get that ;-).
 510                         */
 511                        if (opt->pre_context) {
 512                                unsigned from;
 513                                if (opt->pre_context < lno)
 514                                        from = lno - opt->pre_context;
 515                                else
 516                                        from = 1;
 517                                if (from <= last_shown)
 518                                        from = last_shown + 1;
 519                                if (last_shown && from != last_shown + 1)
 520                                        fputs(hunk_mark, stdout);
 521                                while (from < lno) {
 522                                        pcl = &prev[lno-from-1];
 523                                        show_line(opt, pcl->bol, pcl->eol,
 524                                                  name, from, '-');
 525                                        from++;
 526                                }
 527                                last_shown = lno-1;
 528                        }
 529                        if (last_shown && lno != last_shown + 1)
 530                                fputs(hunk_mark, stdout);
 531                        if (!opt->count)
 532                                show_line(opt, bol, eol, name, lno, ':');
 533                        last_shown = last_hit = lno;
 534                }
 535                else if (last_hit &&
 536                         lno <= last_hit + opt->post_context) {
 537                        /* If the last hit is within the post context,
 538                         * we need to show this line.
 539                         */
 540                        if (last_shown && lno != last_shown + 1)
 541                                fputs(hunk_mark, stdout);
 542                        show_line(opt, bol, eol, name, lno, '-');
 543                        last_shown = lno;
 544                }
 545                if (opt->pre_context) {
 546                        memmove(prev+1, prev,
 547                                (opt->pre_context-1) * sizeof(*prev));
 548                        prev->bol = bol;
 549                        prev->eol = eol;
 550                }
 551
 552        next_line:
 553                bol = eol + 1;
 554                if (!left)
 555                        break;
 556                left--;
 557                lno++;
 558        }
 559
 560        free(prev);
 561        if (collect_hits)
 562                return 0;
 563
 564        if (opt->status_only)
 565                return 0;
 566        if (opt->unmatch_name_only) {
 567                /* We did not see any hit, so we want to show this */
 568                show_name(opt, name);
 569                return 1;
 570        }
 571
 572        /* NEEDSWORK:
 573         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 574         * which feels mostly useless but sometimes useful.  Maybe
 575         * make it another option?  For now suppress them.
 576         */
 577        if (opt->count && count)
 578                printf("%s%c%u\n", name,
 579                       opt->null_following_name ? '\0' : ':', count);
 580        return !!last_hit;
 581}
 582
 583static void clr_hit_marker(struct grep_expr *x)
 584{
 585        /* All-hit markers are meaningful only at the very top level
 586         * OR node.
 587         */
 588        while (1) {
 589                x->hit = 0;
 590                if (x->node != GREP_NODE_OR)
 591                        return;
 592                x->u.binary.left->hit = 0;
 593                x = x->u.binary.right;
 594        }
 595}
 596
 597static int chk_hit_marker(struct grep_expr *x)
 598{
 599        /* Top level nodes have hit markers.  See if they all are hits */
 600        while (1) {
 601                if (x->node != GREP_NODE_OR)
 602                        return x->hit;
 603                if (!x->u.binary.left->hit)
 604                        return 0;
 605                x = x->u.binary.right;
 606        }
 607}
 608
 609int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
 610{
 611        /*
 612         * we do not have to do the two-pass grep when we do not check
 613         * buffer-wide "all-match".
 614         */
 615        if (!opt->all_match)
 616                return grep_buffer_1(opt, name, buf, size, 0);
 617
 618        /* Otherwise the toplevel "or" terms hit a bit differently.
 619         * We first clear hit markers from them.
 620         */
 621        clr_hit_marker(opt->pattern_expression);
 622        grep_buffer_1(opt, name, buf, size, 1);
 623
 624        if (!chk_hit_marker(opt->pattern_expression))
 625                return 0;
 626
 627        return grep_buffer_1(opt, name, buf, size, 0);
 628}