grep.con commit Merge branch 'maint-1.6.0' into maint-1.6.1 (96aa7ad)
   1#include "cache.h"
   2#include "grep.h"
   3#include "xdiff-interface.h"
   4
   5void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
   6{
   7        struct grep_pat *p = xcalloc(1, sizeof(*p));
   8        p->pattern = pat;
   9        p->origin = "header";
  10        p->no = 0;
  11        p->token = GREP_PATTERN_HEAD;
  12        p->field = field;
  13        *opt->pattern_tail = p;
  14        opt->pattern_tail = &p->next;
  15        p->next = NULL;
  16}
  17
  18void append_grep_pattern(struct grep_opt *opt, const char *pat,
  19                         const char *origin, int no, enum grep_pat_token t)
  20{
  21        struct grep_pat *p = xcalloc(1, sizeof(*p));
  22        p->pattern = pat;
  23        p->origin = origin;
  24        p->no = no;
  25        p->token = t;
  26        *opt->pattern_tail = p;
  27        opt->pattern_tail = &p->next;
  28        p->next = NULL;
  29}
  30
  31static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  32{
  33        int err = regcomp(&p->regexp, p->pattern, opt->regflags);
  34        if (err) {
  35                char errbuf[1024];
  36                char where[1024];
  37                if (p->no)
  38                        sprintf(where, "In '%s' at %d, ",
  39                                p->origin, p->no);
  40                else if (p->origin)
  41                        sprintf(where, "%s, ", p->origin);
  42                else
  43                        where[0] = 0;
  44                regerror(err, &p->regexp, errbuf, 1024);
  45                regfree(&p->regexp);
  46                die("%s'%s': %s", where, p->pattern, errbuf);
  47        }
  48}
  49
  50static struct grep_expr *compile_pattern_or(struct grep_pat **);
  51static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
  52{
  53        struct grep_pat *p;
  54        struct grep_expr *x;
  55
  56        p = *list;
  57        if (!p)
  58                return NULL;
  59        switch (p->token) {
  60        case GREP_PATTERN: /* atom */
  61        case GREP_PATTERN_HEAD:
  62        case GREP_PATTERN_BODY:
  63                x = xcalloc(1, sizeof (struct grep_expr));
  64                x->node = GREP_NODE_ATOM;
  65                x->u.atom = p;
  66                *list = p->next;
  67                return x;
  68        case GREP_OPEN_PAREN:
  69                *list = p->next;
  70                x = compile_pattern_or(list);
  71                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
  72                        die("unmatched parenthesis");
  73                *list = (*list)->next;
  74                return x;
  75        default:
  76                return NULL;
  77        }
  78}
  79
  80static struct grep_expr *compile_pattern_not(struct grep_pat **list)
  81{
  82        struct grep_pat *p;
  83        struct grep_expr *x;
  84
  85        p = *list;
  86        if (!p)
  87                return NULL;
  88        switch (p->token) {
  89        case GREP_NOT:
  90                if (!p->next)
  91                        die("--not not followed by pattern expression");
  92                *list = p->next;
  93                x = xcalloc(1, sizeof (struct grep_expr));
  94                x->node = GREP_NODE_NOT;
  95                x->u.unary = compile_pattern_not(list);
  96                if (!x->u.unary)
  97                        die("--not followed by non pattern expression");
  98                return x;
  99        default:
 100                return compile_pattern_atom(list);
 101        }
 102}
 103
 104static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 105{
 106        struct grep_pat *p;
 107        struct grep_expr *x, *y, *z;
 108
 109        x = compile_pattern_not(list);
 110        p = *list;
 111        if (p && p->token == GREP_AND) {
 112                if (!p->next)
 113                        die("--and not followed by pattern expression");
 114                *list = p->next;
 115                y = compile_pattern_and(list);
 116                if (!y)
 117                        die("--and not followed by pattern expression");
 118                z = xcalloc(1, sizeof (struct grep_expr));
 119                z->node = GREP_NODE_AND;
 120                z->u.binary.left = x;
 121                z->u.binary.right = y;
 122                return z;
 123        }
 124        return x;
 125}
 126
 127static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 128{
 129        struct grep_pat *p;
 130        struct grep_expr *x, *y, *z;
 131
 132        x = compile_pattern_and(list);
 133        p = *list;
 134        if (x && p && p->token != GREP_CLOSE_PAREN) {
 135                y = compile_pattern_or(list);
 136                if (!y)
 137                        die("not a pattern expression %s", p->pattern);
 138                z = xcalloc(1, sizeof (struct grep_expr));
 139                z->node = GREP_NODE_OR;
 140                z->u.binary.left = x;
 141                z->u.binary.right = y;
 142                return z;
 143        }
 144        return x;
 145}
 146
 147static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 148{
 149        return compile_pattern_or(list);
 150}
 151
 152void compile_grep_patterns(struct grep_opt *opt)
 153{
 154        struct grep_pat *p;
 155
 156        if (opt->all_match)
 157                opt->extended = 1;
 158
 159        for (p = opt->pattern_list; p; p = p->next) {
 160                switch (p->token) {
 161                case GREP_PATTERN: /* atom */
 162                case GREP_PATTERN_HEAD:
 163                case GREP_PATTERN_BODY:
 164                        if (!opt->fixed)
 165                                compile_regexp(p, opt);
 166                        break;
 167                default:
 168                        opt->extended = 1;
 169                        break;
 170                }
 171        }
 172
 173        if (!opt->extended)
 174                return;
 175
 176        /* Then bundle them up in an expression.
 177         * A classic recursive descent parser would do.
 178         */
 179        p = opt->pattern_list;
 180        opt->pattern_expression = compile_pattern_expr(&p);
 181        if (p)
 182                die("incomplete pattern expression: %s", p->pattern);
 183}
 184
 185static void free_pattern_expr(struct grep_expr *x)
 186{
 187        switch (x->node) {
 188        case GREP_NODE_ATOM:
 189                break;
 190        case GREP_NODE_NOT:
 191                free_pattern_expr(x->u.unary);
 192                break;
 193        case GREP_NODE_AND:
 194        case GREP_NODE_OR:
 195                free_pattern_expr(x->u.binary.left);
 196                free_pattern_expr(x->u.binary.right);
 197                break;
 198        }
 199        free(x);
 200}
 201
 202void free_grep_patterns(struct grep_opt *opt)
 203{
 204        struct grep_pat *p, *n;
 205
 206        for (p = opt->pattern_list; p; p = n) {
 207                n = p->next;
 208                switch (p->token) {
 209                case GREP_PATTERN: /* atom */
 210                case GREP_PATTERN_HEAD:
 211                case GREP_PATTERN_BODY:
 212                        regfree(&p->regexp);
 213                        break;
 214                default:
 215                        break;
 216                }
 217                free(p);
 218        }
 219
 220        if (!opt->extended)
 221                return;
 222        free_pattern_expr(opt->pattern_expression);
 223}
 224
 225static char *end_of_line(char *cp, unsigned long *left)
 226{
 227        unsigned long l = *left;
 228        while (l && *cp != '\n') {
 229                l--;
 230                cp++;
 231        }
 232        *left = l;
 233        return cp;
 234}
 235
 236static int word_char(char ch)
 237{
 238        return isalnum(ch) || ch == '_';
 239}
 240
 241static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 242                      const char *name, unsigned lno, char sign)
 243{
 244        if (opt->null_following_name)
 245                sign = '\0';
 246        if (opt->pathname)
 247                printf("%s%c", name, sign);
 248        if (opt->linenum)
 249                printf("%d%c", lno, sign);
 250        printf("%.*s\n", (int)(eol-bol), bol);
 251}
 252
 253static void show_name(struct grep_opt *opt, const char *name)
 254{
 255        printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
 256}
 257
 258static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 259{
 260        char *hit = strstr(line, pattern);
 261        if (!hit) {
 262                match->rm_so = match->rm_eo = -1;
 263                return REG_NOMATCH;
 264        }
 265        else {
 266                match->rm_so = hit - line;
 267                match->rm_eo = match->rm_so + strlen(pattern);
 268                return 0;
 269        }
 270}
 271
 272static int strip_timestamp(char *bol, char **eol_p)
 273{
 274        char *eol = *eol_p;
 275        int ch;
 276
 277        while (bol < --eol) {
 278                if (*eol != '>')
 279                        continue;
 280                *eol_p = ++eol;
 281                ch = *eol;
 282                *eol = '\0';
 283                return ch;
 284        }
 285        return 0;
 286}
 287
 288static struct {
 289        const char *field;
 290        size_t len;
 291} header_field[] = {
 292        { "author ", 7 },
 293        { "committer ", 10 },
 294};
 295
 296static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
 297{
 298        int hit = 0;
 299        int at_true_bol = 1;
 300        int saved_ch = 0;
 301        regmatch_t pmatch[10];
 302
 303        if ((p->token != GREP_PATTERN) &&
 304            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 305                return 0;
 306
 307        if (p->token == GREP_PATTERN_HEAD) {
 308                const char *field;
 309                size_t len;
 310                assert(p->field < ARRAY_SIZE(header_field));
 311                field = header_field[p->field].field;
 312                len = header_field[p->field].len;
 313                if (strncmp(bol, field, len))
 314                        return 0;
 315                bol += len;
 316                saved_ch = strip_timestamp(bol, &eol);
 317        }
 318
 319 again:
 320        if (!opt->fixed) {
 321                regex_t *exp = &p->regexp;
 322                hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
 323                               pmatch, 0);
 324        }
 325        else {
 326                hit = !fixmatch(p->pattern, bol, pmatch);
 327        }
 328
 329        if (hit && opt->word_regexp) {
 330                if ((pmatch[0].rm_so < 0) ||
 331                    (eol - bol) <= pmatch[0].rm_so ||
 332                    (pmatch[0].rm_eo < 0) ||
 333                    (eol - bol) < pmatch[0].rm_eo)
 334                        die("regexp returned nonsense");
 335
 336                /* Match beginning must be either beginning of the
 337                 * line, or at word boundary (i.e. the last char must
 338                 * not be a word char).  Similarly, match end must be
 339                 * either end of the line, or at word boundary
 340                 * (i.e. the next char must not be a word char).
 341                 */
 342                if ( ((pmatch[0].rm_so == 0 && at_true_bol) ||
 343                      !word_char(bol[pmatch[0].rm_so-1])) &&
 344                     ((pmatch[0].rm_eo == (eol-bol)) ||
 345                      !word_char(bol[pmatch[0].rm_eo])) )
 346                        ;
 347                else
 348                        hit = 0;
 349
 350                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 351                        /* There could be more than one match on the
 352                         * line, and the first match might not be
 353                         * strict word match.  But later ones could be!
 354                         */
 355                        bol = pmatch[0].rm_so + bol + 1;
 356                        at_true_bol = 0;
 357                        goto again;
 358                }
 359        }
 360        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 361                *eol = saved_ch;
 362        return hit;
 363}
 364
 365static int match_expr_eval(struct grep_opt *o,
 366                           struct grep_expr *x,
 367                           char *bol, char *eol,
 368                           enum grep_context ctx,
 369                           int collect_hits)
 370{
 371        int h = 0;
 372
 373        if (!x)
 374                die("Not a valid grep expression");
 375        switch (x->node) {
 376        case GREP_NODE_ATOM:
 377                h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
 378                break;
 379        case GREP_NODE_NOT:
 380                h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
 381                break;
 382        case GREP_NODE_AND:
 383                if (!collect_hits)
 384                        return (match_expr_eval(o, x->u.binary.left,
 385                                                bol, eol, ctx, 0) &&
 386                                match_expr_eval(o, x->u.binary.right,
 387                                                bol, eol, ctx, 0));
 388                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 389                h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
 390                break;
 391        case GREP_NODE_OR:
 392                if (!collect_hits)
 393                        return (match_expr_eval(o, x->u.binary.left,
 394                                                bol, eol, ctx, 0) ||
 395                                match_expr_eval(o, x->u.binary.right,
 396                                                bol, eol, ctx, 0));
 397                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 398                x->u.binary.left->hit |= h;
 399                h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
 400                break;
 401        default:
 402                die("Unexpected node type (internal error) %d\n", x->node);
 403        }
 404        if (collect_hits)
 405                x->hit |= h;
 406        return h;
 407}
 408
 409static int match_expr(struct grep_opt *opt, char *bol, char *eol,
 410                      enum grep_context ctx, int collect_hits)
 411{
 412        struct grep_expr *x = opt->pattern_expression;
 413        return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
 414}
 415
 416static int match_line(struct grep_opt *opt, char *bol, char *eol,
 417                      enum grep_context ctx, int collect_hits)
 418{
 419        struct grep_pat *p;
 420        if (opt->extended)
 421                return match_expr(opt, bol, eol, ctx, collect_hits);
 422
 423        /* we do not call with collect_hits without being extended */
 424        for (p = opt->pattern_list; p; p = p->next) {
 425                if (match_one_pattern(opt, p, bol, eol, ctx))
 426                        return 1;
 427        }
 428        return 0;
 429}
 430
 431static int grep_buffer_1(struct grep_opt *opt, const char *name,
 432                         char *buf, unsigned long size, int collect_hits)
 433{
 434        char *bol = buf;
 435        unsigned long left = size;
 436        unsigned lno = 1;
 437        struct pre_context_line {
 438                char *bol;
 439                char *eol;
 440        } *prev = NULL, *pcl;
 441        unsigned last_hit = 0;
 442        unsigned last_shown = 0;
 443        int binary_match_only = 0;
 444        const char *hunk_mark = "";
 445        unsigned count = 0;
 446        enum grep_context ctx = GREP_CONTEXT_HEAD;
 447
 448        if (buffer_is_binary(buf, size)) {
 449                switch (opt->binary) {
 450                case GREP_BINARY_DEFAULT:
 451                        binary_match_only = 1;
 452                        break;
 453                case GREP_BINARY_NOMATCH:
 454                        return 0; /* Assume unmatch */
 455                        break;
 456                default:
 457                        break;
 458                }
 459        }
 460
 461        if (opt->pre_context)
 462                prev = xcalloc(opt->pre_context, sizeof(*prev));
 463        if (opt->pre_context || opt->post_context)
 464                hunk_mark = "--\n";
 465
 466        while (left) {
 467                char *eol, ch;
 468                int hit;
 469
 470                eol = end_of_line(bol, &left);
 471                ch = *eol;
 472                *eol = 0;
 473
 474                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 475                        ctx = GREP_CONTEXT_BODY;
 476
 477                hit = match_line(opt, bol, eol, ctx, collect_hits);
 478                *eol = ch;
 479
 480                if (collect_hits)
 481                        goto next_line;
 482
 483                /* "grep -v -e foo -e bla" should list lines
 484                 * that do not have either, so inversion should
 485                 * be done outside.
 486                 */
 487                if (opt->invert)
 488                        hit = !hit;
 489                if (opt->unmatch_name_only) {
 490                        if (hit)
 491                                return 0;
 492                        goto next_line;
 493                }
 494                if (hit) {
 495                        count++;
 496                        if (opt->status_only)
 497                                return 1;
 498                        if (binary_match_only) {
 499                                printf("Binary file %s matches\n", name);
 500                                return 1;
 501                        }
 502                        if (opt->name_only) {
 503                                show_name(opt, name);
 504                                return 1;
 505                        }
 506                        /* Hit at this line.  If we haven't shown the
 507                         * pre-context lines, we would need to show them.
 508                         * When asked to do "count", this still show
 509                         * the context which is nonsense, but the user
 510                         * deserves to get that ;-).
 511                         */
 512                        if (opt->pre_context) {
 513                                unsigned from;
 514                                if (opt->pre_context < lno)
 515                                        from = lno - opt->pre_context;
 516                                else
 517                                        from = 1;
 518                                if (from <= last_shown)
 519                                        from = last_shown + 1;
 520                                if (last_shown && from != last_shown + 1)
 521                                        fputs(hunk_mark, stdout);
 522                                while (from < lno) {
 523                                        pcl = &prev[lno-from-1];
 524                                        show_line(opt, pcl->bol, pcl->eol,
 525                                                  name, from, '-');
 526                                        from++;
 527                                }
 528                                last_shown = lno-1;
 529                        }
 530                        if (last_shown && lno != last_shown + 1)
 531                                fputs(hunk_mark, stdout);
 532                        if (!opt->count)
 533                                show_line(opt, bol, eol, name, lno, ':');
 534                        last_shown = last_hit = lno;
 535                }
 536                else if (last_hit &&
 537                         lno <= last_hit + opt->post_context) {
 538                        /* If the last hit is within the post context,
 539                         * we need to show this line.
 540                         */
 541                        if (last_shown && lno != last_shown + 1)
 542                                fputs(hunk_mark, stdout);
 543                        show_line(opt, bol, eol, name, lno, '-');
 544                        last_shown = lno;
 545                }
 546                if (opt->pre_context) {
 547                        memmove(prev+1, prev,
 548                                (opt->pre_context-1) * sizeof(*prev));
 549                        prev->bol = bol;
 550                        prev->eol = eol;
 551                }
 552
 553        next_line:
 554                bol = eol + 1;
 555                if (!left)
 556                        break;
 557                left--;
 558                lno++;
 559        }
 560
 561        free(prev);
 562        if (collect_hits)
 563                return 0;
 564
 565        if (opt->status_only)
 566                return 0;
 567        if (opt->unmatch_name_only) {
 568                /* We did not see any hit, so we want to show this */
 569                show_name(opt, name);
 570                return 1;
 571        }
 572
 573        /* NEEDSWORK:
 574         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 575         * which feels mostly useless but sometimes useful.  Maybe
 576         * make it another option?  For now suppress them.
 577         */
 578        if (opt->count && count)
 579                printf("%s%c%u\n", name,
 580                       opt->null_following_name ? '\0' : ':', count);
 581        return !!last_hit;
 582}
 583
 584static void clr_hit_marker(struct grep_expr *x)
 585{
 586        /* All-hit markers are meaningful only at the very top level
 587         * OR node.
 588         */
 589        while (1) {
 590                x->hit = 0;
 591                if (x->node != GREP_NODE_OR)
 592                        return;
 593                x->u.binary.left->hit = 0;
 594                x = x->u.binary.right;
 595        }
 596}
 597
 598static int chk_hit_marker(struct grep_expr *x)
 599{
 600        /* Top level nodes have hit markers.  See if they all are hits */
 601        while (1) {
 602                if (x->node != GREP_NODE_OR)
 603                        return x->hit;
 604                if (!x->u.binary.left->hit)
 605                        return 0;
 606                x = x->u.binary.right;
 607        }
 608}
 609
 610int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
 611{
 612        /*
 613         * we do not have to do the two-pass grep when we do not check
 614         * buffer-wide "all-match".
 615         */
 616        if (!opt->all_match)
 617                return grep_buffer_1(opt, name, buf, size, 0);
 618
 619        /* Otherwise the toplevel "or" terms hit a bit differently.
 620         * We first clear hit markers from them.
 621         */
 622        clr_hit_marker(opt->pattern_expression);
 623        grep_buffer_1(opt, name, buf, size, 1);
 624
 625        if (!chk_hit_marker(opt->pattern_expression))
 626                return 0;
 627
 628        return grep_buffer_1(opt, name, buf, size, 0);
 629}