grep.con commit Merge branch 'tr/maint-no-index-fixes' (33256e6)
   1#include "cache.h"
   2#include "grep.h"
   3#include "xdiff-interface.h"
   4
   5void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
   6{
   7        struct grep_pat *p = xcalloc(1, sizeof(*p));
   8        p->pattern = pat;
   9        p->origin = "header";
  10        p->no = 0;
  11        p->token = GREP_PATTERN_HEAD;
  12        p->field = field;
  13        *opt->pattern_tail = p;
  14        opt->pattern_tail = &p->next;
  15        p->next = NULL;
  16}
  17
  18void append_grep_pattern(struct grep_opt *opt, const char *pat,
  19                         const char *origin, int no, enum grep_pat_token t)
  20{
  21        struct grep_pat *p = xcalloc(1, sizeof(*p));
  22        p->pattern = pat;
  23        p->origin = origin;
  24        p->no = no;
  25        p->token = t;
  26        *opt->pattern_tail = p;
  27        opt->pattern_tail = &p->next;
  28        p->next = NULL;
  29}
  30
  31static int isregexspecial(int c)
  32{
  33        return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' ||
  34                               c == '.' || c == '^' || c == '{' || c == '|';
  35}
  36
  37static int is_fixed(const char *s)
  38{
  39        while (!isregexspecial(*s))
  40                s++;
  41        return !*s;
  42}
  43
  44static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  45{
  46        int err;
  47
  48        if (opt->fixed || is_fixed(p->pattern))
  49                p->fixed = 1;
  50        if (opt->regflags & REG_ICASE)
  51                p->fixed = 0;
  52        if (p->fixed)
  53                return;
  54
  55        err = regcomp(&p->regexp, p->pattern, opt->regflags);
  56        if (err) {
  57                char errbuf[1024];
  58                char where[1024];
  59                if (p->no)
  60                        sprintf(where, "In '%s' at %d, ",
  61                                p->origin, p->no);
  62                else if (p->origin)
  63                        sprintf(where, "%s, ", p->origin);
  64                else
  65                        where[0] = 0;
  66                regerror(err, &p->regexp, errbuf, 1024);
  67                regfree(&p->regexp);
  68                die("%s'%s': %s", where, p->pattern, errbuf);
  69        }
  70}
  71
  72static struct grep_expr *compile_pattern_or(struct grep_pat **);
  73static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
  74{
  75        struct grep_pat *p;
  76        struct grep_expr *x;
  77
  78        p = *list;
  79        switch (p->token) {
  80        case GREP_PATTERN: /* atom */
  81        case GREP_PATTERN_HEAD:
  82        case GREP_PATTERN_BODY:
  83                x = xcalloc(1, sizeof (struct grep_expr));
  84                x->node = GREP_NODE_ATOM;
  85                x->u.atom = p;
  86                *list = p->next;
  87                return x;
  88        case GREP_OPEN_PAREN:
  89                *list = p->next;
  90                x = compile_pattern_or(list);
  91                if (!x)
  92                        return NULL;
  93                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
  94                        die("unmatched parenthesis");
  95                *list = (*list)->next;
  96                return x;
  97        default:
  98                return NULL;
  99        }
 100}
 101
 102static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 103{
 104        struct grep_pat *p;
 105        struct grep_expr *x;
 106
 107        p = *list;
 108        switch (p->token) {
 109        case GREP_NOT:
 110                if (!p->next)
 111                        die("--not not followed by pattern expression");
 112                *list = p->next;
 113                x = xcalloc(1, sizeof (struct grep_expr));
 114                x->node = GREP_NODE_NOT;
 115                x->u.unary = compile_pattern_not(list);
 116                if (!x->u.unary)
 117                        die("--not followed by non pattern expression");
 118                return x;
 119        default:
 120                return compile_pattern_atom(list);
 121        }
 122}
 123
 124static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 125{
 126        struct grep_pat *p;
 127        struct grep_expr *x, *y, *z;
 128
 129        x = compile_pattern_not(list);
 130        p = *list;
 131        if (p && p->token == GREP_AND) {
 132                if (!p->next)
 133                        die("--and not followed by pattern expression");
 134                *list = p->next;
 135                y = compile_pattern_and(list);
 136                if (!y)
 137                        die("--and not followed by pattern expression");
 138                z = xcalloc(1, sizeof (struct grep_expr));
 139                z->node = GREP_NODE_AND;
 140                z->u.binary.left = x;
 141                z->u.binary.right = y;
 142                return z;
 143        }
 144        return x;
 145}
 146
 147static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 148{
 149        struct grep_pat *p;
 150        struct grep_expr *x, *y, *z;
 151
 152        x = compile_pattern_and(list);
 153        p = *list;
 154        if (x && p && p->token != GREP_CLOSE_PAREN) {
 155                y = compile_pattern_or(list);
 156                if (!y)
 157                        die("not a pattern expression %s", p->pattern);
 158                z = xcalloc(1, sizeof (struct grep_expr));
 159                z->node = GREP_NODE_OR;
 160                z->u.binary.left = x;
 161                z->u.binary.right = y;
 162                return z;
 163        }
 164        return x;
 165}
 166
 167static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 168{
 169        return compile_pattern_or(list);
 170}
 171
 172void compile_grep_patterns(struct grep_opt *opt)
 173{
 174        struct grep_pat *p;
 175
 176        if (opt->all_match)
 177                opt->extended = 1;
 178
 179        for (p = opt->pattern_list; p; p = p->next) {
 180                switch (p->token) {
 181                case GREP_PATTERN: /* atom */
 182                case GREP_PATTERN_HEAD:
 183                case GREP_PATTERN_BODY:
 184                        compile_regexp(p, opt);
 185                        break;
 186                default:
 187                        opt->extended = 1;
 188                        break;
 189                }
 190        }
 191
 192        if (!opt->extended)
 193                return;
 194
 195        /* Then bundle them up in an expression.
 196         * A classic recursive descent parser would do.
 197         */
 198        p = opt->pattern_list;
 199        opt->pattern_expression = compile_pattern_expr(&p);
 200        if (p)
 201                die("incomplete pattern expression: %s", p->pattern);
 202}
 203
 204static void free_pattern_expr(struct grep_expr *x)
 205{
 206        switch (x->node) {
 207        case GREP_NODE_ATOM:
 208                break;
 209        case GREP_NODE_NOT:
 210                free_pattern_expr(x->u.unary);
 211                break;
 212        case GREP_NODE_AND:
 213        case GREP_NODE_OR:
 214                free_pattern_expr(x->u.binary.left);
 215                free_pattern_expr(x->u.binary.right);
 216                break;
 217        }
 218        free(x);
 219}
 220
 221void free_grep_patterns(struct grep_opt *opt)
 222{
 223        struct grep_pat *p, *n;
 224
 225        for (p = opt->pattern_list; p; p = n) {
 226                n = p->next;
 227                switch (p->token) {
 228                case GREP_PATTERN: /* atom */
 229                case GREP_PATTERN_HEAD:
 230                case GREP_PATTERN_BODY:
 231                        regfree(&p->regexp);
 232                        break;
 233                default:
 234                        break;
 235                }
 236                free(p);
 237        }
 238
 239        if (!opt->extended)
 240                return;
 241        free_pattern_expr(opt->pattern_expression);
 242}
 243
 244static char *end_of_line(char *cp, unsigned long *left)
 245{
 246        unsigned long l = *left;
 247        while (l && *cp != '\n') {
 248                l--;
 249                cp++;
 250        }
 251        *left = l;
 252        return cp;
 253}
 254
 255static int word_char(char ch)
 256{
 257        return isalnum(ch) || ch == '_';
 258}
 259
 260static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 261                      const char *name, unsigned lno, char sign)
 262{
 263        if (opt->null_following_name)
 264                sign = '\0';
 265        if (opt->pathname)
 266                printf("%s%c", name, sign);
 267        if (opt->linenum)
 268                printf("%d%c", lno, sign);
 269        printf("%.*s\n", (int)(eol-bol), bol);
 270}
 271
 272static void show_name(struct grep_opt *opt, const char *name)
 273{
 274        printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
 275}
 276
 277static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 278{
 279        char *hit = strstr(line, pattern);
 280        if (!hit) {
 281                match->rm_so = match->rm_eo = -1;
 282                return REG_NOMATCH;
 283        }
 284        else {
 285                match->rm_so = hit - line;
 286                match->rm_eo = match->rm_so + strlen(pattern);
 287                return 0;
 288        }
 289}
 290
 291static int strip_timestamp(char *bol, char **eol_p)
 292{
 293        char *eol = *eol_p;
 294        int ch;
 295
 296        while (bol < --eol) {
 297                if (*eol != '>')
 298                        continue;
 299                *eol_p = ++eol;
 300                ch = *eol;
 301                *eol = '\0';
 302                return ch;
 303        }
 304        return 0;
 305}
 306
 307static struct {
 308        const char *field;
 309        size_t len;
 310} header_field[] = {
 311        { "author ", 7 },
 312        { "committer ", 10 },
 313};
 314
 315static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
 316{
 317        int hit = 0;
 318        int saved_ch = 0;
 319        regmatch_t pmatch[10];
 320
 321        if ((p->token != GREP_PATTERN) &&
 322            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 323                return 0;
 324
 325        if (p->token == GREP_PATTERN_HEAD) {
 326                const char *field;
 327                size_t len;
 328                assert(p->field < ARRAY_SIZE(header_field));
 329                field = header_field[p->field].field;
 330                len = header_field[p->field].len;
 331                if (strncmp(bol, field, len))
 332                        return 0;
 333                bol += len;
 334                saved_ch = strip_timestamp(bol, &eol);
 335        }
 336
 337 again:
 338        if (!p->fixed) {
 339                regex_t *exp = &p->regexp;
 340                hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
 341                               pmatch, 0);
 342        }
 343        else {
 344                hit = !fixmatch(p->pattern, bol, pmatch);
 345        }
 346
 347        if (hit && opt->word_regexp) {
 348                if ((pmatch[0].rm_so < 0) ||
 349                    (eol - bol) <= pmatch[0].rm_so ||
 350                    (pmatch[0].rm_eo < 0) ||
 351                    (eol - bol) < pmatch[0].rm_eo)
 352                        die("regexp returned nonsense");
 353
 354                /* Match beginning must be either beginning of the
 355                 * line, or at word boundary (i.e. the last char must
 356                 * not be a word char).  Similarly, match end must be
 357                 * either end of the line, or at word boundary
 358                 * (i.e. the next char must not be a word char).
 359                 */
 360                if ( ((pmatch[0].rm_so == 0) ||
 361                      !word_char(bol[pmatch[0].rm_so-1])) &&
 362                     ((pmatch[0].rm_eo == (eol-bol)) ||
 363                      !word_char(bol[pmatch[0].rm_eo])) )
 364                        ;
 365                else
 366                        hit = 0;
 367
 368                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 369                        /* There could be more than one match on the
 370                         * line, and the first match might not be
 371                         * strict word match.  But later ones could be!
 372                         * Forward to the next possible start, i.e. the
 373                         * next position following a non-word char.
 374                         */
 375                        bol = pmatch[0].rm_so + bol + 1;
 376                        while (word_char(bol[-1]) && bol < eol)
 377                                bol++;
 378                        if (bol < eol)
 379                                goto again;
 380                }
 381        }
 382        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 383                *eol = saved_ch;
 384        return hit;
 385}
 386
 387static int match_expr_eval(struct grep_opt *o,
 388                           struct grep_expr *x,
 389                           char *bol, char *eol,
 390                           enum grep_context ctx,
 391                           int collect_hits)
 392{
 393        int h = 0;
 394
 395        switch (x->node) {
 396        case GREP_NODE_ATOM:
 397                h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
 398                break;
 399        case GREP_NODE_NOT:
 400                h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
 401                break;
 402        case GREP_NODE_AND:
 403                if (!collect_hits)
 404                        return (match_expr_eval(o, x->u.binary.left,
 405                                                bol, eol, ctx, 0) &&
 406                                match_expr_eval(o, x->u.binary.right,
 407                                                bol, eol, ctx, 0));
 408                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 409                h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
 410                break;
 411        case GREP_NODE_OR:
 412                if (!collect_hits)
 413                        return (match_expr_eval(o, x->u.binary.left,
 414                                                bol, eol, ctx, 0) ||
 415                                match_expr_eval(o, x->u.binary.right,
 416                                                bol, eol, ctx, 0));
 417                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 418                x->u.binary.left->hit |= h;
 419                h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
 420                break;
 421        default:
 422                die("Unexpected node type (internal error) %d", x->node);
 423        }
 424        if (collect_hits)
 425                x->hit |= h;
 426        return h;
 427}
 428
 429static int match_expr(struct grep_opt *opt, char *bol, char *eol,
 430                      enum grep_context ctx, int collect_hits)
 431{
 432        struct grep_expr *x = opt->pattern_expression;
 433        return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
 434}
 435
 436static int match_line(struct grep_opt *opt, char *bol, char *eol,
 437                      enum grep_context ctx, int collect_hits)
 438{
 439        struct grep_pat *p;
 440        if (opt->extended)
 441                return match_expr(opt, bol, eol, ctx, collect_hits);
 442
 443        /* we do not call with collect_hits without being extended */
 444        for (p = opt->pattern_list; p; p = p->next) {
 445                if (match_one_pattern(opt, p, bol, eol, ctx))
 446                        return 1;
 447        }
 448        return 0;
 449}
 450
 451static int grep_buffer_1(struct grep_opt *opt, const char *name,
 452                         char *buf, unsigned long size, int collect_hits)
 453{
 454        char *bol = buf;
 455        unsigned long left = size;
 456        unsigned lno = 1;
 457        struct pre_context_line {
 458                char *bol;
 459                char *eol;
 460        } *prev = NULL, *pcl;
 461        unsigned last_hit = 0;
 462        unsigned last_shown = 0;
 463        int binary_match_only = 0;
 464        const char *hunk_mark = "";
 465        unsigned count = 0;
 466        enum grep_context ctx = GREP_CONTEXT_HEAD;
 467
 468        if (buffer_is_binary(buf, size)) {
 469                switch (opt->binary) {
 470                case GREP_BINARY_DEFAULT:
 471                        binary_match_only = 1;
 472                        break;
 473                case GREP_BINARY_NOMATCH:
 474                        return 0; /* Assume unmatch */
 475                        break;
 476                default:
 477                        break;
 478                }
 479        }
 480
 481        if (opt->pre_context)
 482                prev = xcalloc(opt->pre_context, sizeof(*prev));
 483        if (opt->pre_context || opt->post_context)
 484                hunk_mark = "--\n";
 485
 486        while (left) {
 487                char *eol, ch;
 488                int hit;
 489
 490                eol = end_of_line(bol, &left);
 491                ch = *eol;
 492                *eol = 0;
 493
 494                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 495                        ctx = GREP_CONTEXT_BODY;
 496
 497                hit = match_line(opt, bol, eol, ctx, collect_hits);
 498                *eol = ch;
 499
 500                if (collect_hits)
 501                        goto next_line;
 502
 503                /* "grep -v -e foo -e bla" should list lines
 504                 * that do not have either, so inversion should
 505                 * be done outside.
 506                 */
 507                if (opt->invert)
 508                        hit = !hit;
 509                if (opt->unmatch_name_only) {
 510                        if (hit)
 511                                return 0;
 512                        goto next_line;
 513                }
 514                if (hit) {
 515                        count++;
 516                        if (opt->status_only)
 517                                return 1;
 518                        if (binary_match_only) {
 519                                printf("Binary file %s matches\n", name);
 520                                return 1;
 521                        }
 522                        if (opt->name_only) {
 523                                show_name(opt, name);
 524                                return 1;
 525                        }
 526                        /* Hit at this line.  If we haven't shown the
 527                         * pre-context lines, we would need to show them.
 528                         * When asked to do "count", this still show
 529                         * the context which is nonsense, but the user
 530                         * deserves to get that ;-).
 531                         */
 532                        if (opt->pre_context) {
 533                                unsigned from;
 534                                if (opt->pre_context < lno)
 535                                        from = lno - opt->pre_context;
 536                                else
 537                                        from = 1;
 538                                if (from <= last_shown)
 539                                        from = last_shown + 1;
 540                                if (last_shown && from != last_shown + 1)
 541                                        fputs(hunk_mark, stdout);
 542                                while (from < lno) {
 543                                        pcl = &prev[lno-from-1];
 544                                        show_line(opt, pcl->bol, pcl->eol,
 545                                                  name, from, '-');
 546                                        from++;
 547                                }
 548                                last_shown = lno-1;
 549                        }
 550                        if (last_shown && lno != last_shown + 1)
 551                                fputs(hunk_mark, stdout);
 552                        if (!opt->count)
 553                                show_line(opt, bol, eol, name, lno, ':');
 554                        last_shown = last_hit = lno;
 555                }
 556                else if (last_hit &&
 557                         lno <= last_hit + opt->post_context) {
 558                        /* If the last hit is within the post context,
 559                         * we need to show this line.
 560                         */
 561                        if (last_shown && lno != last_shown + 1)
 562                                fputs(hunk_mark, stdout);
 563                        show_line(opt, bol, eol, name, lno, '-');
 564                        last_shown = lno;
 565                }
 566                if (opt->pre_context) {
 567                        memmove(prev+1, prev,
 568                                (opt->pre_context-1) * sizeof(*prev));
 569                        prev->bol = bol;
 570                        prev->eol = eol;
 571                }
 572
 573        next_line:
 574                bol = eol + 1;
 575                if (!left)
 576                        break;
 577                left--;
 578                lno++;
 579        }
 580
 581        free(prev);
 582        if (collect_hits)
 583                return 0;
 584
 585        if (opt->status_only)
 586                return 0;
 587        if (opt->unmatch_name_only) {
 588                /* We did not see any hit, so we want to show this */
 589                show_name(opt, name);
 590                return 1;
 591        }
 592
 593        /* NEEDSWORK:
 594         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 595         * which feels mostly useless but sometimes useful.  Maybe
 596         * make it another option?  For now suppress them.
 597         */
 598        if (opt->count && count)
 599                printf("%s%c%u\n", name,
 600                       opt->null_following_name ? '\0' : ':', count);
 601        return !!last_hit;
 602}
 603
 604static void clr_hit_marker(struct grep_expr *x)
 605{
 606        /* All-hit markers are meaningful only at the very top level
 607         * OR node.
 608         */
 609        while (1) {
 610                x->hit = 0;
 611                if (x->node != GREP_NODE_OR)
 612                        return;
 613                x->u.binary.left->hit = 0;
 614                x = x->u.binary.right;
 615        }
 616}
 617
 618static int chk_hit_marker(struct grep_expr *x)
 619{
 620        /* Top level nodes have hit markers.  See if they all are hits */
 621        while (1) {
 622                if (x->node != GREP_NODE_OR)
 623                        return x->hit;
 624                if (!x->u.binary.left->hit)
 625                        return 0;
 626                x = x->u.binary.right;
 627        }
 628}
 629
 630int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
 631{
 632        /*
 633         * we do not have to do the two-pass grep when we do not check
 634         * buffer-wide "all-match".
 635         */
 636        if (!opt->all_match)
 637                return grep_buffer_1(opt, name, buf, size, 0);
 638
 639        /* Otherwise the toplevel "or" terms hit a bit differently.
 640         * We first clear hit markers from them.
 641         */
 642        clr_hit_marker(opt->pattern_expression);
 643        grep_buffer_1(opt, name, buf, size, 1);
 644
 645        if (!chk_hit_marker(opt->pattern_expression))
 646                return 0;
 647
 648        return grep_buffer_1(opt, name, buf, size, 0);
 649}