grep.con commit Merge branch 'jn/gitweb-customlinks' (168d5bd)
   1#include "cache.h"
   2#include "grep.h"
   3#include "xdiff-interface.h"
   4
   5void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
   6{
   7        struct grep_pat *p = xcalloc(1, sizeof(*p));
   8        p->pattern = pat;
   9        p->origin = "header";
  10        p->no = 0;
  11        p->token = GREP_PATTERN_HEAD;
  12        p->field = field;
  13        *opt->pattern_tail = p;
  14        opt->pattern_tail = &p->next;
  15        p->next = NULL;
  16}
  17
  18void append_grep_pattern(struct grep_opt *opt, const char *pat,
  19                         const char *origin, int no, enum grep_pat_token t)
  20{
  21        struct grep_pat *p = xcalloc(1, sizeof(*p));
  22        p->pattern = pat;
  23        p->origin = origin;
  24        p->no = no;
  25        p->token = t;
  26        *opt->pattern_tail = p;
  27        opt->pattern_tail = &p->next;
  28        p->next = NULL;
  29}
  30
  31static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  32{
  33        int err = regcomp(&p->regexp, p->pattern, opt->regflags);
  34        if (err) {
  35                char errbuf[1024];
  36                char where[1024];
  37                if (p->no)
  38                        sprintf(where, "In '%s' at %d, ",
  39                                p->origin, p->no);
  40                else if (p->origin)
  41                        sprintf(where, "%s, ", p->origin);
  42                else
  43                        where[0] = 0;
  44                regerror(err, &p->regexp, errbuf, 1024);
  45                regfree(&p->regexp);
  46                die("%s'%s': %s", where, p->pattern, errbuf);
  47        }
  48}
  49
  50static struct grep_expr *compile_pattern_or(struct grep_pat **);
  51static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
  52{
  53        struct grep_pat *p;
  54        struct grep_expr *x;
  55
  56        p = *list;
  57        switch (p->token) {
  58        case GREP_PATTERN: /* atom */
  59        case GREP_PATTERN_HEAD:
  60        case GREP_PATTERN_BODY:
  61                x = xcalloc(1, sizeof (struct grep_expr));
  62                x->node = GREP_NODE_ATOM;
  63                x->u.atom = p;
  64                *list = p->next;
  65                return x;
  66        case GREP_OPEN_PAREN:
  67                *list = p->next;
  68                x = compile_pattern_or(list);
  69                if (!x)
  70                        return NULL;
  71                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
  72                        die("unmatched parenthesis");
  73                *list = (*list)->next;
  74                return x;
  75        default:
  76                return NULL;
  77        }
  78}
  79
  80static struct grep_expr *compile_pattern_not(struct grep_pat **list)
  81{
  82        struct grep_pat *p;
  83        struct grep_expr *x;
  84
  85        p = *list;
  86        switch (p->token) {
  87        case GREP_NOT:
  88                if (!p->next)
  89                        die("--not not followed by pattern expression");
  90                *list = p->next;
  91                x = xcalloc(1, sizeof (struct grep_expr));
  92                x->node = GREP_NODE_NOT;
  93                x->u.unary = compile_pattern_not(list);
  94                if (!x->u.unary)
  95                        die("--not followed by non pattern expression");
  96                return x;
  97        default:
  98                return compile_pattern_atom(list);
  99        }
 100}
 101
 102static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 103{
 104        struct grep_pat *p;
 105        struct grep_expr *x, *y, *z;
 106
 107        x = compile_pattern_not(list);
 108        p = *list;
 109        if (p && p->token == GREP_AND) {
 110                if (!p->next)
 111                        die("--and not followed by pattern expression");
 112                *list = p->next;
 113                y = compile_pattern_and(list);
 114                if (!y)
 115                        die("--and not followed by pattern expression");
 116                z = xcalloc(1, sizeof (struct grep_expr));
 117                z->node = GREP_NODE_AND;
 118                z->u.binary.left = x;
 119                z->u.binary.right = y;
 120                return z;
 121        }
 122        return x;
 123}
 124
 125static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 126{
 127        struct grep_pat *p;
 128        struct grep_expr *x, *y, *z;
 129
 130        x = compile_pattern_and(list);
 131        p = *list;
 132        if (x && p && p->token != GREP_CLOSE_PAREN) {
 133                y = compile_pattern_or(list);
 134                if (!y)
 135                        die("not a pattern expression %s", p->pattern);
 136                z = xcalloc(1, sizeof (struct grep_expr));
 137                z->node = GREP_NODE_OR;
 138                z->u.binary.left = x;
 139                z->u.binary.right = y;
 140                return z;
 141        }
 142        return x;
 143}
 144
 145static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 146{
 147        return compile_pattern_or(list);
 148}
 149
 150void compile_grep_patterns(struct grep_opt *opt)
 151{
 152        struct grep_pat *p;
 153
 154        if (opt->all_match)
 155                opt->extended = 1;
 156
 157        for (p = opt->pattern_list; p; p = p->next) {
 158                switch (p->token) {
 159                case GREP_PATTERN: /* atom */
 160                case GREP_PATTERN_HEAD:
 161                case GREP_PATTERN_BODY:
 162                        if (!opt->fixed)
 163                                compile_regexp(p, opt);
 164                        break;
 165                default:
 166                        opt->extended = 1;
 167                        break;
 168                }
 169        }
 170
 171        if (!opt->extended)
 172                return;
 173
 174        /* Then bundle them up in an expression.
 175         * A classic recursive descent parser would do.
 176         */
 177        p = opt->pattern_list;
 178        opt->pattern_expression = compile_pattern_expr(&p);
 179        if (p)
 180                die("incomplete pattern expression: %s", p->pattern);
 181}
 182
 183static void free_pattern_expr(struct grep_expr *x)
 184{
 185        switch (x->node) {
 186        case GREP_NODE_ATOM:
 187                break;
 188        case GREP_NODE_NOT:
 189                free_pattern_expr(x->u.unary);
 190                break;
 191        case GREP_NODE_AND:
 192        case GREP_NODE_OR:
 193                free_pattern_expr(x->u.binary.left);
 194                free_pattern_expr(x->u.binary.right);
 195                break;
 196        }
 197        free(x);
 198}
 199
 200void free_grep_patterns(struct grep_opt *opt)
 201{
 202        struct grep_pat *p, *n;
 203
 204        for (p = opt->pattern_list; p; p = n) {
 205                n = p->next;
 206                switch (p->token) {
 207                case GREP_PATTERN: /* atom */
 208                case GREP_PATTERN_HEAD:
 209                case GREP_PATTERN_BODY:
 210                        regfree(&p->regexp);
 211                        break;
 212                default:
 213                        break;
 214                }
 215                free(p);
 216        }
 217
 218        if (!opt->extended)
 219                return;
 220        free_pattern_expr(opt->pattern_expression);
 221}
 222
 223static char *end_of_line(char *cp, unsigned long *left)
 224{
 225        unsigned long l = *left;
 226        while (l && *cp != '\n') {
 227                l--;
 228                cp++;
 229        }
 230        *left = l;
 231        return cp;
 232}
 233
 234static int word_char(char ch)
 235{
 236        return isalnum(ch) || ch == '_';
 237}
 238
 239static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 240                      const char *name, unsigned lno, char sign)
 241{
 242        if (opt->null_following_name)
 243                sign = '\0';
 244        if (opt->pathname)
 245                printf("%s%c", name, sign);
 246        if (opt->linenum)
 247                printf("%d%c", lno, sign);
 248        printf("%.*s\n", (int)(eol-bol), bol);
 249}
 250
 251static void show_name(struct grep_opt *opt, const char *name)
 252{
 253        printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
 254}
 255
 256static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 257{
 258        char *hit = strstr(line, pattern);
 259        if (!hit) {
 260                match->rm_so = match->rm_eo = -1;
 261                return REG_NOMATCH;
 262        }
 263        else {
 264                match->rm_so = hit - line;
 265                match->rm_eo = match->rm_so + strlen(pattern);
 266                return 0;
 267        }
 268}
 269
 270static int strip_timestamp(char *bol, char **eol_p)
 271{
 272        char *eol = *eol_p;
 273        int ch;
 274
 275        while (bol < --eol) {
 276                if (*eol != '>')
 277                        continue;
 278                *eol_p = ++eol;
 279                ch = *eol;
 280                *eol = '\0';
 281                return ch;
 282        }
 283        return 0;
 284}
 285
 286static struct {
 287        const char *field;
 288        size_t len;
 289} header_field[] = {
 290        { "author ", 7 },
 291        { "committer ", 10 },
 292};
 293
 294static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
 295{
 296        int hit = 0;
 297        int at_true_bol = 1;
 298        int saved_ch = 0;
 299        regmatch_t pmatch[10];
 300
 301        if ((p->token != GREP_PATTERN) &&
 302            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 303                return 0;
 304
 305        if (p->token == GREP_PATTERN_HEAD) {
 306                const char *field;
 307                size_t len;
 308                assert(p->field < ARRAY_SIZE(header_field));
 309                field = header_field[p->field].field;
 310                len = header_field[p->field].len;
 311                if (strncmp(bol, field, len))
 312                        return 0;
 313                bol += len;
 314                saved_ch = strip_timestamp(bol, &eol);
 315        }
 316
 317 again:
 318        if (!opt->fixed) {
 319                regex_t *exp = &p->regexp;
 320                hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
 321                               pmatch, 0);
 322        }
 323        else {
 324                hit = !fixmatch(p->pattern, bol, pmatch);
 325        }
 326
 327        if (hit && opt->word_regexp) {
 328                if ((pmatch[0].rm_so < 0) ||
 329                    (eol - bol) <= pmatch[0].rm_so ||
 330                    (pmatch[0].rm_eo < 0) ||
 331                    (eol - bol) < pmatch[0].rm_eo)
 332                        die("regexp returned nonsense");
 333
 334                /* Match beginning must be either beginning of the
 335                 * line, or at word boundary (i.e. the last char must
 336                 * not be a word char).  Similarly, match end must be
 337                 * either end of the line, or at word boundary
 338                 * (i.e. the next char must not be a word char).
 339                 */
 340                if ( ((pmatch[0].rm_so == 0 && at_true_bol) ||
 341                      !word_char(bol[pmatch[0].rm_so-1])) &&
 342                     ((pmatch[0].rm_eo == (eol-bol)) ||
 343                      !word_char(bol[pmatch[0].rm_eo])) )
 344                        ;
 345                else
 346                        hit = 0;
 347
 348                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 349                        /* There could be more than one match on the
 350                         * line, and the first match might not be
 351                         * strict word match.  But later ones could be!
 352                         */
 353                        bol = pmatch[0].rm_so + bol + 1;
 354                        at_true_bol = 0;
 355                        goto again;
 356                }
 357        }
 358        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 359                *eol = saved_ch;
 360        return hit;
 361}
 362
 363static int match_expr_eval(struct grep_opt *o,
 364                           struct grep_expr *x,
 365                           char *bol, char *eol,
 366                           enum grep_context ctx,
 367                           int collect_hits)
 368{
 369        int h = 0;
 370
 371        switch (x->node) {
 372        case GREP_NODE_ATOM:
 373                h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
 374                break;
 375        case GREP_NODE_NOT:
 376                h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
 377                break;
 378        case GREP_NODE_AND:
 379                if (!collect_hits)
 380                        return (match_expr_eval(o, x->u.binary.left,
 381                                                bol, eol, ctx, 0) &&
 382                                match_expr_eval(o, x->u.binary.right,
 383                                                bol, eol, ctx, 0));
 384                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 385                h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
 386                break;
 387        case GREP_NODE_OR:
 388                if (!collect_hits)
 389                        return (match_expr_eval(o, x->u.binary.left,
 390                                                bol, eol, ctx, 0) ||
 391                                match_expr_eval(o, x->u.binary.right,
 392                                                bol, eol, ctx, 0));
 393                h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
 394                x->u.binary.left->hit |= h;
 395                h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
 396                break;
 397        default:
 398                die("Unexpected node type (internal error) %d\n", x->node);
 399        }
 400        if (collect_hits)
 401                x->hit |= h;
 402        return h;
 403}
 404
 405static int match_expr(struct grep_opt *opt, char *bol, char *eol,
 406                      enum grep_context ctx, int collect_hits)
 407{
 408        struct grep_expr *x = opt->pattern_expression;
 409        return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
 410}
 411
 412static int match_line(struct grep_opt *opt, char *bol, char *eol,
 413                      enum grep_context ctx, int collect_hits)
 414{
 415        struct grep_pat *p;
 416        if (opt->extended)
 417                return match_expr(opt, bol, eol, ctx, collect_hits);
 418
 419        /* we do not call with collect_hits without being extended */
 420        for (p = opt->pattern_list; p; p = p->next) {
 421                if (match_one_pattern(opt, p, bol, eol, ctx))
 422                        return 1;
 423        }
 424        return 0;
 425}
 426
 427static int grep_buffer_1(struct grep_opt *opt, const char *name,
 428                         char *buf, unsigned long size, int collect_hits)
 429{
 430        char *bol = buf;
 431        unsigned long left = size;
 432        unsigned lno = 1;
 433        struct pre_context_line {
 434                char *bol;
 435                char *eol;
 436        } *prev = NULL, *pcl;
 437        unsigned last_hit = 0;
 438        unsigned last_shown = 0;
 439        int binary_match_only = 0;
 440        const char *hunk_mark = "";
 441        unsigned count = 0;
 442        enum grep_context ctx = GREP_CONTEXT_HEAD;
 443
 444        if (buffer_is_binary(buf, size)) {
 445                switch (opt->binary) {
 446                case GREP_BINARY_DEFAULT:
 447                        binary_match_only = 1;
 448                        break;
 449                case GREP_BINARY_NOMATCH:
 450                        return 0; /* Assume unmatch */
 451                        break;
 452                default:
 453                        break;
 454                }
 455        }
 456
 457        if (opt->pre_context)
 458                prev = xcalloc(opt->pre_context, sizeof(*prev));
 459        if (opt->pre_context || opt->post_context)
 460                hunk_mark = "--\n";
 461
 462        while (left) {
 463                char *eol, ch;
 464                int hit;
 465
 466                eol = end_of_line(bol, &left);
 467                ch = *eol;
 468                *eol = 0;
 469
 470                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 471                        ctx = GREP_CONTEXT_BODY;
 472
 473                hit = match_line(opt, bol, eol, ctx, collect_hits);
 474                *eol = ch;
 475
 476                if (collect_hits)
 477                        goto next_line;
 478
 479                /* "grep -v -e foo -e bla" should list lines
 480                 * that do not have either, so inversion should
 481                 * be done outside.
 482                 */
 483                if (opt->invert)
 484                        hit = !hit;
 485                if (opt->unmatch_name_only) {
 486                        if (hit)
 487                                return 0;
 488                        goto next_line;
 489                }
 490                if (hit) {
 491                        count++;
 492                        if (opt->status_only)
 493                                return 1;
 494                        if (binary_match_only) {
 495                                printf("Binary file %s matches\n", name);
 496                                return 1;
 497                        }
 498                        if (opt->name_only) {
 499                                show_name(opt, name);
 500                                return 1;
 501                        }
 502                        /* Hit at this line.  If we haven't shown the
 503                         * pre-context lines, we would need to show them.
 504                         * When asked to do "count", this still show
 505                         * the context which is nonsense, but the user
 506                         * deserves to get that ;-).
 507                         */
 508                        if (opt->pre_context) {
 509                                unsigned from;
 510                                if (opt->pre_context < lno)
 511                                        from = lno - opt->pre_context;
 512                                else
 513                                        from = 1;
 514                                if (from <= last_shown)
 515                                        from = last_shown + 1;
 516                                if (last_shown && from != last_shown + 1)
 517                                        fputs(hunk_mark, stdout);
 518                                while (from < lno) {
 519                                        pcl = &prev[lno-from-1];
 520                                        show_line(opt, pcl->bol, pcl->eol,
 521                                                  name, from, '-');
 522                                        from++;
 523                                }
 524                                last_shown = lno-1;
 525                        }
 526                        if (last_shown && lno != last_shown + 1)
 527                                fputs(hunk_mark, stdout);
 528                        if (!opt->count)
 529                                show_line(opt, bol, eol, name, lno, ':');
 530                        last_shown = last_hit = lno;
 531                }
 532                else if (last_hit &&
 533                         lno <= last_hit + opt->post_context) {
 534                        /* If the last hit is within the post context,
 535                         * we need to show this line.
 536                         */
 537                        if (last_shown && lno != last_shown + 1)
 538                                fputs(hunk_mark, stdout);
 539                        show_line(opt, bol, eol, name, lno, '-');
 540                        last_shown = lno;
 541                }
 542                if (opt->pre_context) {
 543                        memmove(prev+1, prev,
 544                                (opt->pre_context-1) * sizeof(*prev));
 545                        prev->bol = bol;
 546                        prev->eol = eol;
 547                }
 548
 549        next_line:
 550                bol = eol + 1;
 551                if (!left)
 552                        break;
 553                left--;
 554                lno++;
 555        }
 556
 557        free(prev);
 558        if (collect_hits)
 559                return 0;
 560
 561        if (opt->status_only)
 562                return 0;
 563        if (opt->unmatch_name_only) {
 564                /* We did not see any hit, so we want to show this */
 565                show_name(opt, name);
 566                return 1;
 567        }
 568
 569        /* NEEDSWORK:
 570         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 571         * which feels mostly useless but sometimes useful.  Maybe
 572         * make it another option?  For now suppress them.
 573         */
 574        if (opt->count && count)
 575                printf("%s%c%u\n", name,
 576                       opt->null_following_name ? '\0' : ':', count);
 577        return !!last_hit;
 578}
 579
 580static void clr_hit_marker(struct grep_expr *x)
 581{
 582        /* All-hit markers are meaningful only at the very top level
 583         * OR node.
 584         */
 585        while (1) {
 586                x->hit = 0;
 587                if (x->node != GREP_NODE_OR)
 588                        return;
 589                x->u.binary.left->hit = 0;
 590                x = x->u.binary.right;
 591        }
 592}
 593
 594static int chk_hit_marker(struct grep_expr *x)
 595{
 596        /* Top level nodes have hit markers.  See if they all are hits */
 597        while (1) {
 598                if (x->node != GREP_NODE_OR)
 599                        return x->hit;
 600                if (!x->u.binary.left->hit)
 601                        return 0;
 602                x = x->u.binary.right;
 603        }
 604}
 605
 606int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
 607{
 608        /*
 609         * we do not have to do the two-pass grep when we do not check
 610         * buffer-wide "all-match".
 611         */
 612        if (!opt->all_match)
 613                return grep_buffer_1(opt, name, buf, size, 0);
 614
 615        /* Otherwise the toplevel "or" terms hit a bit differently.
 616         * We first clear hit markers from them.
 617         */
 618        clr_hit_marker(opt->pattern_expression);
 619        grep_buffer_1(opt, name, buf, size, 1);
 620
 621        if (!chk_hit_marker(opt->pattern_expression))
 622                return 0;
 623
 624        return grep_buffer_1(opt, name, buf, size, 0);
 625}