/* grep.c on commit "Documentation: do not convert ... operator to ellipses" (b9190e7) */
   1#include "cache.h"
   2#include "grep.h"
   3#include "userdiff.h"
   4#include "xdiff-interface.h"
   5
   6void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
   7{
   8        struct grep_pat *p = xcalloc(1, sizeof(*p));
   9        p->pattern = pat;
  10        p->patternlen = strlen(pat);
  11        p->origin = "header";
  12        p->no = 0;
  13        p->token = GREP_PATTERN_HEAD;
  14        p->field = field;
  15        *opt->header_tail = p;
  16        opt->header_tail = &p->next;
  17        p->next = NULL;
  18}
  19
  20void append_grep_pattern(struct grep_opt *opt, const char *pat,
  21                         const char *origin, int no, enum grep_pat_token t)
  22{
  23        append_grep_pat(opt, pat, strlen(pat), origin, no, t);
  24}
  25
  26void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
  27                     const char *origin, int no, enum grep_pat_token t)
  28{
  29        struct grep_pat *p = xcalloc(1, sizeof(*p));
  30        p->pattern = pat;
  31        p->patternlen = patlen;
  32        p->origin = origin;
  33        p->no = no;
  34        p->token = t;
  35        *opt->pattern_tail = p;
  36        opt->pattern_tail = &p->next;
  37        p->next = NULL;
  38}
  39
  40struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
  41{
  42        struct grep_pat *pat;
  43        struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
  44        *ret = *opt;
  45
  46        ret->pattern_list = NULL;
  47        ret->pattern_tail = &ret->pattern_list;
  48
  49        for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
  50        {
  51                if(pat->token == GREP_PATTERN_HEAD)
  52                        append_header_grep_pattern(ret, pat->field,
  53                                                   pat->pattern);
  54                else
  55                        append_grep_pat(ret, pat->pattern, pat->patternlen,
  56                                        pat->origin, pat->no, pat->token);
  57        }
  58
  59        return ret;
  60}
  61
  62static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  63{
  64        int err;
  65
  66        p->word_regexp = opt->word_regexp;
  67        p->ignore_case = opt->ignore_case;
  68        p->fixed = opt->fixed;
  69
  70        if (p->fixed)
  71                return;
  72
  73        err = regcomp(&p->regexp, p->pattern, opt->regflags);
  74        if (err) {
  75                char errbuf[1024];
  76                char where[1024];
  77                if (p->no)
  78                        sprintf(where, "In '%s' at %d, ",
  79                                p->origin, p->no);
  80                else if (p->origin)
  81                        sprintf(where, "%s, ", p->origin);
  82                else
  83                        where[0] = 0;
  84                regerror(err, &p->regexp, errbuf, 1024);
  85                regfree(&p->regexp);
  86                die("%s'%s': %s", where, p->pattern, errbuf);
  87        }
  88}
  89
  90static struct grep_expr *compile_pattern_or(struct grep_pat **);
  91static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
  92{
  93        struct grep_pat *p;
  94        struct grep_expr *x;
  95
  96        p = *list;
  97        if (!p)
  98                return NULL;
  99        switch (p->token) {
 100        case GREP_PATTERN: /* atom */
 101        case GREP_PATTERN_HEAD:
 102        case GREP_PATTERN_BODY:
 103                x = xcalloc(1, sizeof (struct grep_expr));
 104                x->node = GREP_NODE_ATOM;
 105                x->u.atom = p;
 106                *list = p->next;
 107                return x;
 108        case GREP_OPEN_PAREN:
 109                *list = p->next;
 110                x = compile_pattern_or(list);
 111                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 112                        die("unmatched parenthesis");
 113                *list = (*list)->next;
 114                return x;
 115        default:
 116                return NULL;
 117        }
 118}
 119
 120static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 121{
 122        struct grep_pat *p;
 123        struct grep_expr *x;
 124
 125        p = *list;
 126        if (!p)
 127                return NULL;
 128        switch (p->token) {
 129        case GREP_NOT:
 130                if (!p->next)
 131                        die("--not not followed by pattern expression");
 132                *list = p->next;
 133                x = xcalloc(1, sizeof (struct grep_expr));
 134                x->node = GREP_NODE_NOT;
 135                x->u.unary = compile_pattern_not(list);
 136                if (!x->u.unary)
 137                        die("--not followed by non pattern expression");
 138                return x;
 139        default:
 140                return compile_pattern_atom(list);
 141        }
 142}
 143
 144static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 145{
 146        struct grep_pat *p;
 147        struct grep_expr *x, *y, *z;
 148
 149        x = compile_pattern_not(list);
 150        p = *list;
 151        if (p && p->token == GREP_AND) {
 152                if (!p->next)
 153                        die("--and not followed by pattern expression");
 154                *list = p->next;
 155                y = compile_pattern_and(list);
 156                if (!y)
 157                        die("--and not followed by pattern expression");
 158                z = xcalloc(1, sizeof (struct grep_expr));
 159                z->node = GREP_NODE_AND;
 160                z->u.binary.left = x;
 161                z->u.binary.right = y;
 162                return z;
 163        }
 164        return x;
 165}
 166
 167static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 168{
 169        struct grep_pat *p;
 170        struct grep_expr *x, *y, *z;
 171
 172        x = compile_pattern_and(list);
 173        p = *list;
 174        if (x && p && p->token != GREP_CLOSE_PAREN) {
 175                y = compile_pattern_or(list);
 176                if (!y)
 177                        die("not a pattern expression %s", p->pattern);
 178                z = xcalloc(1, sizeof (struct grep_expr));
 179                z->node = GREP_NODE_OR;
 180                z->u.binary.left = x;
 181                z->u.binary.right = y;
 182                return z;
 183        }
 184        return x;
 185}
 186
 187static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 188{
 189        return compile_pattern_or(list);
 190}
 191
 192void compile_grep_patterns(struct grep_opt *opt)
 193{
 194        struct grep_pat *p;
 195        struct grep_expr *header_expr = NULL;
 196
 197        if (opt->header_list) {
 198                p = opt->header_list;
 199                header_expr = compile_pattern_expr(&p);
 200                if (p)
 201                        die("incomplete pattern expression: %s", p->pattern);
 202                for (p = opt->header_list; p; p = p->next) {
 203                        switch (p->token) {
 204                        case GREP_PATTERN: /* atom */
 205                        case GREP_PATTERN_HEAD:
 206                        case GREP_PATTERN_BODY:
 207                                compile_regexp(p, opt);
 208                                break;
 209                        default:
 210                                opt->extended = 1;
 211                                break;
 212                        }
 213                }
 214        }
 215
 216        for (p = opt->pattern_list; p; p = p->next) {
 217                switch (p->token) {
 218                case GREP_PATTERN: /* atom */
 219                case GREP_PATTERN_HEAD:
 220                case GREP_PATTERN_BODY:
 221                        compile_regexp(p, opt);
 222                        break;
 223                default:
 224                        opt->extended = 1;
 225                        break;
 226                }
 227        }
 228
 229        if (opt->all_match || header_expr)
 230                opt->extended = 1;
 231        else if (!opt->extended)
 232                return;
 233
 234        /* Then bundle them up in an expression.
 235         * A classic recursive descent parser would do.
 236         */
 237        p = opt->pattern_list;
 238        if (p)
 239                opt->pattern_expression = compile_pattern_expr(&p);
 240        if (p)
 241                die("incomplete pattern expression: %s", p->pattern);
 242
 243        if (!header_expr)
 244                return;
 245
 246        if (opt->pattern_expression) {
 247                struct grep_expr *z;
 248                z = xcalloc(1, sizeof(*z));
 249                z->node = GREP_NODE_OR;
 250                z->u.binary.left = opt->pattern_expression;
 251                z->u.binary.right = header_expr;
 252                opt->pattern_expression = z;
 253        } else {
 254                opt->pattern_expression = header_expr;
 255        }
 256        opt->all_match = 1;
 257}
 258
 259static void free_pattern_expr(struct grep_expr *x)
 260{
 261        switch (x->node) {
 262        case GREP_NODE_ATOM:
 263                break;
 264        case GREP_NODE_NOT:
 265                free_pattern_expr(x->u.unary);
 266                break;
 267        case GREP_NODE_AND:
 268        case GREP_NODE_OR:
 269                free_pattern_expr(x->u.binary.left);
 270                free_pattern_expr(x->u.binary.right);
 271                break;
 272        }
 273        free(x);
 274}
 275
 276void free_grep_patterns(struct grep_opt *opt)
 277{
 278        struct grep_pat *p, *n;
 279
 280        for (p = opt->pattern_list; p; p = n) {
 281                n = p->next;
 282                switch (p->token) {
 283                case GREP_PATTERN: /* atom */
 284                case GREP_PATTERN_HEAD:
 285                case GREP_PATTERN_BODY:
 286                        regfree(&p->regexp);
 287                        break;
 288                default:
 289                        break;
 290                }
 291                free(p);
 292        }
 293
 294        if (!opt->extended)
 295                return;
 296        free_pattern_expr(opt->pattern_expression);
 297}
 298
 299static char *end_of_line(char *cp, unsigned long *left)
 300{
 301        unsigned long l = *left;
 302        while (l && *cp != '\n') {
 303                l--;
 304                cp++;
 305        }
 306        *left = l;
 307        return cp;
 308}
 309
 310static int word_char(char ch)
 311{
 312        return isalnum(ch) || ch == '_';
 313}
 314
 315static void output_color(struct grep_opt *opt, const void *data, size_t size,
 316                         const char *color)
 317{
 318        if (opt->color && color && color[0]) {
 319                opt->output(opt, color, strlen(color));
 320                opt->output(opt, data, size);
 321                opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
 322        } else
 323                opt->output(opt, data, size);
 324}
 325
 326static void output_sep(struct grep_opt *opt, char sign)
 327{
 328        if (opt->null_following_name)
 329                opt->output(opt, "\0", 1);
 330        else
 331                output_color(opt, &sign, 1, opt->color_sep);
 332}
 333
 334static void show_name(struct grep_opt *opt, const char *name)
 335{
 336        output_color(opt, name, strlen(name), opt->color_filename);
 337        opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 338}
 339
 340static int fixmatch(struct grep_pat *p, char *line, char *eol,
 341                    regmatch_t *match)
 342{
 343        char *hit;
 344
 345        if (p->ignore_case) {
 346                char *s = line;
 347                do {
 348                        hit = strcasestr(s, p->pattern);
 349                        if (hit)
 350                                break;
 351                        s += strlen(s) + 1;
 352                } while (s < eol);
 353        } else
 354                hit = memmem(line, eol - line, p->pattern, p->patternlen);
 355
 356        if (!hit) {
 357                match->rm_so = match->rm_eo = -1;
 358                return REG_NOMATCH;
 359        }
 360        else {
 361                match->rm_so = hit - line;
 362                match->rm_eo = match->rm_so + p->patternlen;
 363                return 0;
 364        }
 365}
 366
 367static int regmatch(const regex_t *preg, char *line, char *eol,
 368                    regmatch_t *match, int eflags)
 369{
 370#ifdef REG_STARTEND
 371        match->rm_so = 0;
 372        match->rm_eo = eol - line;
 373        eflags |= REG_STARTEND;
 374#endif
 375        return regexec(preg, line, 1, match, eflags);
 376}
 377
 378static int strip_timestamp(char *bol, char **eol_p)
 379{
 380        char *eol = *eol_p;
 381        int ch;
 382
 383        while (bol < --eol) {
 384                if (*eol != '>')
 385                        continue;
 386                *eol_p = ++eol;
 387                ch = *eol;
 388                *eol = '\0';
 389                return ch;
 390        }
 391        return 0;
 392}
 393
 394static struct {
 395        const char *field;
 396        size_t len;
 397} header_field[] = {
 398        { "author ", 7 },
 399        { "committer ", 10 },
 400};
 401
 402static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 403                             enum grep_context ctx,
 404                             regmatch_t *pmatch, int eflags)
 405{
 406        int hit = 0;
 407        int saved_ch = 0;
 408        const char *start = bol;
 409
 410        if ((p->token != GREP_PATTERN) &&
 411            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 412                return 0;
 413
 414        if (p->token == GREP_PATTERN_HEAD) {
 415                const char *field;
 416                size_t len;
 417                assert(p->field < ARRAY_SIZE(header_field));
 418                field = header_field[p->field].field;
 419                len = header_field[p->field].len;
 420                if (strncmp(bol, field, len))
 421                        return 0;
 422                bol += len;
 423                saved_ch = strip_timestamp(bol, &eol);
 424        }
 425
 426 again:
 427        if (p->fixed)
 428                hit = !fixmatch(p, bol, eol, pmatch);
 429        else
 430                hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
 431
 432        if (hit && p->word_regexp) {
 433                if ((pmatch[0].rm_so < 0) ||
 434                    (eol - bol) < pmatch[0].rm_so ||
 435                    (pmatch[0].rm_eo < 0) ||
 436                    (eol - bol) < pmatch[0].rm_eo)
 437                        die("regexp returned nonsense");
 438
 439                /* Match beginning must be either beginning of the
 440                 * line, or at word boundary (i.e. the last char must
 441                 * not be a word char).  Similarly, match end must be
 442                 * either end of the line, or at word boundary
 443                 * (i.e. the next char must not be a word char).
 444                 */
 445                if ( ((pmatch[0].rm_so == 0) ||
 446                      !word_char(bol[pmatch[0].rm_so-1])) &&
 447                     ((pmatch[0].rm_eo == (eol-bol)) ||
 448                      !word_char(bol[pmatch[0].rm_eo])) )
 449                        ;
 450                else
 451                        hit = 0;
 452
 453                /* Words consist of at least one character. */
 454                if (pmatch->rm_so == pmatch->rm_eo)
 455                        hit = 0;
 456
 457                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 458                        /* There could be more than one match on the
 459                         * line, and the first match might not be
 460                         * strict word match.  But later ones could be!
 461                         * Forward to the next possible start, i.e. the
 462                         * next position following a non-word char.
 463                         */
 464                        bol = pmatch[0].rm_so + bol + 1;
 465                        while (word_char(bol[-1]) && bol < eol)
 466                                bol++;
 467                        eflags |= REG_NOTBOL;
 468                        if (bol < eol)
 469                                goto again;
 470                }
 471        }
 472        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 473                *eol = saved_ch;
 474        if (hit) {
 475                pmatch[0].rm_so += bol - start;
 476                pmatch[0].rm_eo += bol - start;
 477        }
 478        return hit;
 479}
 480
 481static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
 482                           enum grep_context ctx, int collect_hits)
 483{
 484        int h = 0;
 485        regmatch_t match;
 486
 487        if (!x)
 488                die("Not a valid grep expression");
 489        switch (x->node) {
 490        case GREP_NODE_ATOM:
 491                h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0);
 492                break;
 493        case GREP_NODE_NOT:
 494                h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0);
 495                break;
 496        case GREP_NODE_AND:
 497                if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0))
 498                        return 0;
 499                h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0);
 500                break;
 501        case GREP_NODE_OR:
 502                if (!collect_hits)
 503                        return (match_expr_eval(x->u.binary.left,
 504                                                bol, eol, ctx, 0) ||
 505                                match_expr_eval(x->u.binary.right,
 506                                                bol, eol, ctx, 0));
 507                h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0);
 508                x->u.binary.left->hit |= h;
 509                h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1);
 510                break;
 511        default:
 512                die("Unexpected node type (internal error) %d", x->node);
 513        }
 514        if (collect_hits)
 515                x->hit |= h;
 516        return h;
 517}
 518
 519static int match_expr(struct grep_opt *opt, char *bol, char *eol,
 520                      enum grep_context ctx, int collect_hits)
 521{
 522        struct grep_expr *x = opt->pattern_expression;
 523        return match_expr_eval(x, bol, eol, ctx, collect_hits);
 524}
 525
 526static int match_line(struct grep_opt *opt, char *bol, char *eol,
 527                      enum grep_context ctx, int collect_hits)
 528{
 529        struct grep_pat *p;
 530        regmatch_t match;
 531
 532        if (opt->extended)
 533                return match_expr(opt, bol, eol, ctx, collect_hits);
 534
 535        /* we do not call with collect_hits without being extended */
 536        for (p = opt->pattern_list; p; p = p->next) {
 537                if (match_one_pattern(p, bol, eol, ctx, &match, 0))
 538                        return 1;
 539        }
 540        return 0;
 541}
 542
 543static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
 544                              enum grep_context ctx,
 545                              regmatch_t *pmatch, int eflags)
 546{
 547        regmatch_t match;
 548
 549        if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
 550                return 0;
 551        if (match.rm_so < 0 || match.rm_eo < 0)
 552                return 0;
 553        if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
 554                if (match.rm_so > pmatch->rm_so)
 555                        return 1;
 556                if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
 557                        return 1;
 558        }
 559        pmatch->rm_so = match.rm_so;
 560        pmatch->rm_eo = match.rm_eo;
 561        return 1;
 562}
 563
 564static int next_match(struct grep_opt *opt, char *bol, char *eol,
 565                      enum grep_context ctx, regmatch_t *pmatch, int eflags)
 566{
 567        struct grep_pat *p;
 568        int hit = 0;
 569
 570        pmatch->rm_so = pmatch->rm_eo = -1;
 571        if (bol < eol) {
 572                for (p = opt->pattern_list; p; p = p->next) {
 573                        switch (p->token) {
 574                        case GREP_PATTERN: /* atom */
 575                        case GREP_PATTERN_HEAD:
 576                        case GREP_PATTERN_BODY:
 577                                hit |= match_next_pattern(p, bol, eol, ctx,
 578                                                          pmatch, eflags);
 579                                break;
 580                        default:
 581                                break;
 582                        }
 583                }
 584        }
 585        return hit;
 586}
 587
 588static void show_line(struct grep_opt *opt, char *bol, char *eol,
 589                      const char *name, unsigned lno, char sign)
 590{
 591        int rest = eol - bol;
 592        char *line_color = NULL;
 593
 594        if (opt->pre_context || opt->post_context) {
 595                if (opt->last_shown == 0) {
 596                        if (opt->show_hunk_mark) {
 597                                output_color(opt, "--", 2, opt->color_sep);
 598                                opt->output(opt, "\n", 1);
 599                        }
 600                } else if (lno > opt->last_shown + 1) {
 601                        output_color(opt, "--", 2, opt->color_sep);
 602                        opt->output(opt, "\n", 1);
 603                }
 604        }
 605        opt->last_shown = lno;
 606
 607        if (opt->pathname) {
 608                output_color(opt, name, strlen(name), opt->color_filename);
 609                output_sep(opt, sign);
 610        }
 611        if (opt->linenum) {
 612                char buf[32];
 613                snprintf(buf, sizeof(buf), "%d", lno);
 614                output_color(opt, buf, strlen(buf), opt->color_lineno);
 615                output_sep(opt, sign);
 616        }
 617        if (opt->color) {
 618                regmatch_t match;
 619                enum grep_context ctx = GREP_CONTEXT_BODY;
 620                int ch = *eol;
 621                int eflags = 0;
 622
 623                if (sign == ':')
 624                        line_color = opt->color_selected;
 625                else if (sign == '-')
 626                        line_color = opt->color_context;
 627                else if (sign == '=')
 628                        line_color = opt->color_function;
 629                *eol = '\0';
 630                while (next_match(opt, bol, eol, ctx, &match, eflags)) {
 631                        if (match.rm_so == match.rm_eo)
 632                                break;
 633
 634                        output_color(opt, bol, match.rm_so, line_color);
 635                        output_color(opt, bol + match.rm_so,
 636                                     match.rm_eo - match.rm_so,
 637                                     opt->color_match);
 638                        bol += match.rm_eo;
 639                        rest -= match.rm_eo;
 640                        eflags = REG_NOTBOL;
 641                }
 642                *eol = ch;
 643        }
 644        output_color(opt, bol, rest, line_color);
 645        opt->output(opt, "\n", 1);
 646}
 647
 648static int match_funcname(struct grep_opt *opt, char *bol, char *eol)
 649{
 650        xdemitconf_t *xecfg = opt->priv;
 651        if (xecfg && xecfg->find_func) {
 652                char buf[1];
 653                return xecfg->find_func(bol, eol - bol, buf, 1,
 654                                        xecfg->find_func_priv) >= 0;
 655        }
 656
 657        if (bol == eol)
 658                return 0;
 659        if (isalpha(*bol) || *bol == '_' || *bol == '$')
 660                return 1;
 661        return 0;
 662}
 663
 664static void show_funcname_line(struct grep_opt *opt, const char *name,
 665                               char *buf, char *bol, unsigned lno)
 666{
 667        while (bol > buf) {
 668                char *eol = --bol;
 669
 670                while (bol > buf && bol[-1] != '\n')
 671                        bol--;
 672                lno--;
 673
 674                if (lno <= opt->last_shown)
 675                        break;
 676
 677                if (match_funcname(opt, bol, eol)) {
 678                        show_line(opt, bol, eol, name, lno, '=');
 679                        break;
 680                }
 681        }
 682}
 683
 684static void show_pre_context(struct grep_opt *opt, const char *name, char *buf,
 685                             char *bol, unsigned lno)
 686{
 687        unsigned cur = lno, from = 1, funcname_lno = 0;
 688        int funcname_needed = opt->funcname;
 689
 690        if (opt->pre_context < lno)
 691                from = lno - opt->pre_context;
 692        if (from <= opt->last_shown)
 693                from = opt->last_shown + 1;
 694
 695        /* Rewind. */
 696        while (bol > buf && cur > from) {
 697                char *eol = --bol;
 698
 699                while (bol > buf && bol[-1] != '\n')
 700                        bol--;
 701                cur--;
 702                if (funcname_needed && match_funcname(opt, bol, eol)) {
 703                        funcname_lno = cur;
 704                        funcname_needed = 0;
 705                }
 706        }
 707
 708        /* We need to look even further back to find a function signature. */
 709        if (opt->funcname && funcname_needed)
 710                show_funcname_line(opt, name, buf, bol, cur);
 711
 712        /* Back forward. */
 713        while (cur < lno) {
 714                char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
 715
 716                while (*eol != '\n')
 717                        eol++;
 718                show_line(opt, bol, eol, name, cur, sign);
 719                bol = eol + 1;
 720                cur++;
 721        }
 722}
 723
 724static int should_lookahead(struct grep_opt *opt)
 725{
 726        struct grep_pat *p;
 727
 728        if (opt->extended)
 729                return 0; /* punt for too complex stuff */
 730        if (opt->invert)
 731                return 0;
 732        for (p = opt->pattern_list; p; p = p->next) {
 733                if (p->token != GREP_PATTERN)
 734                        return 0; /* punt for "header only" and stuff */
 735        }
 736        return 1;
 737}
 738
 739static int look_ahead(struct grep_opt *opt,
 740                      unsigned long *left_p,
 741                      unsigned *lno_p,
 742                      char **bol_p)
 743{
 744        unsigned lno = *lno_p;
 745        char *bol = *bol_p;
 746        struct grep_pat *p;
 747        char *sp, *last_bol;
 748        regoff_t earliest = -1;
 749
 750        for (p = opt->pattern_list; p; p = p->next) {
 751                int hit;
 752                regmatch_t m;
 753
 754                if (p->fixed)
 755                        hit = !fixmatch(p, bol, bol + *left_p, &m);
 756                else
 757                        hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
 758                if (!hit || m.rm_so < 0 || m.rm_eo < 0)
 759                        continue;
 760                if (earliest < 0 || m.rm_so < earliest)
 761                        earliest = m.rm_so;
 762        }
 763
 764        if (earliest < 0) {
 765                *bol_p = bol + *left_p;
 766                *left_p = 0;
 767                return 1;
 768        }
 769        for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
 770                ; /* find the beginning of the line */
 771        last_bol = sp;
 772
 773        for (sp = bol; sp < last_bol; sp++) {
 774                if (*sp == '\n')
 775                        lno++;
 776        }
 777        *left_p -= last_bol - bol;
 778        *bol_p = last_bol;
 779        *lno_p = lno;
 780        return 0;
 781}
 782
 783int grep_threads_ok(const struct grep_opt *opt)
 784{
 785        /* If this condition is true, then we may use the attribute
 786         * machinery in grep_buffer_1. The attribute code is not
 787         * thread safe, so we disable the use of threads.
 788         */
 789        if (opt->funcname && !opt->unmatch_name_only && !opt->status_only &&
 790            !opt->name_only)
 791                return 0;
 792
 793        return 1;
 794}
 795
 796static void std_output(struct grep_opt *opt, const void *buf, size_t size)
 797{
 798        fwrite(buf, size, 1, stdout);
 799}
 800
 801static int grep_buffer_1(struct grep_opt *opt, const char *name,
 802                         char *buf, unsigned long size, int collect_hits)
 803{
 804        char *bol = buf;
 805        unsigned long left = size;
 806        unsigned lno = 1;
 807        unsigned last_hit = 0;
 808        int binary_match_only = 0;
 809        unsigned count = 0;
 810        int try_lookahead = 0;
 811        enum grep_context ctx = GREP_CONTEXT_HEAD;
 812        xdemitconf_t xecfg;
 813
 814        if (!opt->output)
 815                opt->output = std_output;
 816
 817        if (opt->last_shown && (opt->pre_context || opt->post_context) &&
 818            opt->output == std_output)
 819                opt->show_hunk_mark = 1;
 820        opt->last_shown = 0;
 821
 822        switch (opt->binary) {
 823        case GREP_BINARY_DEFAULT:
 824                if (buffer_is_binary(buf, size))
 825                        binary_match_only = 1;
 826                break;
 827        case GREP_BINARY_NOMATCH:
 828                if (buffer_is_binary(buf, size))
 829                        return 0; /* Assume unmatch */
 830                break;
 831        case GREP_BINARY_TEXT:
 832                break;
 833        default:
 834                die("bug: unknown binary handling mode");
 835        }
 836
 837        memset(&xecfg, 0, sizeof(xecfg));
 838        if (opt->funcname && !opt->unmatch_name_only && !opt->status_only &&
 839            !opt->name_only && !binary_match_only && !collect_hits) {
 840                struct userdiff_driver *drv = userdiff_find_by_path(name);
 841                if (drv && drv->funcname.pattern) {
 842                        const struct userdiff_funcname *pe = &drv->funcname;
 843                        xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags);
 844                        opt->priv = &xecfg;
 845                }
 846        }
 847        try_lookahead = should_lookahead(opt);
 848
 849        while (left) {
 850                char *eol, ch;
 851                int hit;
 852
 853                /*
 854                 * look_ahead() skips quickly to the line that possibly
 855                 * has the next hit; don't call it if we need to do
 856                 * something more than just skipping the current line
 857                 * in response to an unmatch for the current line.  E.g.
 858                 * inside a post-context window, we will show the current
 859                 * line as a context around the previous hit when it
 860                 * doesn't hit.
 861                 */
 862                if (try_lookahead
 863                    && !(last_hit
 864                         && lno <= last_hit + opt->post_context)
 865                    && look_ahead(opt, &left, &lno, &bol))
 866                        break;
 867                eol = end_of_line(bol, &left);
 868                ch = *eol;
 869                *eol = 0;
 870
 871                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 872                        ctx = GREP_CONTEXT_BODY;
 873
 874                hit = match_line(opt, bol, eol, ctx, collect_hits);
 875                *eol = ch;
 876
 877                if (collect_hits)
 878                        goto next_line;
 879
 880                /* "grep -v -e foo -e bla" should list lines
 881                 * that do not have either, so inversion should
 882                 * be done outside.
 883                 */
 884                if (opt->invert)
 885                        hit = !hit;
 886                if (opt->unmatch_name_only) {
 887                        if (hit)
 888                                return 0;
 889                        goto next_line;
 890                }
 891                if (hit) {
 892                        count++;
 893                        if (opt->status_only)
 894                                return 1;
 895                        if (opt->name_only) {
 896                                show_name(opt, name);
 897                                return 1;
 898                        }
 899                        if (opt->count)
 900                                goto next_line;
 901                        if (binary_match_only) {
 902                                opt->output(opt, "Binary file ", 12);
 903                                output_color(opt, name, strlen(name),
 904                                             opt->color_filename);
 905                                opt->output(opt, " matches\n", 9);
 906                                return 1;
 907                        }
 908                        /* Hit at this line.  If we haven't shown the
 909                         * pre-context lines, we would need to show them.
 910                         */
 911                        if (opt->pre_context)
 912                                show_pre_context(opt, name, buf, bol, lno);
 913                        else if (opt->funcname)
 914                                show_funcname_line(opt, name, buf, bol, lno);
 915                        show_line(opt, bol, eol, name, lno, ':');
 916                        last_hit = lno;
 917                }
 918                else if (last_hit &&
 919                         lno <= last_hit + opt->post_context) {
 920                        /* If the last hit is within the post context,
 921                         * we need to show this line.
 922                         */
 923                        show_line(opt, bol, eol, name, lno, '-');
 924                }
 925
 926        next_line:
 927                bol = eol + 1;
 928                if (!left)
 929                        break;
 930                left--;
 931                lno++;
 932        }
 933
 934        if (collect_hits)
 935                return 0;
 936
 937        if (opt->status_only)
 938                return 0;
 939        if (opt->unmatch_name_only) {
 940                /* We did not see any hit, so we want to show this */
 941                show_name(opt, name);
 942                return 1;
 943        }
 944
 945        xdiff_clear_find_func(&xecfg);
 946        opt->priv = NULL;
 947
 948        /* NEEDSWORK:
 949         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 950         * which feels mostly useless but sometimes useful.  Maybe
 951         * make it another option?  For now suppress them.
 952         */
 953        if (opt->count && count) {
 954                char buf[32];
 955                output_color(opt, name, strlen(name), opt->color_filename);
 956                output_sep(opt, ':');
 957                snprintf(buf, sizeof(buf), "%u\n", count);
 958                opt->output(opt, buf, strlen(buf));
 959                return 1;
 960        }
 961        return !!last_hit;
 962}
 963
 964static void clr_hit_marker(struct grep_expr *x)
 965{
 966        /* All-hit markers are meaningful only at the very top level
 967         * OR node.
 968         */
 969        while (1) {
 970                x->hit = 0;
 971                if (x->node != GREP_NODE_OR)
 972                        return;
 973                x->u.binary.left->hit = 0;
 974                x = x->u.binary.right;
 975        }
 976}
 977
 978static int chk_hit_marker(struct grep_expr *x)
 979{
 980        /* Top level nodes have hit markers.  See if they all are hits */
 981        while (1) {
 982                if (x->node != GREP_NODE_OR)
 983                        return x->hit;
 984                if (!x->u.binary.left->hit)
 985                        return 0;
 986                x = x->u.binary.right;
 987        }
 988}
 989
 990int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
 991{
 992        /*
 993         * we do not have to do the two-pass grep when we do not check
 994         * buffer-wide "all-match".
 995         */
 996        if (!opt->all_match)
 997                return grep_buffer_1(opt, name, buf, size, 0);
 998
 999        /* Otherwise the toplevel "or" terms hit a bit differently.
1000         * We first clear hit markers from them.
1001         */
1002        clr_hit_marker(opt->pattern_expression);
1003        grep_buffer_1(opt, name, buf, size, 1);
1004
1005        if (!chk_hit_marker(opt->pattern_expression))
1006                return 0;
1007
1008        return grep_buffer_1(opt, name, buf, size, 0);
1009}