451275d2980428a8891e0fcf9f8f15aa543f16b1
   1#include "cache.h"
   2#include "grep.h"
   3#include "userdiff.h"
   4#include "xdiff-interface.h"
   5#include "diff.h"
   6#include "diffcore.h"
   7#include "commit.h"
   8
   9static int grep_source_load(struct grep_source *gs);
  10static int grep_source_is_binary(struct grep_source *gs);
  11
  12static struct grep_opt grep_defaults;
  13
  14/*
  15 * Initialize the grep_defaults template with hardcoded defaults.
  16 * We could let the compiler do this, but without C99 initializers
  17 * the code gets unwieldy and unreadable, so...
  18 */
  19void init_grep_defaults(void)
  20{
  21        struct grep_opt *opt = &grep_defaults;
  22        static int run_once;
  23
  24        if (run_once)
  25                return;
  26        run_once++;
  27
  28        memset(opt, 0, sizeof(*opt));
  29        opt->relative = 1;
  30        opt->pathname = 1;
  31        opt->regflags = REG_NEWLINE;
  32        opt->max_depth = -1;
  33        opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
  34        opt->extended_regexp_option = 0;
  35        color_set(opt->color_context, "");
  36        color_set(opt->color_filename, "");
  37        color_set(opt->color_function, "");
  38        color_set(opt->color_lineno, "");
  39        color_set(opt->color_match_context, GIT_COLOR_BOLD_RED);
  40        color_set(opt->color_match_selected, GIT_COLOR_BOLD_RED);
  41        color_set(opt->color_selected, "");
  42        color_set(opt->color_sep, GIT_COLOR_CYAN);
  43        opt->color = -1;
  44}
  45
  46static int parse_pattern_type_arg(const char *opt, const char *arg)
  47{
  48        if (!strcmp(arg, "default"))
  49                return GREP_PATTERN_TYPE_UNSPECIFIED;
  50        else if (!strcmp(arg, "basic"))
  51                return GREP_PATTERN_TYPE_BRE;
  52        else if (!strcmp(arg, "extended"))
  53                return GREP_PATTERN_TYPE_ERE;
  54        else if (!strcmp(arg, "fixed"))
  55                return GREP_PATTERN_TYPE_FIXED;
  56        else if (!strcmp(arg, "perl"))
  57                return GREP_PATTERN_TYPE_PCRE;
  58        die("bad %s argument: %s", opt, arg);
  59}
  60
  61/*
  62 * Read the configuration file once and store it in
  63 * the grep_defaults template.
  64 */
  65int grep_config(const char *var, const char *value, void *cb)
  66{
  67        struct grep_opt *opt = &grep_defaults;
  68        char *color = NULL;
  69
  70        if (userdiff_config(var, value) < 0)
  71                return -1;
  72
  73        if (!strcmp(var, "grep.extendedregexp")) {
  74                if (git_config_bool(var, value))
  75                        opt->extended_regexp_option = 1;
  76                else
  77                        opt->extended_regexp_option = 0;
  78                return 0;
  79        }
  80
  81        if (!strcmp(var, "grep.patterntype")) {
  82                opt->pattern_type_option = parse_pattern_type_arg(var, value);
  83                return 0;
  84        }
  85
  86        if (!strcmp(var, "grep.linenumber")) {
  87                opt->linenum = git_config_bool(var, value);
  88                return 0;
  89        }
  90
  91        if (!strcmp(var, "grep.fullname")) {
  92                opt->relative = !git_config_bool(var, value);
  93                return 0;
  94        }
  95
  96        if (!strcmp(var, "color.grep"))
  97                opt->color = git_config_colorbool(var, value);
  98        else if (!strcmp(var, "color.grep.context"))
  99                color = opt->color_context;
 100        else if (!strcmp(var, "color.grep.filename"))
 101                color = opt->color_filename;
 102        else if (!strcmp(var, "color.grep.function"))
 103                color = opt->color_function;
 104        else if (!strcmp(var, "color.grep.linenumber"))
 105                color = opt->color_lineno;
 106        else if (!strcmp(var, "color.grep.matchcontext"))
 107                color = opt->color_match_context;
 108        else if (!strcmp(var, "color.grep.matchselected"))
 109                color = opt->color_match_selected;
 110        else if (!strcmp(var, "color.grep.selected"))
 111                color = opt->color_selected;
 112        else if (!strcmp(var, "color.grep.separator"))
 113                color = opt->color_sep;
 114        else if (!strcmp(var, "color.grep.match")) {
 115                int rc = 0;
 116                if (!value)
 117                        return config_error_nonbool(var);
 118                rc |= color_parse(value, opt->color_match_context);
 119                rc |= color_parse(value, opt->color_match_selected);
 120                return rc;
 121        }
 122
 123        if (color) {
 124                if (!value)
 125                        return config_error_nonbool(var);
 126                return color_parse(value, color);
 127        }
 128        return 0;
 129}
 130
 131/*
 132 * Initialize one instance of grep_opt and copy the
 133 * default values from the template we read the configuration
 134 * information in an earlier call to git_config(grep_config).
 135 */
 136void grep_init(struct grep_opt *opt, const char *prefix)
 137{
 138        struct grep_opt *def = &grep_defaults;
 139
 140        memset(opt, 0, sizeof(*opt));
 141        opt->prefix = prefix;
 142        opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
 143        opt->pattern_tail = &opt->pattern_list;
 144        opt->header_tail = &opt->header_list;
 145
 146        opt->color = def->color;
 147        opt->extended_regexp_option = def->extended_regexp_option;
 148        opt->pattern_type_option = def->pattern_type_option;
 149        opt->linenum = def->linenum;
 150        opt->max_depth = def->max_depth;
 151        opt->pathname = def->pathname;
 152        opt->regflags = def->regflags;
 153        opt->relative = def->relative;
 154
 155        color_set(opt->color_context, def->color_context);
 156        color_set(opt->color_filename, def->color_filename);
 157        color_set(opt->color_function, def->color_function);
 158        color_set(opt->color_lineno, def->color_lineno);
 159        color_set(opt->color_match_context, def->color_match_context);
 160        color_set(opt->color_match_selected, def->color_match_selected);
 161        color_set(opt->color_selected, def->color_selected);
 162        color_set(opt->color_sep, def->color_sep);
 163}
 164
 165void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 166{
 167        if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
 168                grep_set_pattern_type_option(pattern_type, opt);
 169        else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
 170                grep_set_pattern_type_option(opt->pattern_type_option, opt);
 171        else if (opt->extended_regexp_option)
 172                grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
 173}
 174
 175void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 176{
 177        switch (pattern_type) {
 178        case GREP_PATTERN_TYPE_UNSPECIFIED:
 179                /* fall through */
 180
 181        case GREP_PATTERN_TYPE_BRE:
 182                opt->fixed = 0;
 183                opt->pcre = 0;
 184                opt->regflags &= ~REG_EXTENDED;
 185                break;
 186
 187        case GREP_PATTERN_TYPE_ERE:
 188                opt->fixed = 0;
 189                opt->pcre = 0;
 190                opt->regflags |= REG_EXTENDED;
 191                break;
 192
 193        case GREP_PATTERN_TYPE_FIXED:
 194                opt->fixed = 1;
 195                opt->pcre = 0;
 196                opt->regflags &= ~REG_EXTENDED;
 197                break;
 198
 199        case GREP_PATTERN_TYPE_PCRE:
 200                opt->fixed = 0;
 201                opt->pcre = 1;
 202                opt->regflags &= ~REG_EXTENDED;
 203                break;
 204        }
 205}
 206
 207static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
 208                                        const char *origin, int no,
 209                                        enum grep_pat_token t,
 210                                        enum grep_header_field field)
 211{
 212        struct grep_pat *p = xcalloc(1, sizeof(*p));
 213        p->pattern = xmemdupz(pat, patlen);
 214        p->patternlen = patlen;
 215        p->origin = origin;
 216        p->no = no;
 217        p->token = t;
 218        p->field = field;
 219        return p;
 220}
 221
 222static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
 223{
 224        **tail = p;
 225        *tail = &p->next;
 226        p->next = NULL;
 227
 228        switch (p->token) {
 229        case GREP_PATTERN: /* atom */
 230        case GREP_PATTERN_HEAD:
 231        case GREP_PATTERN_BODY:
 232                for (;;) {
 233                        struct grep_pat *new_pat;
 234                        size_t len = 0;
 235                        char *cp = p->pattern + p->patternlen, *nl = NULL;
 236                        while (++len <= p->patternlen) {
 237                                if (*(--cp) == '\n') {
 238                                        nl = cp;
 239                                        break;
 240                                }
 241                        }
 242                        if (!nl)
 243                                break;
 244                        new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
 245                                                  p->no, p->token, p->field);
 246                        new_pat->next = p->next;
 247                        if (!p->next)
 248                                *tail = &new_pat->next;
 249                        p->next = new_pat;
 250                        *nl = '\0';
 251                        p->patternlen -= len;
 252                }
 253                break;
 254        default:
 255                break;
 256        }
 257}
 258
 259void append_header_grep_pattern(struct grep_opt *opt,
 260                                enum grep_header_field field, const char *pat)
 261{
 262        struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
 263                                             GREP_PATTERN_HEAD, field);
 264        if (field == GREP_HEADER_REFLOG)
 265                opt->use_reflog_filter = 1;
 266        do_append_grep_pat(&opt->header_tail, p);
 267}
 268
 269void append_grep_pattern(struct grep_opt *opt, const char *pat,
 270                         const char *origin, int no, enum grep_pat_token t)
 271{
 272        append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 273}
 274
 275void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
 276                     const char *origin, int no, enum grep_pat_token t)
 277{
 278        struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
 279        do_append_grep_pat(&opt->pattern_tail, p);
 280}
 281
 282struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 283{
 284        struct grep_pat *pat;
 285        struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
 286        *ret = *opt;
 287
 288        ret->pattern_list = NULL;
 289        ret->pattern_tail = &ret->pattern_list;
 290
 291        for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
 292        {
 293                if(pat->token == GREP_PATTERN_HEAD)
 294                        append_header_grep_pattern(ret, pat->field,
 295                                                   pat->pattern);
 296                else
 297                        append_grep_pat(ret, pat->pattern, pat->patternlen,
 298                                        pat->origin, pat->no, pat->token);
 299        }
 300
 301        return ret;
 302}
 303
 304static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 305                const char *error)
 306{
 307        char where[1024];
 308
 309        if (p->no)
 310                xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
 311        else if (p->origin)
 312                xsnprintf(where, sizeof(where), "%s, ", p->origin);
 313        else
 314                where[0] = 0;
 315
 316        die("%s'%s': %s", where, p->pattern, error);
 317}
 318
 319#ifdef USE_LIBPCRE
 320static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
 321{
 322        const char *error;
 323        int erroffset;
 324        int options = PCRE_MULTILINE;
 325
 326        if (opt->ignore_case)
 327                options |= PCRE_CASELESS;
 328
 329        p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
 330                        NULL);
 331        if (!p->pcre_regexp)
 332                compile_regexp_failed(p, error);
 333
 334        p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error);
 335        if (!p->pcre_extra_info && error)
 336                die("%s", error);
 337}
 338
 339static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
 340                regmatch_t *match, int eflags)
 341{
 342        int ovector[30], ret, flags = 0;
 343
 344        if (eflags & REG_NOTBOL)
 345                flags |= PCRE_NOTBOL;
 346
 347        ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
 348                        0, flags, ovector, ARRAY_SIZE(ovector));
 349        if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 350                die("pcre_exec failed with error code %d", ret);
 351        if (ret > 0) {
 352                ret = 0;
 353                match->rm_so = ovector[0];
 354                match->rm_eo = ovector[1];
 355        }
 356
 357        return ret;
 358}
 359
 360static void free_pcre_regexp(struct grep_pat *p)
 361{
 362        pcre_free(p->pcre_regexp);
 363        pcre_free(p->pcre_extra_info);
 364}
 365#else /* !USE_LIBPCRE */
 366static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
 367{
 368        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 369}
 370
 371static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
 372                regmatch_t *match, int eflags)
 373{
 374        return 1;
 375}
 376
 377static void free_pcre_regexp(struct grep_pat *p)
 378{
 379}
 380#endif /* !USE_LIBPCRE */
 381
 382static int is_fixed(const char *s, size_t len)
 383{
 384        size_t i;
 385
 386        /* regcomp cannot accept patterns with NULs so we
 387         * consider any pattern containing a NUL fixed.
 388         */
 389        if (memchr(s, 0, len))
 390                return 1;
 391
 392        for (i = 0; i < len; i++) {
 393                if (is_regex_special(s[i]))
 394                        return 0;
 395        }
 396
 397        return 1;
 398}
 399
 400static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 401{
 402        int icase, ascii_only;
 403        int err;
 404
 405        p->word_regexp = opt->word_regexp;
 406        p->ignore_case = opt->ignore_case;
 407        icase          = opt->regflags & REG_ICASE || p->ignore_case;
 408        ascii_only     = !has_non_ascii(p->pattern);
 409
 410        if (opt->fixed)
 411                p->fixed = 1;
 412        else if ((!icase || ascii_only) &&
 413                 is_fixed(p->pattern, p->patternlen))
 414                p->fixed = 1;
 415        else
 416                p->fixed = 0;
 417
 418        if (p->fixed) {
 419                if (opt->regflags & REG_ICASE || p->ignore_case)
 420                        p->kws = kwsalloc(tolower_trans_tbl);
 421                else
 422                        p->kws = kwsalloc(NULL);
 423                kwsincr(p->kws, p->pattern, p->patternlen);
 424                kwsprep(p->kws);
 425                return;
 426        }
 427
 428        if (opt->pcre) {
 429                compile_pcre_regexp(p, opt);
 430                return;
 431        }
 432
 433        err = regcomp(&p->regexp, p->pattern, opt->regflags);
 434        if (err) {
 435                char errbuf[1024];
 436                regerror(err, &p->regexp, errbuf, 1024);
 437                regfree(&p->regexp);
 438                compile_regexp_failed(p, errbuf);
 439        }
 440}
 441
 442static struct grep_expr *compile_pattern_or(struct grep_pat **);
 443static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
 444{
 445        struct grep_pat *p;
 446        struct grep_expr *x;
 447
 448        p = *list;
 449        if (!p)
 450                return NULL;
 451        switch (p->token) {
 452        case GREP_PATTERN: /* atom */
 453        case GREP_PATTERN_HEAD:
 454        case GREP_PATTERN_BODY:
 455                x = xcalloc(1, sizeof (struct grep_expr));
 456                x->node = GREP_NODE_ATOM;
 457                x->u.atom = p;
 458                *list = p->next;
 459                return x;
 460        case GREP_OPEN_PAREN:
 461                *list = p->next;
 462                x = compile_pattern_or(list);
 463                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 464                        die("unmatched parenthesis");
 465                *list = (*list)->next;
 466                return x;
 467        default:
 468                return NULL;
 469        }
 470}
 471
 472static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 473{
 474        struct grep_pat *p;
 475        struct grep_expr *x;
 476
 477        p = *list;
 478        if (!p)
 479                return NULL;
 480        switch (p->token) {
 481        case GREP_NOT:
 482                if (!p->next)
 483                        die("--not not followed by pattern expression");
 484                *list = p->next;
 485                x = xcalloc(1, sizeof (struct grep_expr));
 486                x->node = GREP_NODE_NOT;
 487                x->u.unary = compile_pattern_not(list);
 488                if (!x->u.unary)
 489                        die("--not followed by non pattern expression");
 490                return x;
 491        default:
 492                return compile_pattern_atom(list);
 493        }
 494}
 495
 496static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 497{
 498        struct grep_pat *p;
 499        struct grep_expr *x, *y, *z;
 500
 501        x = compile_pattern_not(list);
 502        p = *list;
 503        if (p && p->token == GREP_AND) {
 504                if (!p->next)
 505                        die("--and not followed by pattern expression");
 506                *list = p->next;
 507                y = compile_pattern_and(list);
 508                if (!y)
 509                        die("--and not followed by pattern expression");
 510                z = xcalloc(1, sizeof (struct grep_expr));
 511                z->node = GREP_NODE_AND;
 512                z->u.binary.left = x;
 513                z->u.binary.right = y;
 514                return z;
 515        }
 516        return x;
 517}
 518
 519static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 520{
 521        struct grep_pat *p;
 522        struct grep_expr *x, *y, *z;
 523
 524        x = compile_pattern_and(list);
 525        p = *list;
 526        if (x && p && p->token != GREP_CLOSE_PAREN) {
 527                y = compile_pattern_or(list);
 528                if (!y)
 529                        die("not a pattern expression %s", p->pattern);
 530                z = xcalloc(1, sizeof (struct grep_expr));
 531                z->node = GREP_NODE_OR;
 532                z->u.binary.left = x;
 533                z->u.binary.right = y;
 534                return z;
 535        }
 536        return x;
 537}
 538
 539static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 540{
 541        return compile_pattern_or(list);
 542}
 543
 544static void indent(int in)
 545{
 546        while (in-- > 0)
 547                fputc(' ', stderr);
 548}
 549
 550static void dump_grep_pat(struct grep_pat *p)
 551{
 552        switch (p->token) {
 553        case GREP_AND: fprintf(stderr, "*and*"); break;
 554        case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
 555        case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
 556        case GREP_NOT: fprintf(stderr, "*not*"); break;
 557        case GREP_OR: fprintf(stderr, "*or*"); break;
 558
 559        case GREP_PATTERN: fprintf(stderr, "pattern"); break;
 560        case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
 561        case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
 562        }
 563
 564        switch (p->token) {
 565        default: break;
 566        case GREP_PATTERN_HEAD:
 567                fprintf(stderr, "<head %d>", p->field); break;
 568        case GREP_PATTERN_BODY:
 569                fprintf(stderr, "<body>"); break;
 570        }
 571        switch (p->token) {
 572        default: break;
 573        case GREP_PATTERN_HEAD:
 574        case GREP_PATTERN_BODY:
 575        case GREP_PATTERN:
 576                fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
 577                break;
 578        }
 579        fputc('\n', stderr);
 580}
 581
 582static void dump_grep_expression_1(struct grep_expr *x, int in)
 583{
 584        indent(in);
 585        switch (x->node) {
 586        case GREP_NODE_TRUE:
 587                fprintf(stderr, "true\n");
 588                break;
 589        case GREP_NODE_ATOM:
 590                dump_grep_pat(x->u.atom);
 591                break;
 592        case GREP_NODE_NOT:
 593                fprintf(stderr, "(not\n");
 594                dump_grep_expression_1(x->u.unary, in+1);
 595                indent(in);
 596                fprintf(stderr, ")\n");
 597                break;
 598        case GREP_NODE_AND:
 599                fprintf(stderr, "(and\n");
 600                dump_grep_expression_1(x->u.binary.left, in+1);
 601                dump_grep_expression_1(x->u.binary.right, in+1);
 602                indent(in);
 603                fprintf(stderr, ")\n");
 604                break;
 605        case GREP_NODE_OR:
 606                fprintf(stderr, "(or\n");
 607                dump_grep_expression_1(x->u.binary.left, in+1);
 608                dump_grep_expression_1(x->u.binary.right, in+1);
 609                indent(in);
 610                fprintf(stderr, ")\n");
 611                break;
 612        }
 613}
 614
 615static void dump_grep_expression(struct grep_opt *opt)
 616{
 617        struct grep_expr *x = opt->pattern_expression;
 618
 619        if (opt->all_match)
 620                fprintf(stderr, "[all-match]\n");
 621        dump_grep_expression_1(x, 0);
 622        fflush(NULL);
 623}
 624
 625static struct grep_expr *grep_true_expr(void)
 626{
 627        struct grep_expr *z = xcalloc(1, sizeof(*z));
 628        z->node = GREP_NODE_TRUE;
 629        return z;
 630}
 631
 632static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
 633{
 634        struct grep_expr *z = xcalloc(1, sizeof(*z));
 635        z->node = GREP_NODE_OR;
 636        z->u.binary.left = left;
 637        z->u.binary.right = right;
 638        return z;
 639}
 640
 641static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
 642{
 643        struct grep_pat *p;
 644        struct grep_expr *header_expr;
 645        struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
 646        enum grep_header_field fld;
 647
 648        if (!opt->header_list)
 649                return NULL;
 650
 651        for (p = opt->header_list; p; p = p->next) {
 652                if (p->token != GREP_PATTERN_HEAD)
 653                        die("bug: a non-header pattern in grep header list.");
 654                if (p->field < GREP_HEADER_FIELD_MIN ||
 655                    GREP_HEADER_FIELD_MAX <= p->field)
 656                        die("bug: unknown header field %d", p->field);
 657                compile_regexp(p, opt);
 658        }
 659
 660        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
 661                header_group[fld] = NULL;
 662
 663        for (p = opt->header_list; p; p = p->next) {
 664                struct grep_expr *h;
 665                struct grep_pat *pp = p;
 666
 667                h = compile_pattern_atom(&pp);
 668                if (!h || pp != p->next)
 669                        die("bug: malformed header expr");
 670                if (!header_group[p->field]) {
 671                        header_group[p->field] = h;
 672                        continue;
 673                }
 674                header_group[p->field] = grep_or_expr(h, header_group[p->field]);
 675        }
 676
 677        header_expr = NULL;
 678
 679        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
 680                if (!header_group[fld])
 681                        continue;
 682                if (!header_expr)
 683                        header_expr = grep_true_expr();
 684                header_expr = grep_or_expr(header_group[fld], header_expr);
 685        }
 686        return header_expr;
 687}
 688
 689static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
 690{
 691        struct grep_expr *z = x;
 692
 693        while (x) {
 694                assert(x->node == GREP_NODE_OR);
 695                if (x->u.binary.right &&
 696                    x->u.binary.right->node == GREP_NODE_TRUE) {
 697                        x->u.binary.right = y;
 698                        break;
 699                }
 700                x = x->u.binary.right;
 701        }
 702        return z;
 703}
 704
 705static void compile_grep_patterns_real(struct grep_opt *opt)
 706{
 707        struct grep_pat *p;
 708        struct grep_expr *header_expr = prep_header_patterns(opt);
 709
 710        for (p = opt->pattern_list; p; p = p->next) {
 711                switch (p->token) {
 712                case GREP_PATTERN: /* atom */
 713                case GREP_PATTERN_HEAD:
 714                case GREP_PATTERN_BODY:
 715                        compile_regexp(p, opt);
 716                        break;
 717                default:
 718                        opt->extended = 1;
 719                        break;
 720                }
 721        }
 722
 723        if (opt->all_match || header_expr)
 724                opt->extended = 1;
 725        else if (!opt->extended && !opt->debug)
 726                return;
 727
 728        p = opt->pattern_list;
 729        if (p)
 730                opt->pattern_expression = compile_pattern_expr(&p);
 731        if (p)
 732                die("incomplete pattern expression: %s", p->pattern);
 733
 734        if (!header_expr)
 735                return;
 736
 737        if (!opt->pattern_expression)
 738                opt->pattern_expression = header_expr;
 739        else if (opt->all_match)
 740                opt->pattern_expression = grep_splice_or(header_expr,
 741                                                         opt->pattern_expression);
 742        else
 743                opt->pattern_expression = grep_or_expr(opt->pattern_expression,
 744                                                       header_expr);
 745        opt->all_match = 1;
 746}
 747
 748void compile_grep_patterns(struct grep_opt *opt)
 749{
 750        compile_grep_patterns_real(opt);
 751        if (opt->debug)
 752                dump_grep_expression(opt);
 753}
 754
 755static void free_pattern_expr(struct grep_expr *x)
 756{
 757        switch (x->node) {
 758        case GREP_NODE_TRUE:
 759        case GREP_NODE_ATOM:
 760                break;
 761        case GREP_NODE_NOT:
 762                free_pattern_expr(x->u.unary);
 763                break;
 764        case GREP_NODE_AND:
 765        case GREP_NODE_OR:
 766                free_pattern_expr(x->u.binary.left);
 767                free_pattern_expr(x->u.binary.right);
 768                break;
 769        }
 770        free(x);
 771}
 772
 773void free_grep_patterns(struct grep_opt *opt)
 774{
 775        struct grep_pat *p, *n;
 776
 777        for (p = opt->pattern_list; p; p = n) {
 778                n = p->next;
 779                switch (p->token) {
 780                case GREP_PATTERN: /* atom */
 781                case GREP_PATTERN_HEAD:
 782                case GREP_PATTERN_BODY:
 783                        if (p->kws)
 784                                kwsfree(p->kws);
 785                        else if (p->pcre_regexp)
 786                                free_pcre_regexp(p);
 787                        else
 788                                regfree(&p->regexp);
 789                        free(p->pattern);
 790                        break;
 791                default:
 792                        break;
 793                }
 794                free(p);
 795        }
 796
 797        if (!opt->extended)
 798                return;
 799        free_pattern_expr(opt->pattern_expression);
 800}
 801
 802static char *end_of_line(char *cp, unsigned long *left)
 803{
 804        unsigned long l = *left;
 805        while (l && *cp != '\n') {
 806                l--;
 807                cp++;
 808        }
 809        *left = l;
 810        return cp;
 811}
 812
 813static int word_char(char ch)
 814{
 815        return isalnum(ch) || ch == '_';
 816}
 817
 818static void output_color(struct grep_opt *opt, const void *data, size_t size,
 819                         const char *color)
 820{
 821        if (want_color(opt->color) && color && color[0]) {
 822                opt->output(opt, color, strlen(color));
 823                opt->output(opt, data, size);
 824                opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
 825        } else
 826                opt->output(opt, data, size);
 827}
 828
 829static void output_sep(struct grep_opt *opt, char sign)
 830{
 831        if (opt->null_following_name)
 832                opt->output(opt, "\0", 1);
 833        else
 834                output_color(opt, &sign, 1, opt->color_sep);
 835}
 836
 837static void show_name(struct grep_opt *opt, const char *name)
 838{
 839        output_color(opt, name, strlen(name), opt->color_filename);
 840        opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 841}
 842
 843static int fixmatch(struct grep_pat *p, char *line, char *eol,
 844                    regmatch_t *match)
 845{
 846        struct kwsmatch kwsm;
 847        size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
 848        if (offset == -1) {
 849                match->rm_so = match->rm_eo = -1;
 850                return REG_NOMATCH;
 851        } else {
 852                match->rm_so = offset;
 853                match->rm_eo = match->rm_so + kwsm.size[0];
 854                return 0;
 855        }
 856}
 857
 858static int regmatch(const regex_t *preg, char *line, char *eol,
 859                    regmatch_t *match, int eflags)
 860{
 861#ifdef REG_STARTEND
 862        match->rm_so = 0;
 863        match->rm_eo = eol - line;
 864        eflags |= REG_STARTEND;
 865#endif
 866        return regexec(preg, line, 1, match, eflags);
 867}
 868
 869static int patmatch(struct grep_pat *p, char *line, char *eol,
 870                    regmatch_t *match, int eflags)
 871{
 872        int hit;
 873
 874        if (p->fixed)
 875                hit = !fixmatch(p, line, eol, match);
 876        else if (p->pcre_regexp)
 877                hit = !pcrematch(p, line, eol, match, eflags);
 878        else
 879                hit = !regmatch(&p->regexp, line, eol, match, eflags);
 880
 881        return hit;
 882}
 883
 884static int strip_timestamp(char *bol, char **eol_p)
 885{
 886        char *eol = *eol_p;
 887        int ch;
 888
 889        while (bol < --eol) {
 890                if (*eol != '>')
 891                        continue;
 892                *eol_p = ++eol;
 893                ch = *eol;
 894                *eol = '\0';
 895                return ch;
 896        }
 897        return 0;
 898}
 899
 900static struct {
 901        const char *field;
 902        size_t len;
 903} header_field[] = {
 904        { "author ", 7 },
 905        { "committer ", 10 },
 906        { "reflog ", 7 },
 907};
 908
 909static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 910                             enum grep_context ctx,
 911                             regmatch_t *pmatch, int eflags)
 912{
 913        int hit = 0;
 914        int saved_ch = 0;
 915        const char *start = bol;
 916
 917        if ((p->token != GREP_PATTERN) &&
 918            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 919                return 0;
 920
 921        if (p->token == GREP_PATTERN_HEAD) {
 922                const char *field;
 923                size_t len;
 924                assert(p->field < ARRAY_SIZE(header_field));
 925                field = header_field[p->field].field;
 926                len = header_field[p->field].len;
 927                if (strncmp(bol, field, len))
 928                        return 0;
 929                bol += len;
 930                switch (p->field) {
 931                case GREP_HEADER_AUTHOR:
 932                case GREP_HEADER_COMMITTER:
 933                        saved_ch = strip_timestamp(bol, &eol);
 934                        break;
 935                default:
 936                        break;
 937                }
 938        }
 939
 940 again:
 941        hit = patmatch(p, bol, eol, pmatch, eflags);
 942
 943        if (hit && p->word_regexp) {
 944                if ((pmatch[0].rm_so < 0) ||
 945                    (eol - bol) < pmatch[0].rm_so ||
 946                    (pmatch[0].rm_eo < 0) ||
 947                    (eol - bol) < pmatch[0].rm_eo)
 948                        die("regexp returned nonsense");
 949
 950                /* Match beginning must be either beginning of the
 951                 * line, or at word boundary (i.e. the last char must
 952                 * not be a word char).  Similarly, match end must be
 953                 * either end of the line, or at word boundary
 954                 * (i.e. the next char must not be a word char).
 955                 */
 956                if ( ((pmatch[0].rm_so == 0) ||
 957                      !word_char(bol[pmatch[0].rm_so-1])) &&
 958                     ((pmatch[0].rm_eo == (eol-bol)) ||
 959                      !word_char(bol[pmatch[0].rm_eo])) )
 960                        ;
 961                else
 962                        hit = 0;
 963
 964                /* Words consist of at least one character. */
 965                if (pmatch->rm_so == pmatch->rm_eo)
 966                        hit = 0;
 967
 968                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 969                        /* There could be more than one match on the
 970                         * line, and the first match might not be
 971                         * strict word match.  But later ones could be!
 972                         * Forward to the next possible start, i.e. the
 973                         * next position following a non-word char.
 974                         */
 975                        bol = pmatch[0].rm_so + bol + 1;
 976                        while (word_char(bol[-1]) && bol < eol)
 977                                bol++;
 978                        eflags |= REG_NOTBOL;
 979                        if (bol < eol)
 980                                goto again;
 981                }
 982        }
 983        if (p->token == GREP_PATTERN_HEAD && saved_ch)
 984                *eol = saved_ch;
 985        if (hit) {
 986                pmatch[0].rm_so += bol - start;
 987                pmatch[0].rm_eo += bol - start;
 988        }
 989        return hit;
 990}
 991
 992static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
 993                           enum grep_context ctx, int collect_hits)
 994{
 995        int h = 0;
 996        regmatch_t match;
 997
 998        if (!x)
 999                die("Not a valid grep expression");
1000        switch (x->node) {
1001        case GREP_NODE_TRUE:
1002                h = 1;
1003                break;
1004        case GREP_NODE_ATOM:
1005                h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0);
1006                break;
1007        case GREP_NODE_NOT:
1008                h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0);
1009                break;
1010        case GREP_NODE_AND:
1011                if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0))
1012                        return 0;
1013                h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0);
1014                break;
1015        case GREP_NODE_OR:
1016                if (!collect_hits)
1017                        return (match_expr_eval(x->u.binary.left,
1018                                                bol, eol, ctx, 0) ||
1019                                match_expr_eval(x->u.binary.right,
1020                                                bol, eol, ctx, 0));
1021                h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0);
1022                x->u.binary.left->hit |= h;
1023                h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1);
1024                break;
1025        default:
1026                die("Unexpected node type (internal error) %d", x->node);
1027        }
1028        if (collect_hits)
1029                x->hit |= h;
1030        return h;
1031}
1032
1033static int match_expr(struct grep_opt *opt, char *bol, char *eol,
1034                      enum grep_context ctx, int collect_hits)
1035{
1036        struct grep_expr *x = opt->pattern_expression;
1037        return match_expr_eval(x, bol, eol, ctx, collect_hits);
1038}
1039
1040static int match_line(struct grep_opt *opt, char *bol, char *eol,
1041                      enum grep_context ctx, int collect_hits)
1042{
1043        struct grep_pat *p;
1044        regmatch_t match;
1045
1046        if (opt->extended)
1047                return match_expr(opt, bol, eol, ctx, collect_hits);
1048
1049        /* we do not call with collect_hits without being extended */
1050        for (p = opt->pattern_list; p; p = p->next) {
1051                if (match_one_pattern(p, bol, eol, ctx, &match, 0))
1052                        return 1;
1053        }
1054        return 0;
1055}
1056
1057static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
1058                              enum grep_context ctx,
1059                              regmatch_t *pmatch, int eflags)
1060{
1061        regmatch_t match;
1062
1063        if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
1064                return 0;
1065        if (match.rm_so < 0 || match.rm_eo < 0)
1066                return 0;
1067        if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1068                if (match.rm_so > pmatch->rm_so)
1069                        return 1;
1070                if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1071                        return 1;
1072        }
1073        pmatch->rm_so = match.rm_so;
1074        pmatch->rm_eo = match.rm_eo;
1075        return 1;
1076}
1077
1078static int next_match(struct grep_opt *opt, char *bol, char *eol,
1079                      enum grep_context ctx, regmatch_t *pmatch, int eflags)
1080{
1081        struct grep_pat *p;
1082        int hit = 0;
1083
1084        pmatch->rm_so = pmatch->rm_eo = -1;
1085        if (bol < eol) {
1086                for (p = opt->pattern_list; p; p = p->next) {
1087                        switch (p->token) {
1088                        case GREP_PATTERN: /* atom */
1089                        case GREP_PATTERN_HEAD:
1090                        case GREP_PATTERN_BODY:
1091                                hit |= match_next_pattern(p, bol, eol, ctx,
1092                                                          pmatch, eflags);
1093                                break;
1094                        default:
1095                                break;
1096                        }
1097                }
1098        }
1099        return hit;
1100}
1101
1102static void show_line(struct grep_opt *opt, char *bol, char *eol,
1103                      const char *name, unsigned lno, char sign)
1104{
1105        int rest = eol - bol;
1106        const char *match_color, *line_color = NULL;
1107
1108        if (opt->file_break && opt->last_shown == 0) {
1109                if (opt->show_hunk_mark)
1110                        opt->output(opt, "\n", 1);
1111        } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1112                if (opt->last_shown == 0) {
1113                        if (opt->show_hunk_mark) {
1114                                output_color(opt, "--", 2, opt->color_sep);
1115                                opt->output(opt, "\n", 1);
1116                        }
1117                } else if (lno > opt->last_shown + 1) {
1118                        output_color(opt, "--", 2, opt->color_sep);
1119                        opt->output(opt, "\n", 1);
1120                }
1121        }
1122        if (opt->heading && opt->last_shown == 0) {
1123                output_color(opt, name, strlen(name), opt->color_filename);
1124                opt->output(opt, "\n", 1);
1125        }
1126        opt->last_shown = lno;
1127
1128        if (!opt->heading && opt->pathname) {
1129                output_color(opt, name, strlen(name), opt->color_filename);
1130                output_sep(opt, sign);
1131        }
1132        if (opt->linenum) {
1133                char buf[32];
1134                snprintf(buf, sizeof(buf), "%d", lno);
1135                output_color(opt, buf, strlen(buf), opt->color_lineno);
1136                output_sep(opt, sign);
1137        }
1138        if (opt->color) {
1139                regmatch_t match;
1140                enum grep_context ctx = GREP_CONTEXT_BODY;
1141                int ch = *eol;
1142                int eflags = 0;
1143
1144                if (sign == ':')
1145                        match_color = opt->color_match_selected;
1146                else
1147                        match_color = opt->color_match_context;
1148                if (sign == ':')
1149                        line_color = opt->color_selected;
1150                else if (sign == '-')
1151                        line_color = opt->color_context;
1152                else if (sign == '=')
1153                        line_color = opt->color_function;
1154                *eol = '\0';
1155                while (next_match(opt, bol, eol, ctx, &match, eflags)) {
1156                        if (match.rm_so == match.rm_eo)
1157                                break;
1158
1159                        output_color(opt, bol, match.rm_so, line_color);
1160                        output_color(opt, bol + match.rm_so,
1161                                     match.rm_eo - match.rm_so, match_color);
1162                        bol += match.rm_eo;
1163                        rest -= match.rm_eo;
1164                        eflags = REG_NOTBOL;
1165                }
1166                *eol = ch;
1167        }
1168        output_color(opt, bol, rest, line_color);
1169        opt->output(opt, "\n", 1);
1170}
1171
#ifndef NO_PTHREADS
/* When zero, the lock helpers below do nothing. */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

/* Take grep_attr_mutex, but only when locking is enabled. */
static inline void grep_attr_lock(void)
{
        if (!grep_use_locks)
                return;
        pthread_mutex_lock(&grep_attr_mutex);
}

/* Release grep_attr_mutex, but only when locking is enabled. */
static inline void grep_attr_unlock(void)
{
        if (!grep_use_locks)
                return;
        pthread_mutex_unlock(&grep_attr_mutex);
}

/*
 * Same as grep_attr_mutex, but protecting the thread-unsafe object db access.
 */
pthread_mutex_t grep_read_mutex;

#else
#define grep_attr_lock()
#define grep_attr_unlock()
#endif
1202
1203static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
1204{
1205        xdemitconf_t *xecfg = opt->priv;
1206        if (xecfg && !xecfg->find_func) {
1207                grep_source_load_driver(gs);
1208                if (gs->driver->funcname.pattern) {
1209                        const struct userdiff_funcname *pe = &gs->driver->funcname;
1210                        xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1211                } else {
1212                        xecfg = opt->priv = NULL;
1213                }
1214        }
1215
1216        if (xecfg) {
1217                char buf[1];
1218                return xecfg->find_func(bol, eol - bol, buf, 1,
1219                                        xecfg->find_func_priv) >= 0;
1220        }
1221
1222        if (bol == eol)
1223                return 0;
1224        if (isalpha(*bol) || *bol == '_' || *bol == '$')
1225                return 1;
1226        return 0;
1227}
1228
1229static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1230                               char *bol, unsigned lno)
1231{
1232        while (bol > gs->buf) {
1233                char *eol = --bol;
1234
1235                while (bol > gs->buf && bol[-1] != '\n')
1236                        bol--;
1237                lno--;
1238
1239                if (lno <= opt->last_shown)
1240                        break;
1241
1242                if (match_funcname(opt, gs, bol, eol)) {
1243                        show_line(opt, bol, eol, gs->name, lno, '=');
1244                        break;
1245                }
1246        }
1247}
1248
1249static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
1250                             char *bol, char *end, unsigned lno)
1251{
1252        unsigned cur = lno, from = 1, funcname_lno = 0;
1253        int funcname_needed = !!opt->funcname;
1254
1255        if (opt->funcbody && !match_funcname(opt, gs, bol, end))
1256                funcname_needed = 2;
1257
1258        if (opt->pre_context < lno)
1259                from = lno - opt->pre_context;
1260        if (from <= opt->last_shown)
1261                from = opt->last_shown + 1;
1262
1263        /* Rewind. */
1264        while (bol > gs->buf &&
1265               cur > (funcname_needed == 2 ? opt->last_shown + 1 : from)) {
1266                char *eol = --bol;
1267
1268                while (bol > gs->buf && bol[-1] != '\n')
1269                        bol--;
1270                cur--;
1271                if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
1272                        funcname_lno = cur;
1273                        funcname_needed = 0;
1274                }
1275        }
1276
1277        /* We need to look even further back to find a function signature. */
1278        if (opt->funcname && funcname_needed)
1279                show_funcname_line(opt, gs, bol, cur);
1280
1281        /* Back forward. */
1282        while (cur < lno) {
1283                char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
1284
1285                while (*eol != '\n')
1286                        eol++;
1287                show_line(opt, bol, eol, gs->name, cur, sign);
1288                bol = eol + 1;
1289                cur++;
1290        }
1291}
1292
1293static int should_lookahead(struct grep_opt *opt)
1294{
1295        struct grep_pat *p;
1296
1297        if (opt->extended)
1298                return 0; /* punt for too complex stuff */
1299        if (opt->invert)
1300                return 0;
1301        for (p = opt->pattern_list; p; p = p->next) {
1302                if (p->token != GREP_PATTERN)
1303                        return 0; /* punt for "header only" and stuff */
1304        }
1305        return 1;
1306}
1307
1308static int look_ahead(struct grep_opt *opt,
1309                      unsigned long *left_p,
1310                      unsigned *lno_p,
1311                      char **bol_p)
1312{
1313        unsigned lno = *lno_p;
1314        char *bol = *bol_p;
1315        struct grep_pat *p;
1316        char *sp, *last_bol;
1317        regoff_t earliest = -1;
1318
1319        for (p = opt->pattern_list; p; p = p->next) {
1320                int hit;
1321                regmatch_t m;
1322
1323                hit = patmatch(p, bol, bol + *left_p, &m, 0);
1324                if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1325                        continue;
1326                if (earliest < 0 || m.rm_so < earliest)
1327                        earliest = m.rm_so;
1328        }
1329
1330        if (earliest < 0) {
1331                *bol_p = bol + *left_p;
1332                *left_p = 0;
1333                return 1;
1334        }
1335        for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1336                ; /* find the beginning of the line */
1337        last_bol = sp;
1338
1339        for (sp = bol; sp < last_bol; sp++) {
1340                if (*sp == '\n')
1341                        lno++;
1342        }
1343        *left_p -= last_bol - bol;
1344        *bol_p = last_bol;
1345        *lno_p = lno;
1346        return 0;
1347}
1348
/*
 * Output callback that writes SIZE bytes from BUF to stdout.  OPT is
 * not used; the result of the write is not checked.
 */
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
        FILE *out = stdout;

        fwrite(buf, size, 1, out);
}
1353
1354static int fill_textconv_grep(struct userdiff_driver *driver,
1355                              struct grep_source *gs)
1356{
1357        struct diff_filespec *df;
1358        char *buf;
1359        size_t size;
1360
1361        if (!driver || !driver->textconv)
1362                return grep_source_load(gs);
1363
1364        /*
1365         * The textconv interface is intimately tied to diff_filespecs, so we
1366         * have to pretend to be one. If we could unify the grep_source
1367         * and diff_filespec structs, this mess could just go away.
1368         */
1369        df = alloc_filespec(gs->path);
1370        switch (gs->type) {
1371        case GREP_SOURCE_SHA1:
1372                fill_filespec(df, gs->identifier, 1, 0100644);
1373                break;
1374        case GREP_SOURCE_FILE:
1375                fill_filespec(df, null_sha1, 0, 0100644);
1376                break;
1377        default:
1378                die("BUG: attempt to textconv something without a path?");
1379        }
1380
1381        /*
1382         * fill_textconv is not remotely thread-safe; it may load objects
1383         * behind the scenes, and it modifies the global diff tempfile
1384         * structure.
1385         */
1386        grep_read_lock();
1387        size = fill_textconv(driver, df, &buf);
1388        grep_read_unlock();
1389        free_filespec(df);
1390
1391        /*
1392         * The normal fill_textconv usage by the diff machinery would just keep
1393         * the textconv'd buf separate from the diff_filespec. But much of the
1394         * grep code passes around a grep_source and assumes that its "buf"
1395         * pointer is the beginning of the thing we are searching. So let's
1396         * install our textconv'd version into the grep_source, taking care not
1397         * to leak any existing buffer.
1398         */
1399        grep_source_clear_data(gs);
1400        gs->buf = buf;
1401        gs->size = size;
1402
1403        return 0;
1404}
1405
1406static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1407{
1408        char *bol;
1409        unsigned long left;
1410        unsigned lno = 1;
1411        unsigned last_hit = 0;
1412        int binary_match_only = 0;
1413        unsigned count = 0;
1414        int try_lookahead = 0;
1415        int show_function = 0;
1416        struct userdiff_driver *textconv = NULL;
1417        enum grep_context ctx = GREP_CONTEXT_HEAD;
1418        xdemitconf_t xecfg;
1419
1420        if (!opt->output)
1421                opt->output = std_output;
1422
1423        if (opt->pre_context || opt->post_context || opt->file_break ||
1424            opt->funcbody) {
1425                /* Show hunk marks, except for the first file. */
1426                if (opt->last_shown)
1427                        opt->show_hunk_mark = 1;
1428                /*
1429                 * If we're using threads then we can't easily identify
1430                 * the first file.  Always put hunk marks in that case
1431                 * and skip the very first one later in work_done().
1432                 */
1433                if (opt->output != std_output)
1434                        opt->show_hunk_mark = 1;
1435        }
1436        opt->last_shown = 0;
1437
1438        if (opt->allow_textconv) {
1439                grep_source_load_driver(gs);
1440                /*
1441                 * We might set up the shared textconv cache data here, which
1442                 * is not thread-safe.
1443                 */
1444                grep_attr_lock();
1445                textconv = userdiff_get_textconv(gs->driver);
1446                grep_attr_unlock();
1447        }
1448
1449        /*
1450         * We know the result of a textconv is text, so we only have to care
1451         * about binary handling if we are not using it.
1452         */
1453        if (!textconv) {
1454                switch (opt->binary) {
1455                case GREP_BINARY_DEFAULT:
1456                        if (grep_source_is_binary(gs))
1457                                binary_match_only = 1;
1458                        break;
1459                case GREP_BINARY_NOMATCH:
1460                        if (grep_source_is_binary(gs))
1461                                return 0; /* Assume unmatch */
1462                        break;
1463                case GREP_BINARY_TEXT:
1464                        break;
1465                default:
1466                        die("bug: unknown binary handling mode");
1467                }
1468        }
1469
1470        memset(&xecfg, 0, sizeof(xecfg));
1471        opt->priv = &xecfg;
1472
1473        try_lookahead = should_lookahead(opt);
1474
1475        if (fill_textconv_grep(textconv, gs) < 0)
1476                return 0;
1477
1478        bol = gs->buf;
1479        left = gs->size;
1480        while (left) {
1481                char *eol, ch;
1482                int hit;
1483
1484                /*
1485                 * look_ahead() skips quickly to the line that possibly
1486                 * has the next hit; don't call it if we need to do
1487                 * something more than just skipping the current line
1488                 * in response to an unmatch for the current line.  E.g.
1489                 * inside a post-context window, we will show the current
1490                 * line as a context around the previous hit when it
1491                 * doesn't hit.
1492                 */
1493                if (try_lookahead
1494                    && !(last_hit
1495                         && (show_function ||
1496                             lno <= last_hit + opt->post_context))
1497                    && look_ahead(opt, &left, &lno, &bol))
1498                        break;
1499                eol = end_of_line(bol, &left);
1500                ch = *eol;
1501                *eol = 0;
1502
1503                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1504                        ctx = GREP_CONTEXT_BODY;
1505
1506                hit = match_line(opt, bol, eol, ctx, collect_hits);
1507                *eol = ch;
1508
1509                if (collect_hits)
1510                        goto next_line;
1511
1512                /* "grep -v -e foo -e bla" should list lines
1513                 * that do not have either, so inversion should
1514                 * be done outside.
1515                 */
1516                if (opt->invert)
1517                        hit = !hit;
1518                if (opt->unmatch_name_only) {
1519                        if (hit)
1520                                return 0;
1521                        goto next_line;
1522                }
1523                if (hit) {
1524                        count++;
1525                        if (opt->status_only)
1526                                return 1;
1527                        if (opt->name_only) {
1528                                show_name(opt, gs->name);
1529                                return 1;
1530                        }
1531                        if (opt->count)
1532                                goto next_line;
1533                        if (binary_match_only) {
1534                                opt->output(opt, "Binary file ", 12);
1535                                output_color(opt, gs->name, strlen(gs->name),
1536                                             opt->color_filename);
1537                                opt->output(opt, " matches\n", 9);
1538                                return 1;
1539                        }
1540                        /* Hit at this line.  If we haven't shown the
1541                         * pre-context lines, we would need to show them.
1542                         */
1543                        if (opt->pre_context || opt->funcbody)
1544                                show_pre_context(opt, gs, bol, eol, lno);
1545                        else if (opt->funcname)
1546                                show_funcname_line(opt, gs, bol, lno);
1547                        show_line(opt, bol, eol, gs->name, lno, ':');
1548                        last_hit = lno;
1549                        if (opt->funcbody)
1550                                show_function = 1;
1551                        goto next_line;
1552                }
1553                if (show_function && match_funcname(opt, gs, bol, eol))
1554                        show_function = 0;
1555                if (show_function ||
1556                    (last_hit && lno <= last_hit + opt->post_context)) {
1557                        /* If the last hit is within the post context,
1558                         * we need to show this line.
1559                         */
1560                        show_line(opt, bol, eol, gs->name, lno, '-');
1561                }
1562
1563        next_line:
1564                bol = eol + 1;
1565                if (!left)
1566                        break;
1567                left--;
1568                lno++;
1569        }
1570
1571        if (collect_hits)
1572                return 0;
1573
1574        if (opt->status_only)
1575                return 0;
1576        if (opt->unmatch_name_only) {
1577                /* We did not see any hit, so we want to show this */
1578                show_name(opt, gs->name);
1579                return 1;
1580        }
1581
1582        xdiff_clear_find_func(&xecfg);
1583        opt->priv = NULL;
1584
1585        /* NEEDSWORK:
1586         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1587         * which feels mostly useless but sometimes useful.  Maybe
1588         * make it another option?  For now suppress them.
1589         */
1590        if (opt->count && count) {
1591                char buf[32];
1592                if (opt->pathname) {
1593                        output_color(opt, gs->name, strlen(gs->name),
1594                                     opt->color_filename);
1595                        output_sep(opt, ':');
1596                }
1597                snprintf(buf, sizeof(buf), "%u\n", count);
1598                opt->output(opt, buf, strlen(buf));
1599                return 1;
1600        }
1601        return !!last_hit;
1602}
1603
1604static void clr_hit_marker(struct grep_expr *x)
1605{
1606        /* All-hit markers are meaningful only at the very top level
1607         * OR node.
1608         */
1609        while (1) {
1610                x->hit = 0;
1611                if (x->node != GREP_NODE_OR)
1612                        return;
1613                x->u.binary.left->hit = 0;
1614                x = x->u.binary.right;
1615        }
1616}
1617
1618static int chk_hit_marker(struct grep_expr *x)
1619{
1620        /* Top level nodes have hit markers.  See if they all are hits */
1621        while (1) {
1622                if (x->node != GREP_NODE_OR)
1623                        return x->hit;
1624                if (!x->u.binary.left->hit)
1625                        return 0;
1626                x = x->u.binary.right;
1627        }
1628}
1629
1630int grep_source(struct grep_opt *opt, struct grep_source *gs)
1631{
1632        /*
1633         * we do not have to do the two-pass grep when we do not check
1634         * buffer-wide "all-match".
1635         */
1636        if (!opt->all_match)
1637                return grep_source_1(opt, gs, 0);
1638
1639        /* Otherwise the toplevel "or" terms hit a bit differently.
1640         * We first clear hit markers from them.
1641         */
1642        clr_hit_marker(opt->pattern_expression);
1643        grep_source_1(opt, gs, 1);
1644
1645        if (!chk_hit_marker(opt->pattern_expression))
1646                return 0;
1647
1648        return grep_source_1(opt, gs, 0);
1649}
1650
1651int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
1652{
1653        struct grep_source gs;
1654        int r;
1655
1656        grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
1657        gs.buf = buf;
1658        gs.size = size;
1659
1660        r = grep_source(opt, &gs);
1661
1662        grep_source_clear(&gs);
1663        return r;
1664}
1665
1666void grep_source_init(struct grep_source *gs, enum grep_source_type type,
1667                      const char *name, const char *path,
1668                      const void *identifier)
1669{
1670        gs->type = type;
1671        gs->name = xstrdup_or_null(name);
1672        gs->path = xstrdup_or_null(path);
1673        gs->buf = NULL;
1674        gs->size = 0;
1675        gs->driver = NULL;
1676
1677        switch (type) {
1678        case GREP_SOURCE_FILE:
1679                gs->identifier = xstrdup(identifier);
1680                break;
1681        case GREP_SOURCE_SHA1:
1682                gs->identifier = xmalloc(20);
1683                hashcpy(gs->identifier, identifier);
1684                break;
1685        case GREP_SOURCE_BUF:
1686                gs->identifier = NULL;
1687        }
1688}
1689
1690void grep_source_clear(struct grep_source *gs)
1691{
1692        free(gs->name);
1693        gs->name = NULL;
1694        free(gs->path);
1695        gs->path = NULL;
1696        free(gs->identifier);
1697        gs->identifier = NULL;
1698        grep_source_clear_data(gs);
1699}
1700
1701void grep_source_clear_data(struct grep_source *gs)
1702{
1703        switch (gs->type) {
1704        case GREP_SOURCE_FILE:
1705        case GREP_SOURCE_SHA1:
1706                free(gs->buf);
1707                gs->buf = NULL;
1708                gs->size = 0;
1709                break;
1710        case GREP_SOURCE_BUF:
1711                /* leave user-provided buf intact */
1712                break;
1713        }
1714}
1715
1716static int grep_source_load_sha1(struct grep_source *gs)
1717{
1718        enum object_type type;
1719
1720        grep_read_lock();
1721        gs->buf = read_sha1_file(gs->identifier, &type, &gs->size);
1722        grep_read_unlock();
1723
1724        if (!gs->buf)
1725                return error(_("'%s': unable to read %s"),
1726                             gs->name,
1727                             sha1_to_hex(gs->identifier));
1728        return 0;
1729}
1730
1731static int grep_source_load_file(struct grep_source *gs)
1732{
1733        const char *filename = gs->identifier;
1734        struct stat st;
1735        char *data;
1736        size_t size;
1737        int i;
1738
1739        if (lstat(filename, &st) < 0) {
1740        err_ret:
1741                if (errno != ENOENT)
1742                        error(_("'%s': %s"), filename, strerror(errno));
1743                return -1;
1744        }
1745        if (!S_ISREG(st.st_mode))
1746                return -1;
1747        size = xsize_t(st.st_size);
1748        i = open(filename, O_RDONLY);
1749        if (i < 0)
1750                goto err_ret;
1751        data = xmalloc(size + 1);
1752        if (st.st_size != read_in_full(i, data, size)) {
1753                error(_("'%s': short read %s"), filename, strerror(errno));
1754                close(i);
1755                free(data);
1756                return -1;
1757        }
1758        close(i);
1759        data[size] = 0;
1760
1761        gs->buf = data;
1762        gs->size = size;
1763        return 0;
1764}
1765
1766static int grep_source_load(struct grep_source *gs)
1767{
1768        if (gs->buf)
1769                return 0;
1770
1771        switch (gs->type) {
1772        case GREP_SOURCE_FILE:
1773                return grep_source_load_file(gs);
1774        case GREP_SOURCE_SHA1:
1775                return grep_source_load_sha1(gs);
1776        case GREP_SOURCE_BUF:
1777                return gs->buf ? 0 : -1;
1778        }
1779        die("BUG: invalid grep_source type");
1780}
1781
1782void grep_source_load_driver(struct grep_source *gs)
1783{
1784        if (gs->driver)
1785                return;
1786
1787        grep_attr_lock();
1788        if (gs->path)
1789                gs->driver = userdiff_find_by_path(gs->path);
1790        if (!gs->driver)
1791                gs->driver = userdiff_find_by_name("default");
1792        grep_attr_unlock();
1793}
1794
1795static int grep_source_is_binary(struct grep_source *gs)
1796{
1797        grep_source_load_driver(gs);
1798        if (gs->driver->binary != -1)
1799                return gs->driver->binary;
1800
1801        if (!grep_source_load(gs))
1802                return buffer_is_binary(gs->buf, gs->size);
1803
1804        return 0;
1805}