7fcdaa0753b7a9678e5eb4d0ad8aff9728bd6ea9
   1#include "cache.h"
   2#include "config.h"
   3#include "grep.h"
   4#include "userdiff.h"
   5#include "xdiff-interface.h"
   6#include "diff.h"
   7#include "diffcore.h"
   8#include "commit.h"
   9#include "quote.h"
  10
  11static int grep_source_load(struct grep_source *gs);
  12static int grep_source_is_binary(struct grep_source *gs);
  13
  14static struct grep_opt grep_defaults;
  15
  16static void std_output(struct grep_opt *opt, const void *buf, size_t size)
  17{
  18        fwrite(buf, size, 1, stdout);
  19}
  20
  21/*
  22 * Initialize the grep_defaults template with hardcoded defaults.
  23 * We could let the compiler do this, but without C99 initializers
  24 * the code gets unwieldy and unreadable, so...
  25 */
  26void init_grep_defaults(void)
  27{
  28        struct grep_opt *opt = &grep_defaults;
  29        static int run_once;
  30
  31        if (run_once)
  32                return;
  33        run_once++;
  34
  35        memset(opt, 0, sizeof(*opt));
  36        opt->relative = 1;
  37        opt->pathname = 1;
  38        opt->regflags = REG_NEWLINE;
  39        opt->max_depth = -1;
  40        opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
  41        color_set(opt->color_context, "");
  42        color_set(opt->color_filename, "");
  43        color_set(opt->color_function, "");
  44        color_set(opt->color_lineno, "");
  45        color_set(opt->color_match_context, GIT_COLOR_BOLD_RED);
  46        color_set(opt->color_match_selected, GIT_COLOR_BOLD_RED);
  47        color_set(opt->color_selected, "");
  48        color_set(opt->color_sep, GIT_COLOR_CYAN);
  49        opt->color = -1;
  50        opt->output = std_output;
  51}
  52
  53static int parse_pattern_type_arg(const char *opt, const char *arg)
  54{
  55        if (!strcmp(arg, "default"))
  56                return GREP_PATTERN_TYPE_UNSPECIFIED;
  57        else if (!strcmp(arg, "basic"))
  58                return GREP_PATTERN_TYPE_BRE;
  59        else if (!strcmp(arg, "extended"))
  60                return GREP_PATTERN_TYPE_ERE;
  61        else if (!strcmp(arg, "fixed"))
  62                return GREP_PATTERN_TYPE_FIXED;
  63        else if (!strcmp(arg, "perl"))
  64                return GREP_PATTERN_TYPE_PCRE;
  65        die("bad %s argument: %s", opt, arg);
  66}
  67
  68/*
  69 * Read the configuration file once and store it in
  70 * the grep_defaults template.
  71 */
  72int grep_config(const char *var, const char *value, void *cb)
  73{
  74        struct grep_opt *opt = &grep_defaults;
  75        char *color = NULL;
  76
  77        if (userdiff_config(var, value) < 0)
  78                return -1;
  79
  80        if (!strcmp(var, "grep.extendedregexp")) {
  81                opt->extended_regexp_option = git_config_bool(var, value);
  82                return 0;
  83        }
  84
  85        if (!strcmp(var, "grep.patterntype")) {
  86                opt->pattern_type_option = parse_pattern_type_arg(var, value);
  87                return 0;
  88        }
  89
  90        if (!strcmp(var, "grep.linenumber")) {
  91                opt->linenum = git_config_bool(var, value);
  92                return 0;
  93        }
  94
  95        if (!strcmp(var, "grep.fullname")) {
  96                opt->relative = !git_config_bool(var, value);
  97                return 0;
  98        }
  99
 100        if (!strcmp(var, "color.grep"))
 101                opt->color = git_config_colorbool(var, value);
 102        else if (!strcmp(var, "color.grep.context"))
 103                color = opt->color_context;
 104        else if (!strcmp(var, "color.grep.filename"))
 105                color = opt->color_filename;
 106        else if (!strcmp(var, "color.grep.function"))
 107                color = opt->color_function;
 108        else if (!strcmp(var, "color.grep.linenumber"))
 109                color = opt->color_lineno;
 110        else if (!strcmp(var, "color.grep.matchcontext"))
 111                color = opt->color_match_context;
 112        else if (!strcmp(var, "color.grep.matchselected"))
 113                color = opt->color_match_selected;
 114        else if (!strcmp(var, "color.grep.selected"))
 115                color = opt->color_selected;
 116        else if (!strcmp(var, "color.grep.separator"))
 117                color = opt->color_sep;
 118        else if (!strcmp(var, "color.grep.match")) {
 119                int rc = 0;
 120                if (!value)
 121                        return config_error_nonbool(var);
 122                rc |= color_parse(value, opt->color_match_context);
 123                rc |= color_parse(value, opt->color_match_selected);
 124                return rc;
 125        }
 126
 127        if (color) {
 128                if (!value)
 129                        return config_error_nonbool(var);
 130                return color_parse(value, color);
 131        }
 132        return 0;
 133}
 134
 135/*
 136 * Initialize one instance of grep_opt and copy the
 137 * default values from the template we read the configuration
 138 * information in an earlier call to git_config(grep_config).
 139 */
 140void grep_init(struct grep_opt *opt, const char *prefix)
 141{
 142        struct grep_opt *def = &grep_defaults;
 143
 144        memset(opt, 0, sizeof(*opt));
 145        opt->prefix = prefix;
 146        opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
 147        opt->pattern_tail = &opt->pattern_list;
 148        opt->header_tail = &opt->header_list;
 149
 150        opt->color = def->color;
 151        opt->extended_regexp_option = def->extended_regexp_option;
 152        opt->pattern_type_option = def->pattern_type_option;
 153        opt->linenum = def->linenum;
 154        opt->max_depth = def->max_depth;
 155        opt->pathname = def->pathname;
 156        opt->regflags = def->regflags;
 157        opt->relative = def->relative;
 158        opt->output = def->output;
 159
 160        color_set(opt->color_context, def->color_context);
 161        color_set(opt->color_filename, def->color_filename);
 162        color_set(opt->color_function, def->color_function);
 163        color_set(opt->color_lineno, def->color_lineno);
 164        color_set(opt->color_match_context, def->color_match_context);
 165        color_set(opt->color_match_selected, def->color_match_selected);
 166        color_set(opt->color_selected, def->color_selected);
 167        color_set(opt->color_sep, def->color_sep);
 168}
 169
 170static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 171{
 172        switch (pattern_type) {
 173        case GREP_PATTERN_TYPE_UNSPECIFIED:
 174                /* fall through */
 175
 176        case GREP_PATTERN_TYPE_BRE:
 177                break;
 178
 179        case GREP_PATTERN_TYPE_ERE:
 180                opt->regflags |= REG_EXTENDED;
 181                break;
 182
 183        case GREP_PATTERN_TYPE_FIXED:
 184                opt->fixed = 1;
 185                break;
 186
 187        case GREP_PATTERN_TYPE_PCRE:
 188#ifdef USE_LIBPCRE2
 189                opt->pcre2 = 1;
 190#else
 191                /*
 192                 * It's important that pcre1 always be assigned to
 193                 * even when there's no USE_LIBPCRE* defined. We still
 194                 * call the PCRE stub function, it just dies with
 195                 * "cannot use Perl-compatible regexes[...]".
 196                 */
 197                opt->pcre1 = 1;
 198#endif
 199                break;
 200        }
 201}
 202
 203void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 204{
 205        if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
 206                grep_set_pattern_type_option(pattern_type, opt);
 207        else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
 208                grep_set_pattern_type_option(opt->pattern_type_option, opt);
 209        else if (opt->extended_regexp_option)
 210                grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
 211}
 212
 213static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
 214                                        const char *origin, int no,
 215                                        enum grep_pat_token t,
 216                                        enum grep_header_field field)
 217{
 218        struct grep_pat *p = xcalloc(1, sizeof(*p));
 219        p->pattern = xmemdupz(pat, patlen);
 220        p->patternlen = patlen;
 221        p->origin = origin;
 222        p->no = no;
 223        p->token = t;
 224        p->field = field;
 225        return p;
 226}
 227
 228static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
 229{
 230        **tail = p;
 231        *tail = &p->next;
 232        p->next = NULL;
 233
 234        switch (p->token) {
 235        case GREP_PATTERN: /* atom */
 236        case GREP_PATTERN_HEAD:
 237        case GREP_PATTERN_BODY:
 238                for (;;) {
 239                        struct grep_pat *new_pat;
 240                        size_t len = 0;
 241                        char *cp = p->pattern + p->patternlen, *nl = NULL;
 242                        while (++len <= p->patternlen) {
 243                                if (*(--cp) == '\n') {
 244                                        nl = cp;
 245                                        break;
 246                                }
 247                        }
 248                        if (!nl)
 249                                break;
 250                        new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
 251                                                  p->no, p->token, p->field);
 252                        new_pat->next = p->next;
 253                        if (!p->next)
 254                                *tail = &new_pat->next;
 255                        p->next = new_pat;
 256                        *nl = '\0';
 257                        p->patternlen -= len;
 258                }
 259                break;
 260        default:
 261                break;
 262        }
 263}
 264
 265void append_header_grep_pattern(struct grep_opt *opt,
 266                                enum grep_header_field field, const char *pat)
 267{
 268        struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
 269                                             GREP_PATTERN_HEAD, field);
 270        if (field == GREP_HEADER_REFLOG)
 271                opt->use_reflog_filter = 1;
 272        do_append_grep_pat(&opt->header_tail, p);
 273}
 274
 275void append_grep_pattern(struct grep_opt *opt, const char *pat,
 276                         const char *origin, int no, enum grep_pat_token t)
 277{
 278        append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 279}
 280
 281void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
 282                     const char *origin, int no, enum grep_pat_token t)
 283{
 284        struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
 285        do_append_grep_pat(&opt->pattern_tail, p);
 286}
 287
 288struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 289{
 290        struct grep_pat *pat;
 291        struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
 292        *ret = *opt;
 293
 294        ret->pattern_list = NULL;
 295        ret->pattern_tail = &ret->pattern_list;
 296
 297        for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
 298        {
 299                if(pat->token == GREP_PATTERN_HEAD)
 300                        append_header_grep_pattern(ret, pat->field,
 301                                                   pat->pattern);
 302                else
 303                        append_grep_pat(ret, pat->pattern, pat->patternlen,
 304                                        pat->origin, pat->no, pat->token);
 305        }
 306
 307        return ret;
 308}
 309
 310static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 311                const char *error)
 312{
 313        char where[1024];
 314
 315        if (p->no)
 316                xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
 317        else if (p->origin)
 318                xsnprintf(where, sizeof(where), "%s, ", p->origin);
 319        else
 320                where[0] = 0;
 321
 322        die("%s'%s': %s", where, p->pattern, error);
 323}
 324
 325static int is_fixed(const char *s, size_t len)
 326{
 327        size_t i;
 328
 329        for (i = 0; i < len; i++) {
 330                if (is_regex_special(s[i]))
 331                        return 0;
 332        }
 333
 334        return 1;
 335}
 336
 337static int has_null(const char *s, size_t len)
 338{
 339        /*
 340         * regcomp cannot accept patterns with NULs so when using it
 341         * we consider any pattern containing a NUL fixed.
 342         */
 343        if (memchr(s, 0, len))
 344                return 1;
 345
 346        return 0;
 347}
 348
 349#ifdef USE_LIBPCRE1
 350static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 351{
 352        const char *error;
 353        int erroffset;
 354        int options = PCRE_MULTILINE;
 355
 356        if (opt->ignore_case) {
 357                if (has_non_ascii(p->pattern))
 358                        p->pcre1_tables = pcre_maketables();
 359                options |= PCRE_CASELESS;
 360        }
 361        if (is_utf8_locale() && has_non_ascii(p->pattern))
 362                options |= PCRE_UTF8;
 363
 364        p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
 365                                      p->pcre1_tables);
 366        if (!p->pcre1_regexp)
 367                compile_regexp_failed(p, error);
 368
 369        p->pcre1_extra_info = pcre_study(p->pcre1_regexp, PCRE_STUDY_JIT_COMPILE, &error);
 370        if (!p->pcre1_extra_info && error)
 371                die("%s", error);
 372
 373#ifdef GIT_PCRE1_USE_JIT
 374        pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 375        if (p->pcre1_jit_on == 1) {
 376                p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
 377                if (!p->pcre1_jit_stack)
 378                        die("Couldn't allocate PCRE JIT stack");
 379                pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
 380        } else if (p->pcre1_jit_on != 0) {
 381                die("BUG: The pcre1_jit_on variable should be 0 or 1, not %d",
 382                    p->pcre1_jit_on);
 383        }
 384#endif
 385}
 386
 387static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 388                regmatch_t *match, int eflags)
 389{
 390        int ovector[30], ret, flags = 0;
 391
 392        if (eflags & REG_NOTBOL)
 393                flags |= PCRE_NOTBOL;
 394
 395#ifdef GIT_PCRE1_USE_JIT
 396        if (p->pcre1_jit_on) {
 397                ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 398                                    eol - line, 0, flags, ovector,
 399                                    ARRAY_SIZE(ovector), p->pcre1_jit_stack);
 400        } else
 401#endif
 402        {
 403                ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 404                                eol - line, 0, flags, ovector,
 405                                ARRAY_SIZE(ovector));
 406        }
 407
 408        if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 409                die("pcre_exec failed with error code %d", ret);
 410        if (ret > 0) {
 411                ret = 0;
 412                match->rm_so = ovector[0];
 413                match->rm_eo = ovector[1];
 414        }
 415
 416        return ret;
 417}
 418
 419static void free_pcre1_regexp(struct grep_pat *p)
 420{
 421        pcre_free(p->pcre1_regexp);
 422#ifdef GIT_PCRE1_USE_JIT
 423        if (p->pcre1_jit_on) {
 424                pcre_free_study(p->pcre1_extra_info);
 425                pcre_jit_stack_free(p->pcre1_jit_stack);
 426        } else
 427#endif
 428        {
 429                pcre_free(p->pcre1_extra_info);
 430        }
 431        pcre_free((void *)p->pcre1_tables);
 432}
 433#else /* !USE_LIBPCRE1 */
 434static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 435{
 436        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 437}
 438
 439static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 440                regmatch_t *match, int eflags)
 441{
 442        return 1;
 443}
 444
 445static void free_pcre1_regexp(struct grep_pat *p)
 446{
 447}
 448#endif /* !USE_LIBPCRE1 */
 449
 450#ifdef USE_LIBPCRE2
 451static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 452{
 453        int error;
 454        PCRE2_UCHAR errbuf[256];
 455        PCRE2_SIZE erroffset;
 456        int options = PCRE2_MULTILINE;
 457        const uint8_t *character_tables = NULL;
 458        int jitret;
 459
 460        assert(opt->pcre2);
 461
 462        p->pcre2_compile_context = NULL;
 463
 464        if (opt->ignore_case) {
 465                if (has_non_ascii(p->pattern)) {
 466                        character_tables = pcre2_maketables(NULL);
 467                        p->pcre2_compile_context = pcre2_compile_context_create(NULL);
 468                        pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
 469                }
 470                options |= PCRE2_CASELESS;
 471        }
 472        if (is_utf8_locale() && has_non_ascii(p->pattern))
 473                options |= PCRE2_UTF;
 474
 475        p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
 476                                         p->patternlen, options, &error, &erroffset,
 477                                         p->pcre2_compile_context);
 478
 479        if (p->pcre2_pattern) {
 480                p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
 481                if (!p->pcre2_match_data)
 482                        die("Couldn't allocate PCRE2 match data");
 483        } else {
 484                pcre2_get_error_message(error, errbuf, sizeof(errbuf));
 485                compile_regexp_failed(p, (const char *)&errbuf);
 486        }
 487
 488        pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
 489        if (p->pcre2_jit_on == 1) {
 490                jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 491                if (jitret)
 492                        die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
 493                p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
 494                if (!p->pcre2_jit_stack)
 495                        die("Couldn't allocate PCRE2 JIT stack");
 496                p->pcre2_match_context = pcre2_match_context_create(NULL);
 497                if (!p->pcre2_match_context)
 498                        die("Couldn't allocate PCRE2 match context");
 499                pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
 500        } else if (p->pcre2_jit_on != 0) {
 501                die("BUG: The pcre2_jit_on variable should be 0 or 1, not %d",
 502                    p->pcre1_jit_on);
 503        }
 504}
 505
 506static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 507                regmatch_t *match, int eflags)
 508{
 509        int ret, flags = 0;
 510        PCRE2_SIZE *ovector;
 511        PCRE2_UCHAR errbuf[256];
 512
 513        if (eflags & REG_NOTBOL)
 514                flags |= PCRE2_NOTBOL;
 515
 516        if (p->pcre2_jit_on)
 517                ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
 518                                      eol - line, 0, flags, p->pcre2_match_data,
 519                                      NULL);
 520        else
 521                ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
 522                                  eol - line, 0, flags, p->pcre2_match_data,
 523                                  NULL);
 524
 525        if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
 526                pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
 527                die("%s failed with error code %d: %s",
 528                    (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
 529                    errbuf);
 530        }
 531        if (ret > 0) {
 532                ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
 533                ret = 0;
 534                match->rm_so = (int)ovector[0];
 535                match->rm_eo = (int)ovector[1];
 536        }
 537
 538        return ret;
 539}
 540
 541static void free_pcre2_pattern(struct grep_pat *p)
 542{
 543        pcre2_compile_context_free(p->pcre2_compile_context);
 544        pcre2_code_free(p->pcre2_pattern);
 545        pcre2_match_data_free(p->pcre2_match_data);
 546        pcre2_jit_stack_free(p->pcre2_jit_stack);
 547        pcre2_match_context_free(p->pcre2_match_context);
 548}
 549#else /* !USE_LIBPCRE2 */
 550static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 551{
 552        /*
 553         * Unreachable until USE_LIBPCRE2 becomes synonymous with
 554         * USE_LIBPCRE. See the sibling comment in
 555         * grep_set_pattern_type_option().
 556         */
 557        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 558}
 559
 560static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 561                regmatch_t *match, int eflags)
 562{
 563        return 1;
 564}
 565
 566static void free_pcre2_pattern(struct grep_pat *p)
 567{
 568}
 569#endif /* !USE_LIBPCRE2 */
 570
 571static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 572{
 573        struct strbuf sb = STRBUF_INIT;
 574        int err;
 575        int regflags = opt->regflags;
 576
 577        basic_regex_quote_buf(&sb, p->pattern);
 578        if (opt->ignore_case)
 579                regflags |= REG_ICASE;
 580        err = regcomp(&p->regexp, sb.buf, regflags);
 581        if (opt->debug)
 582                fprintf(stderr, "fixed %s\n", sb.buf);
 583        strbuf_release(&sb);
 584        if (err) {
 585                char errbuf[1024];
 586                regerror(err, &p->regexp, errbuf, sizeof(errbuf));
 587                regfree(&p->regexp);
 588                compile_regexp_failed(p, errbuf);
 589        }
 590}
 591
 592static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 593{
 594        int icase, ascii_only;
 595        int err;
 596
 597        p->word_regexp = opt->word_regexp;
 598        p->ignore_case = opt->ignore_case;
 599        icase          = opt->regflags & REG_ICASE || p->ignore_case;
 600        ascii_only     = !has_non_ascii(p->pattern);
 601
 602        /*
 603         * Even when -F (fixed) asks us to do a non-regexp search, we
 604         * may not be able to correctly case-fold when -i
 605         * (ignore-case) is asked (in which case, we'll synthesize a
 606         * regexp to match the pattern that matches regexp special
 607         * characters literally, while ignoring case differences).  On
 608         * the other hand, even without -F, if the pattern does not
 609         * have any regexp special characters and there is no need for
 610         * case-folding search, we can internally turn it into a
 611         * simple string match using kws.  p->fixed tells us if we
 612         * want to use kws.
 613         */
 614        if (opt->fixed ||
 615            has_null(p->pattern, p->patternlen) ||
 616            is_fixed(p->pattern, p->patternlen))
 617                p->fixed = !icase || ascii_only;
 618
 619        if (p->fixed) {
 620                p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
 621                kwsincr(p->kws, p->pattern, p->patternlen);
 622                kwsprep(p->kws);
 623                return;
 624        } else if (opt->fixed) {
 625                /*
 626                 * We come here when the pattern has the non-ascii
 627                 * characters we cannot case-fold, and asked to
 628                 * ignore-case.
 629                 */
 630                compile_fixed_regexp(p, opt);
 631                return;
 632        }
 633
 634        if (opt->pcre2) {
 635                compile_pcre2_pattern(p, opt);
 636                return;
 637        }
 638
 639        if (opt->pcre1) {
 640                compile_pcre1_regexp(p, opt);
 641                return;
 642        }
 643
 644        err = regcomp(&p->regexp, p->pattern, opt->regflags);
 645        if (err) {
 646                char errbuf[1024];
 647                regerror(err, &p->regexp, errbuf, 1024);
 648                regfree(&p->regexp);
 649                compile_regexp_failed(p, errbuf);
 650        }
 651}
 652
 653static struct grep_expr *compile_pattern_or(struct grep_pat **);
 654static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
 655{
 656        struct grep_pat *p;
 657        struct grep_expr *x;
 658
 659        p = *list;
 660        if (!p)
 661                return NULL;
 662        switch (p->token) {
 663        case GREP_PATTERN: /* atom */
 664        case GREP_PATTERN_HEAD:
 665        case GREP_PATTERN_BODY:
 666                x = xcalloc(1, sizeof (struct grep_expr));
 667                x->node = GREP_NODE_ATOM;
 668                x->u.atom = p;
 669                *list = p->next;
 670                return x;
 671        case GREP_OPEN_PAREN:
 672                *list = p->next;
 673                x = compile_pattern_or(list);
 674                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 675                        die("unmatched parenthesis");
 676                *list = (*list)->next;
 677                return x;
 678        default:
 679                return NULL;
 680        }
 681}
 682
 683static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 684{
 685        struct grep_pat *p;
 686        struct grep_expr *x;
 687
 688        p = *list;
 689        if (!p)
 690                return NULL;
 691        switch (p->token) {
 692        case GREP_NOT:
 693                if (!p->next)
 694                        die("--not not followed by pattern expression");
 695                *list = p->next;
 696                x = xcalloc(1, sizeof (struct grep_expr));
 697                x->node = GREP_NODE_NOT;
 698                x->u.unary = compile_pattern_not(list);
 699                if (!x->u.unary)
 700                        die("--not followed by non pattern expression");
 701                return x;
 702        default:
 703                return compile_pattern_atom(list);
 704        }
 705}
 706
 707static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 708{
 709        struct grep_pat *p;
 710        struct grep_expr *x, *y, *z;
 711
 712        x = compile_pattern_not(list);
 713        p = *list;
 714        if (p && p->token == GREP_AND) {
 715                if (!p->next)
 716                        die("--and not followed by pattern expression");
 717                *list = p->next;
 718                y = compile_pattern_and(list);
 719                if (!y)
 720                        die("--and not followed by pattern expression");
 721                z = xcalloc(1, sizeof (struct grep_expr));
 722                z->node = GREP_NODE_AND;
 723                z->u.binary.left = x;
 724                z->u.binary.right = y;
 725                return z;
 726        }
 727        return x;
 728}
 729
 730static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 731{
 732        struct grep_pat *p;
 733        struct grep_expr *x, *y, *z;
 734
 735        x = compile_pattern_and(list);
 736        p = *list;
 737        if (x && p && p->token != GREP_CLOSE_PAREN) {
 738                y = compile_pattern_or(list);
 739                if (!y)
 740                        die("not a pattern expression %s", p->pattern);
 741                z = xcalloc(1, sizeof (struct grep_expr));
 742                z->node = GREP_NODE_OR;
 743                z->u.binary.left = x;
 744                z->u.binary.right = y;
 745                return z;
 746        }
 747        return x;
 748}
 749
 750static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 751{
 752        return compile_pattern_or(list);
 753}
 754
 755static void indent(int in)
 756{
 757        while (in-- > 0)
 758                fputc(' ', stderr);
 759}
 760
 761static void dump_grep_pat(struct grep_pat *p)
 762{
 763        switch (p->token) {
 764        case GREP_AND: fprintf(stderr, "*and*"); break;
 765        case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
 766        case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
 767        case GREP_NOT: fprintf(stderr, "*not*"); break;
 768        case GREP_OR: fprintf(stderr, "*or*"); break;
 769
 770        case GREP_PATTERN: fprintf(stderr, "pattern"); break;
 771        case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
 772        case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
 773        }
 774
 775        switch (p->token) {
 776        default: break;
 777        case GREP_PATTERN_HEAD:
 778                fprintf(stderr, "<head %d>", p->field); break;
 779        case GREP_PATTERN_BODY:
 780                fprintf(stderr, "<body>"); break;
 781        }
 782        switch (p->token) {
 783        default: break;
 784        case GREP_PATTERN_HEAD:
 785        case GREP_PATTERN_BODY:
 786        case GREP_PATTERN:
 787                fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
 788                break;
 789        }
 790        fputc('\n', stderr);
 791}
 792
 793static void dump_grep_expression_1(struct grep_expr *x, int in)
 794{
 795        indent(in);
 796        switch (x->node) {
 797        case GREP_NODE_TRUE:
 798                fprintf(stderr, "true\n");
 799                break;
 800        case GREP_NODE_ATOM:
 801                dump_grep_pat(x->u.atom);
 802                break;
 803        case GREP_NODE_NOT:
 804                fprintf(stderr, "(not\n");
 805                dump_grep_expression_1(x->u.unary, in+1);
 806                indent(in);
 807                fprintf(stderr, ")\n");
 808                break;
 809        case GREP_NODE_AND:
 810                fprintf(stderr, "(and\n");
 811                dump_grep_expression_1(x->u.binary.left, in+1);
 812                dump_grep_expression_1(x->u.binary.right, in+1);
 813                indent(in);
 814                fprintf(stderr, ")\n");
 815                break;
 816        case GREP_NODE_OR:
 817                fprintf(stderr, "(or\n");
 818                dump_grep_expression_1(x->u.binary.left, in+1);
 819                dump_grep_expression_1(x->u.binary.right, in+1);
 820                indent(in);
 821                fprintf(stderr, ")\n");
 822                break;
 823        }
 824}
 825
 826static void dump_grep_expression(struct grep_opt *opt)
 827{
 828        struct grep_expr *x = opt->pattern_expression;
 829
 830        if (opt->all_match)
 831                fprintf(stderr, "[all-match]\n");
 832        dump_grep_expression_1(x, 0);
 833        fflush(NULL);
 834}
 835
 836static struct grep_expr *grep_true_expr(void)
 837{
 838        struct grep_expr *z = xcalloc(1, sizeof(*z));
 839        z->node = GREP_NODE_TRUE;
 840        return z;
 841}
 842
 843static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
 844{
 845        struct grep_expr *z = xcalloc(1, sizeof(*z));
 846        z->node = GREP_NODE_OR;
 847        z->u.binary.left = left;
 848        z->u.binary.right = right;
 849        return z;
 850}
 851
 852static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
 853{
 854        struct grep_pat *p;
 855        struct grep_expr *header_expr;
 856        struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
 857        enum grep_header_field fld;
 858
 859        if (!opt->header_list)
 860                return NULL;
 861
 862        for (p = opt->header_list; p; p = p->next) {
 863                if (p->token != GREP_PATTERN_HEAD)
 864                        die("BUG: a non-header pattern in grep header list.");
 865                if (p->field < GREP_HEADER_FIELD_MIN ||
 866                    GREP_HEADER_FIELD_MAX <= p->field)
 867                        die("BUG: unknown header field %d", p->field);
 868                compile_regexp(p, opt);
 869        }
 870
 871        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
 872                header_group[fld] = NULL;
 873
 874        for (p = opt->header_list; p; p = p->next) {
 875                struct grep_expr *h;
 876                struct grep_pat *pp = p;
 877
 878                h = compile_pattern_atom(&pp);
 879                if (!h || pp != p->next)
 880                        die("BUG: malformed header expr");
 881                if (!header_group[p->field]) {
 882                        header_group[p->field] = h;
 883                        continue;
 884                }
 885                header_group[p->field] = grep_or_expr(h, header_group[p->field]);
 886        }
 887
 888        header_expr = NULL;
 889
 890        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
 891                if (!header_group[fld])
 892                        continue;
 893                if (!header_expr)
 894                        header_expr = grep_true_expr();
 895                header_expr = grep_or_expr(header_group[fld], header_expr);
 896        }
 897        return header_expr;
 898}
 899
 900static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
 901{
 902        struct grep_expr *z = x;
 903
 904        while (x) {
 905                assert(x->node == GREP_NODE_OR);
 906                if (x->u.binary.right &&
 907                    x->u.binary.right->node == GREP_NODE_TRUE) {
 908                        x->u.binary.right = y;
 909                        break;
 910                }
 911                x = x->u.binary.right;
 912        }
 913        return z;
 914}
 915
 916static void compile_grep_patterns_real(struct grep_opt *opt)
 917{
 918        struct grep_pat *p;
 919        struct grep_expr *header_expr = prep_header_patterns(opt);
 920
 921        for (p = opt->pattern_list; p; p = p->next) {
 922                switch (p->token) {
 923                case GREP_PATTERN: /* atom */
 924                case GREP_PATTERN_HEAD:
 925                case GREP_PATTERN_BODY:
 926                        compile_regexp(p, opt);
 927                        break;
 928                default:
 929                        opt->extended = 1;
 930                        break;
 931                }
 932        }
 933
 934        if (opt->all_match || header_expr)
 935                opt->extended = 1;
 936        else if (!opt->extended && !opt->debug)
 937                return;
 938
 939        p = opt->pattern_list;
 940        if (p)
 941                opt->pattern_expression = compile_pattern_expr(&p);
 942        if (p)
 943                die("incomplete pattern expression: %s", p->pattern);
 944
 945        if (!header_expr)
 946                return;
 947
 948        if (!opt->pattern_expression)
 949                opt->pattern_expression = header_expr;
 950        else if (opt->all_match)
 951                opt->pattern_expression = grep_splice_or(header_expr,
 952                                                         opt->pattern_expression);
 953        else
 954                opt->pattern_expression = grep_or_expr(opt->pattern_expression,
 955                                                       header_expr);
 956        opt->all_match = 1;
 957}
 958
 959void compile_grep_patterns(struct grep_opt *opt)
 960{
 961        compile_grep_patterns_real(opt);
 962        if (opt->debug)
 963                dump_grep_expression(opt);
 964}
 965
 966static void free_pattern_expr(struct grep_expr *x)
 967{
 968        switch (x->node) {
 969        case GREP_NODE_TRUE:
 970        case GREP_NODE_ATOM:
 971                break;
 972        case GREP_NODE_NOT:
 973                free_pattern_expr(x->u.unary);
 974                break;
 975        case GREP_NODE_AND:
 976        case GREP_NODE_OR:
 977                free_pattern_expr(x->u.binary.left);
 978                free_pattern_expr(x->u.binary.right);
 979                break;
 980        }
 981        free(x);
 982}
 983
 984void free_grep_patterns(struct grep_opt *opt)
 985{
 986        struct grep_pat *p, *n;
 987
 988        for (p = opt->pattern_list; p; p = n) {
 989                n = p->next;
 990                switch (p->token) {
 991                case GREP_PATTERN: /* atom */
 992                case GREP_PATTERN_HEAD:
 993                case GREP_PATTERN_BODY:
 994                        if (p->kws)
 995                                kwsfree(p->kws);
 996                        else if (p->pcre1_regexp)
 997                                free_pcre1_regexp(p);
 998                        else if (p->pcre2_pattern)
 999                                free_pcre2_pattern(p);
1000                        else
1001                                regfree(&p->regexp);
1002                        free(p->pattern);
1003                        break;
1004                default:
1005                        break;
1006                }
1007                free(p);
1008        }
1009
1010        if (!opt->extended)
1011                return;
1012        free_pattern_expr(opt->pattern_expression);
1013}
1014
/*
 * Advance from "cp" to the next '\n', looking at no more than *left
 * bytes.  Returns a pointer to the newline, or to the position where
 * the byte budget ran out; *left is reduced by the number of bytes
 * skipped.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
        unsigned long remaining;

        for (remaining = *left; remaining; remaining--, cp++) {
                if (*cp == '\n')
                        break;
        }
        *left = remaining;
        return cp;
}
1025
/* Return 1 if "ch" is alphanumeric or an underscore, 0 otherwise. */
static int word_char(char ch)
{
        if (ch == '_')
                return 1;
        return isalnum(ch) ? 1 : 0;
}
1030
1031static void output_color(struct grep_opt *opt, const void *data, size_t size,
1032                         const char *color)
1033{
1034        if (want_color(opt->color) && color && color[0]) {
1035                opt->output(opt, color, strlen(color));
1036                opt->output(opt, data, size);
1037                opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
1038        } else
1039                opt->output(opt, data, size);
1040}
1041
1042static void output_sep(struct grep_opt *opt, char sign)
1043{
1044        if (opt->null_following_name)
1045                opt->output(opt, "\0", 1);
1046        else
1047                output_color(opt, &sign, 1, opt->color_sep);
1048}
1049
1050static void show_name(struct grep_opt *opt, const char *name)
1051{
1052        output_color(opt, name, strlen(name), opt->color_filename);
1053        opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
1054}
1055
1056static int fixmatch(struct grep_pat *p, char *line, char *eol,
1057                    regmatch_t *match)
1058{
1059        struct kwsmatch kwsm;
1060        size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
1061        if (offset == -1) {
1062                match->rm_so = match->rm_eo = -1;
1063                return REG_NOMATCH;
1064        } else {
1065                match->rm_so = offset;
1066                match->rm_eo = match->rm_so + kwsm.size[0];
1067                return 0;
1068        }
1069}
1070
1071static int patmatch(struct grep_pat *p, char *line, char *eol,
1072                    regmatch_t *match, int eflags)
1073{
1074        int hit;
1075
1076        if (p->fixed)
1077                hit = !fixmatch(p, line, eol, match);
1078        else if (p->pcre1_regexp)
1079                hit = !pcre1match(p, line, eol, match, eflags);
1080        else if (p->pcre2_pattern)
1081                hit = !pcre2match(p, line, eol, match, eflags);
1082        else
1083                hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
1084                                   eflags);
1085
1086        return hit;
1087}
1088
/*
 * Scan backwards from *eol_p (never examining "bol" itself) for the
 * last '>'.  If one is found, the byte right after it is overwritten
 * with NUL, *eol_p is moved to that position, and the overwritten
 * byte is returned so the caller can restore it.  Returns 0 and
 * leaves everything untouched when no '>' is found.
 */
static int strip_timestamp(char *bol, char **eol_p)
{
        char *cursor;

        for (cursor = *eol_p - 1; bol < cursor; cursor--) {
                if (*cursor == '>') {
                        char *cut = cursor + 1;
                        int saved = *cut;

                        *cut = '\0';
                        *eol_p = cut;
                        return saved;
                }
        }
        return 0;
}
1104
/*
 * Header line prefixes and their lengths.  match_one_pattern() indexes
 * this table with p->field, so the order of the entries has to match
 * the numbering of the header field values.
 */
static struct {
        const char *field;
        size_t len;
} header_field[] = {
        { "author ", 7 },
        { "committer ", 10 },
        { "reflog ", 7 },
};
1113
/*
 * Try to match the single pattern "p" against the line [bol, eol).
 *
 * Header patterns only apply in GREP_CONTEXT_HEAD and body patterns
 * only outside of it; plain GREP_PATTERN atoms apply everywhere.  For
 * header patterns the line has to start with the field's prefix from
 * header_field[], and the match is then attempted on the text after
 * that prefix.
 *
 * Returns non-zero on a hit.  On a hit, pmatch[0] holds the match
 * offsets relative to the "bol" that was passed in.
 */
static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
                             enum grep_context ctx,
                             regmatch_t *pmatch, int eflags)
{
        int hit = 0;
        int saved_ch = 0;
        const char *start = bol; /* original line start, for offset fix-up */

        /* Skip head/body patterns that do not belong to this context. */
        if ((p->token != GREP_PATTERN) &&
            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
                return 0;

        if (p->token == GREP_PATTERN_HEAD) {
                const char *field;
                size_t len;
                assert(p->field < ARRAY_SIZE(header_field));
                field = header_field[p->field].field;
                len = header_field[p->field].len;
                if (strncmp(bol, field, len))
                        return 0;
                bol += len;
                switch (p->field) {
                case GREP_HEADER_AUTHOR:
                case GREP_HEADER_COMMITTER:
                        /*
                         * Temporarily cut the line after the last '>';
                         * the overwritten byte is put back below.
                         */
                        saved_ch = strip_timestamp(bol, &eol);
                        break;
                default:
                        break;
                }
        }

 again:
        hit = patmatch(p, bol, eol, pmatch, eflags);

        if (hit && p->word_regexp) {
                /* Reject offsets that fall outside of the line. */
                if ((pmatch[0].rm_so < 0) ||
                    (eol - bol) < pmatch[0].rm_so ||
                    (pmatch[0].rm_eo < 0) ||
                    (eol - bol) < pmatch[0].rm_eo)
                        die("regexp returned nonsense");

                /* Match beginning must be either beginning of the
                 * line, or at word boundary (i.e. the last char must
                 * not be a word char).  Similarly, match end must be
                 * either end of the line, or at word boundary
                 * (i.e. the next char must not be a word char).
                 */
                if ( ((pmatch[0].rm_so == 0) ||
                      !word_char(bol[pmatch[0].rm_so-1])) &&
                     ((pmatch[0].rm_eo == (eol-bol)) ||
                      !word_char(bol[pmatch[0].rm_eo])) )
                        ;
                else
                        hit = 0;

                /* Words consist of at least one character. */
                if (pmatch->rm_so == pmatch->rm_eo)
                        hit = 0;

                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
                        /* There could be more than one match on the
                         * line, and the first match might not be
                         * strict word match.  But later ones could be!
                         * Forward to the next possible start, i.e. the
                         * next position following a non-word char.
                         */
                        bol = pmatch[0].rm_so + bol + 1;
                        while (word_char(bol[-1]) && bol < eol)
                                bol++;
                        /* We are no longer at the start of the line. */
                        eflags |= REG_NOTBOL;
                        if (bol < eol)
                                goto again;
                }
        }
        /* Undo the truncation done by strip_timestamp(). */
        if (p->token == GREP_PATTERN_HEAD && saved_ch)
                *eol = saved_ch;
        if (hit) {
                /* Report offsets relative to the caller's line start. */
                pmatch[0].rm_so += bol - start;
                pmatch[0].rm_eo += bol - start;
        }
        return hit;
}
1196
1197static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
1198                           enum grep_context ctx, int collect_hits)
1199{
1200        int h = 0;
1201        regmatch_t match;
1202
1203        if (!x)
1204                die("Not a valid grep expression");
1205        switch (x->node) {
1206        case GREP_NODE_TRUE:
1207                h = 1;
1208                break;
1209        case GREP_NODE_ATOM:
1210                h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0);
1211                break;
1212        case GREP_NODE_NOT:
1213                h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0);
1214                break;
1215        case GREP_NODE_AND:
1216                if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0))
1217                        return 0;
1218                h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0);
1219                break;
1220        case GREP_NODE_OR:
1221                if (!collect_hits)
1222                        return (match_expr_eval(x->u.binary.left,
1223                                                bol, eol, ctx, 0) ||
1224                                match_expr_eval(x->u.binary.right,
1225                                                bol, eol, ctx, 0));
1226                h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0);
1227                x->u.binary.left->hit |= h;
1228                h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1);
1229                break;
1230        default:
1231                die("Unexpected node type (internal error) %d", x->node);
1232        }
1233        if (collect_hits)
1234                x->hit |= h;
1235        return h;
1236}
1237
1238static int match_expr(struct grep_opt *opt, char *bol, char *eol,
1239                      enum grep_context ctx, int collect_hits)
1240{
1241        struct grep_expr *x = opt->pattern_expression;
1242        return match_expr_eval(x, bol, eol, ctx, collect_hits);
1243}
1244
1245static int match_line(struct grep_opt *opt, char *bol, char *eol,
1246                      enum grep_context ctx, int collect_hits)
1247{
1248        struct grep_pat *p;
1249        regmatch_t match;
1250
1251        if (opt->extended)
1252                return match_expr(opt, bol, eol, ctx, collect_hits);
1253
1254        /* we do not call with collect_hits without being extended */
1255        for (p = opt->pattern_list; p; p = p->next) {
1256                if (match_one_pattern(p, bol, eol, ctx, &match, 0))
1257                        return 1;
1258        }
1259        return 0;
1260}
1261
1262static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
1263                              enum grep_context ctx,
1264                              regmatch_t *pmatch, int eflags)
1265{
1266        regmatch_t match;
1267
1268        if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
1269                return 0;
1270        if (match.rm_so < 0 || match.rm_eo < 0)
1271                return 0;
1272        if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1273                if (match.rm_so > pmatch->rm_so)
1274                        return 1;
1275                if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1276                        return 1;
1277        }
1278        pmatch->rm_so = match.rm_so;
1279        pmatch->rm_eo = match.rm_eo;
1280        return 1;
1281}
1282
1283static int next_match(struct grep_opt *opt, char *bol, char *eol,
1284                      enum grep_context ctx, regmatch_t *pmatch, int eflags)
1285{
1286        struct grep_pat *p;
1287        int hit = 0;
1288
1289        pmatch->rm_so = pmatch->rm_eo = -1;
1290        if (bol < eol) {
1291                for (p = opt->pattern_list; p; p = p->next) {
1292                        switch (p->token) {
1293                        case GREP_PATTERN: /* atom */
1294                        case GREP_PATTERN_HEAD:
1295                        case GREP_PATTERN_BODY:
1296                                hit |= match_next_pattern(p, bol, eol, ctx,
1297                                                          pmatch, eflags);
1298                                break;
1299                        default:
1300                                break;
1301                        }
1302                }
1303        }
1304        return hit;
1305}
1306
1307static void show_line(struct grep_opt *opt, char *bol, char *eol,
1308                      const char *name, unsigned lno, char sign)
1309{
1310        int rest = eol - bol;
1311        const char *match_color, *line_color = NULL;
1312
1313        if (opt->file_break && opt->last_shown == 0) {
1314                if (opt->show_hunk_mark)
1315                        opt->output(opt, "\n", 1);
1316        } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1317                if (opt->last_shown == 0) {
1318                        if (opt->show_hunk_mark) {
1319                                output_color(opt, "--", 2, opt->color_sep);
1320                                opt->output(opt, "\n", 1);
1321                        }
1322                } else if (lno > opt->last_shown + 1) {
1323                        output_color(opt, "--", 2, opt->color_sep);
1324                        opt->output(opt, "\n", 1);
1325                }
1326        }
1327        if (opt->heading && opt->last_shown == 0) {
1328                output_color(opt, name, strlen(name), opt->color_filename);
1329                opt->output(opt, "\n", 1);
1330        }
1331        opt->last_shown = lno;
1332
1333        if (!opt->heading && opt->pathname) {
1334                output_color(opt, name, strlen(name), opt->color_filename);
1335                output_sep(opt, sign);
1336        }
1337        if (opt->linenum) {
1338                char buf[32];
1339                xsnprintf(buf, sizeof(buf), "%d", lno);
1340                output_color(opt, buf, strlen(buf), opt->color_lineno);
1341                output_sep(opt, sign);
1342        }
1343        if (opt->color) {
1344                regmatch_t match;
1345                enum grep_context ctx = GREP_CONTEXT_BODY;
1346                int ch = *eol;
1347                int eflags = 0;
1348
1349                if (sign == ':')
1350                        match_color = opt->color_match_selected;
1351                else
1352                        match_color = opt->color_match_context;
1353                if (sign == ':')
1354                        line_color = opt->color_selected;
1355                else if (sign == '-')
1356                        line_color = opt->color_context;
1357                else if (sign == '=')
1358                        line_color = opt->color_function;
1359                *eol = '\0';
1360                while (next_match(opt, bol, eol, ctx, &match, eflags)) {
1361                        if (match.rm_so == match.rm_eo)
1362                                break;
1363
1364                        output_color(opt, bol, match.rm_so, line_color);
1365                        output_color(opt, bol + match.rm_so,
1366                                     match.rm_eo - match.rm_so, match_color);
1367                        bol += match.rm_eo;
1368                        rest -= match.rm_eo;
1369                        eflags = REG_NOTBOL;
1370                }
1371                *eol = ch;
1372        }
1373        output_color(opt, bol, rest, line_color);
1374        opt->output(opt, "\n", 1);
1375}
1376
#ifndef NO_PTHREADS
/* When non-zero, the lock helpers below actually take their mutex. */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

/* Take grep_attr_mutex, but only if locking is enabled. */
static inline void grep_attr_lock(void)
{
        if (grep_use_locks)
                pthread_mutex_lock(&grep_attr_mutex);
}

/* Release grep_attr_mutex, but only if locking is enabled. */
static inline void grep_attr_unlock(void)
{
        if (grep_use_locks)
                pthread_mutex_unlock(&grep_attr_mutex);
}

/*
 * Same as grep_attr_mutex, but protecting the thread-unsafe object db access.
 */
pthread_mutex_t grep_read_mutex;

#else
/* Without pthreads the attribute lock helpers expand to nothing. */
#define grep_attr_lock()
#define grep_attr_unlock()
#endif
1407
1408static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
1409{
1410        xdemitconf_t *xecfg = opt->priv;
1411        if (xecfg && !xecfg->find_func) {
1412                grep_source_load_driver(gs);
1413                if (gs->driver->funcname.pattern) {
1414                        const struct userdiff_funcname *pe = &gs->driver->funcname;
1415                        xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1416                } else {
1417                        xecfg = opt->priv = NULL;
1418                }
1419        }
1420
1421        if (xecfg) {
1422                char buf[1];
1423                return xecfg->find_func(bol, eol - bol, buf, 1,
1424                                        xecfg->find_func_priv) >= 0;
1425        }
1426
1427        if (bol == eol)
1428                return 0;
1429        if (isalpha(*bol) || *bol == '_' || *bol == '$')
1430                return 1;
1431        return 0;
1432}
1433
1434static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1435                               char *bol, unsigned lno)
1436{
1437        while (bol > gs->buf) {
1438                char *eol = --bol;
1439
1440                while (bol > gs->buf && bol[-1] != '\n')
1441                        bol--;
1442                lno--;
1443
1444                if (lno <= opt->last_shown)
1445                        break;
1446
1447                if (match_funcname(opt, gs, bol, eol)) {
1448                        show_line(opt, bol, eol, gs->name, lno, '=');
1449                        break;
1450                }
1451        }
1452}
1453
/*
 * Show the context lines that precede the hit on line "lno", which
 * spans [bol, end).  The buffer is first rewound line by line, looking
 * for a function header on the way when one is wanted, and the lines
 * from the rewound position up to (not including) "lno" are then
 * shown in order.
 */
static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
                             char *bol, char *end, unsigned lno)
{
        unsigned cur = lno, from = 1, funcname_lno = 0;
        int funcname_needed = !!opt->funcname;

        /*
         * With funcbody, unless the hit line is itself a function
         * header, rewind past the pre-context window (value 2) until a
         * header is found.
         */
        if (opt->funcbody && !match_funcname(opt, gs, bol, end))
                funcname_needed = 2;

        /* First line of the window, never re-showing shown lines. */
        if (opt->pre_context < lno)
                from = lno - opt->pre_context;
        if (from <= opt->last_shown)
                from = opt->last_shown + 1;

        /* Rewind. */
        while (bol > gs->buf &&
               cur > (funcname_needed == 2 ? opt->last_shown + 1 : from)) {
                char *eol = --bol;

                while (bol > gs->buf && bol[-1] != '\n')
                        bol--;
                cur--;
                if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
                        funcname_lno = cur;
                        funcname_needed = 0;
                }
        }

        /* We need to look even further back to find a function signature. */
        if (opt->funcname && funcname_needed)
                show_funcname_line(opt, gs, bol, cur);

        /* Back forward. */
        while (cur < lno) {
                /* The function header line gets '=', other lines '-'. */
                char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';

                while (*eol != '\n')
                        eol++;
                show_line(opt, bol, eol, gs->name, cur, sign);
                bol = eol + 1;
                cur++;
        }
}
1497
1498static int should_lookahead(struct grep_opt *opt)
1499{
1500        struct grep_pat *p;
1501
1502        if (opt->extended)
1503                return 0; /* punt for too complex stuff */
1504        if (opt->invert)
1505                return 0;
1506        for (p = opt->pattern_list; p; p = p->next) {
1507                if (p->token != GREP_PATTERN)
1508                        return 0; /* punt for "header only" and stuff */
1509        }
1510        return 1;
1511}
1512
1513static int look_ahead(struct grep_opt *opt,
1514                      unsigned long *left_p,
1515                      unsigned *lno_p,
1516                      char **bol_p)
1517{
1518        unsigned lno = *lno_p;
1519        char *bol = *bol_p;
1520        struct grep_pat *p;
1521        char *sp, *last_bol;
1522        regoff_t earliest = -1;
1523
1524        for (p = opt->pattern_list; p; p = p->next) {
1525                int hit;
1526                regmatch_t m;
1527
1528                hit = patmatch(p, bol, bol + *left_p, &m, 0);
1529                if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1530                        continue;
1531                if (earliest < 0 || m.rm_so < earliest)
1532                        earliest = m.rm_so;
1533        }
1534
1535        if (earliest < 0) {
1536                *bol_p = bol + *left_p;
1537                *left_p = 0;
1538                return 1;
1539        }
1540        for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1541                ; /* find the beginning of the line */
1542        last_bol = sp;
1543
1544        for (sp = bol; sp < last_bol; sp++) {
1545                if (*sp == '\n')
1546                        lno++;
1547        }
1548        *left_p -= last_bol - bol;
1549        *bol_p = last_bol;
1550        *lno_p = lno;
1551        return 0;
1552}
1553
1554static int fill_textconv_grep(struct userdiff_driver *driver,
1555                              struct grep_source *gs)
1556{
1557        struct diff_filespec *df;
1558        char *buf;
1559        size_t size;
1560
1561        if (!driver || !driver->textconv)
1562                return grep_source_load(gs);
1563
1564        /*
1565         * The textconv interface is intimately tied to diff_filespecs, so we
1566         * have to pretend to be one. If we could unify the grep_source
1567         * and diff_filespec structs, this mess could just go away.
1568         */
1569        df = alloc_filespec(gs->path);
1570        switch (gs->type) {
1571        case GREP_SOURCE_OID:
1572                fill_filespec(df, gs->identifier, 1, 0100644);
1573                break;
1574        case GREP_SOURCE_FILE:
1575                fill_filespec(df, &null_oid, 0, 0100644);
1576                break;
1577        default:
1578                die("BUG: attempt to textconv something without a path?");
1579        }
1580
1581        /*
1582         * fill_textconv is not remotely thread-safe; it may load objects
1583         * behind the scenes, and it modifies the global diff tempfile
1584         * structure.
1585         */
1586        grep_read_lock();
1587        size = fill_textconv(driver, df, &buf);
1588        grep_read_unlock();
1589        free_filespec(df);
1590
1591        /*
1592         * The normal fill_textconv usage by the diff machinery would just keep
1593         * the textconv'd buf separate from the diff_filespec. But much of the
1594         * grep code passes around a grep_source and assumes that its "buf"
1595         * pointer is the beginning of the thing we are searching. So let's
1596         * install our textconv'd version into the grep_source, taking care not
1597         * to leak any existing buffer.
1598         */
1599        grep_source_clear_data(gs);
1600        gs->buf = buf;
1601        gs->size = size;
1602
1603        return 0;
1604}
1605
/* Return 1 if [bol, eol) consists of whitespace only, 0 otherwise. */
static int is_empty_line(const char *bol, const char *eol)
{
        const char *cp;

        for (cp = bol; cp < eol && isspace(*cp); cp++)
                ; /* skip leading whitespace */
        return cp == eol ? 1 : 0;
}
1612
1613static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1614{
1615        char *bol;
1616        char *peek_bol = NULL;
1617        unsigned long left;
1618        unsigned lno = 1;
1619        unsigned last_hit = 0;
1620        int binary_match_only = 0;
1621        unsigned count = 0;
1622        int try_lookahead = 0;
1623        int show_function = 0;
1624        struct userdiff_driver *textconv = NULL;
1625        enum grep_context ctx = GREP_CONTEXT_HEAD;
1626        xdemitconf_t xecfg;
1627
1628        if (!opt->output)
1629                opt->output = std_output;
1630
1631        if (opt->pre_context || opt->post_context || opt->file_break ||
1632            opt->funcbody) {
1633                /* Show hunk marks, except for the first file. */
1634                if (opt->last_shown)
1635                        opt->show_hunk_mark = 1;
1636                /*
1637                 * If we're using threads then we can't easily identify
1638                 * the first file.  Always put hunk marks in that case
1639                 * and skip the very first one later in work_done().
1640                 */
1641                if (opt->output != std_output)
1642                        opt->show_hunk_mark = 1;
1643        }
1644        opt->last_shown = 0;
1645
1646        if (opt->allow_textconv) {
1647                grep_source_load_driver(gs);
1648                /*
1649                 * We might set up the shared textconv cache data here, which
1650                 * is not thread-safe.
1651                 */
1652                grep_attr_lock();
1653                textconv = userdiff_get_textconv(gs->driver);
1654                grep_attr_unlock();
1655        }
1656
1657        /*
1658         * We know the result of a textconv is text, so we only have to care
1659         * about binary handling if we are not using it.
1660         */
1661        if (!textconv) {
1662                switch (opt->binary) {
1663                case GREP_BINARY_DEFAULT:
1664                        if (grep_source_is_binary(gs))
1665                                binary_match_only = 1;
1666                        break;
1667                case GREP_BINARY_NOMATCH:
1668                        if (grep_source_is_binary(gs))
1669                                return 0; /* Assume unmatch */
1670                        break;
1671                case GREP_BINARY_TEXT:
1672                        break;
1673                default:
1674                        die("BUG: unknown binary handling mode");
1675                }
1676        }
1677
1678        memset(&xecfg, 0, sizeof(xecfg));
1679        opt->priv = &xecfg;
1680
1681        try_lookahead = should_lookahead(opt);
1682
1683        if (fill_textconv_grep(textconv, gs) < 0)
1684                return 0;
1685
1686        bol = gs->buf;
1687        left = gs->size;
1688        while (left) {
1689                char *eol, ch;
1690                int hit;
1691
1692                /*
1693                 * look_ahead() skips quickly to the line that possibly
1694                 * has the next hit; don't call it if we need to do
1695                 * something more than just skipping the current line
1696                 * in response to an unmatch for the current line.  E.g.
1697                 * inside a post-context window, we will show the current
1698                 * line as a context around the previous hit when it
1699                 * doesn't hit.
1700                 */
1701                if (try_lookahead
1702                    && !(last_hit
1703                         && (show_function ||
1704                             lno <= last_hit + opt->post_context))
1705                    && look_ahead(opt, &left, &lno, &bol))
1706                        break;
1707                eol = end_of_line(bol, &left);
1708                ch = *eol;
1709                *eol = 0;
1710
1711                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1712                        ctx = GREP_CONTEXT_BODY;
1713
1714                hit = match_line(opt, bol, eol, ctx, collect_hits);
1715                *eol = ch;
1716
1717                if (collect_hits)
1718                        goto next_line;
1719
1720                /* "grep -v -e foo -e bla" should list lines
1721                 * that do not have either, so inversion should
1722                 * be done outside.
1723                 */
1724                if (opt->invert)
1725                        hit = !hit;
1726                if (opt->unmatch_name_only) {
1727                        if (hit)
1728                                return 0;
1729                        goto next_line;
1730                }
1731                if (hit) {
1732                        count++;
1733                        if (opt->status_only)
1734                                return 1;
1735                        if (opt->name_only) {
1736                                show_name(opt, gs->name);
1737                                return 1;
1738                        }
1739                        if (opt->count)
1740                                goto next_line;
1741                        if (binary_match_only) {
1742                                opt->output(opt, "Binary file ", 12);
1743                                output_color(opt, gs->name, strlen(gs->name),
1744                                             opt->color_filename);
1745                                opt->output(opt, " matches\n", 9);
1746                                return 1;
1747                        }
1748                        /* Hit at this line.  If we haven't shown the
1749                         * pre-context lines, we would need to show them.
1750                         */
1751                        if (opt->pre_context || opt->funcbody)
1752                                show_pre_context(opt, gs, bol, eol, lno);
1753                        else if (opt->funcname)
1754                                show_funcname_line(opt, gs, bol, lno);
1755                        show_line(opt, bol, eol, gs->name, lno, ':');
1756                        last_hit = lno;
1757                        if (opt->funcbody)
1758                                show_function = 1;
1759                        goto next_line;
1760                }
1761                if (show_function && (!peek_bol || peek_bol < bol)) {
1762                        unsigned long peek_left = left;
1763                        char *peek_eol = eol;
1764
1765                        /*
1766                         * Trailing empty lines are not interesting.
1767                         * Peek past them to see if they belong to the
1768                         * body of the current function.
1769                         */
1770                        peek_bol = bol;
1771                        while (is_empty_line(peek_bol, peek_eol)) {
1772                                peek_bol = peek_eol + 1;
1773                                peek_eol = end_of_line(peek_bol, &peek_left);
1774                        }
1775
1776                        if (match_funcname(opt, gs, peek_bol, peek_eol))
1777                                show_function = 0;
1778                }
1779                if (show_function ||
1780                    (last_hit && lno <= last_hit + opt->post_context)) {
1781                        /* If the last hit is within the post context,
1782                         * we need to show this line.
1783                         */
1784                        show_line(opt, bol, eol, gs->name, lno, '-');
1785                }
1786
1787        next_line:
1788                bol = eol + 1;
1789                if (!left)
1790                        break;
1791                left--;
1792                lno++;
1793        }
1794
1795        if (collect_hits)
1796                return 0;
1797
1798        if (opt->status_only)
1799                return 0;
1800        if (opt->unmatch_name_only) {
1801                /* We did not see any hit, so we want to show this */
1802                show_name(opt, gs->name);
1803                return 1;
1804        }
1805
1806        xdiff_clear_find_func(&xecfg);
1807        opt->priv = NULL;
1808
1809        /* NEEDSWORK:
1810         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1811         * which feels mostly useless but sometimes useful.  Maybe
1812         * make it another option?  For now suppress them.
1813         */
1814        if (opt->count && count) {
1815                char buf[32];
1816                if (opt->pathname) {
1817                        output_color(opt, gs->name, strlen(gs->name),
1818                                     opt->color_filename);
1819                        output_sep(opt, ':');
1820                }
1821                xsnprintf(buf, sizeof(buf), "%u\n", count);
1822                opt->output(opt, buf, strlen(buf));
1823                return 1;
1824        }
1825        return !!last_hit;
1826}
1827
1828static void clr_hit_marker(struct grep_expr *x)
1829{
1830        /* All-hit markers are meaningful only at the very top level
1831         * OR node.
1832         */
1833        while (1) {
1834                x->hit = 0;
1835                if (x->node != GREP_NODE_OR)
1836                        return;
1837                x->u.binary.left->hit = 0;
1838                x = x->u.binary.right;
1839        }
1840}
1841
1842static int chk_hit_marker(struct grep_expr *x)
1843{
1844        /* Top level nodes have hit markers.  See if they all are hits */
1845        while (1) {
1846                if (x->node != GREP_NODE_OR)
1847                        return x->hit;
1848                if (!x->u.binary.left->hit)
1849                        return 0;
1850                x = x->u.binary.right;
1851        }
1852}
1853
1854int grep_source(struct grep_opt *opt, struct grep_source *gs)
1855{
1856        /*
1857         * we do not have to do the two-pass grep when we do not check
1858         * buffer-wide "all-match".
1859         */
1860        if (!opt->all_match)
1861                return grep_source_1(opt, gs, 0);
1862
1863        /* Otherwise the toplevel "or" terms hit a bit differently.
1864         * We first clear hit markers from them.
1865         */
1866        clr_hit_marker(opt->pattern_expression);
1867        grep_source_1(opt, gs, 1);
1868
1869        if (!chk_hit_marker(opt->pattern_expression))
1870                return 0;
1871
1872        return grep_source_1(opt, gs, 0);
1873}
1874
1875int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
1876{
1877        struct grep_source gs;
1878        int r;
1879
1880        grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
1881        gs.buf = buf;
1882        gs.size = size;
1883
1884        r = grep_source(opt, &gs);
1885
1886        grep_source_clear(&gs);
1887        return r;
1888}
1889
1890void grep_source_init(struct grep_source *gs, enum grep_source_type type,
1891                      const char *name, const char *path,
1892                      const void *identifier)
1893{
1894        gs->type = type;
1895        gs->name = xstrdup_or_null(name);
1896        gs->path = xstrdup_or_null(path);
1897        gs->buf = NULL;
1898        gs->size = 0;
1899        gs->driver = NULL;
1900
1901        switch (type) {
1902        case GREP_SOURCE_FILE:
1903                gs->identifier = xstrdup(identifier);
1904                break;
1905        case GREP_SOURCE_SUBMODULE:
1906                if (!identifier) {
1907                        gs->identifier = NULL;
1908                        break;
1909                }
1910                /*
1911                 * FALL THROUGH
1912                 * If the identifier is non-NULL (in the submodule case) it
1913                 * will be a SHA1 that needs to be copied.
1914                 */
1915        case GREP_SOURCE_OID:
1916                gs->identifier = oiddup(identifier);
1917                break;
1918        case GREP_SOURCE_BUF:
1919                gs->identifier = NULL;
1920                break;
1921        }
1922}
1923
1924void grep_source_clear(struct grep_source *gs)
1925{
1926        FREE_AND_NULL(gs->name);
1927        FREE_AND_NULL(gs->path);
1928        FREE_AND_NULL(gs->identifier);
1929        grep_source_clear_data(gs);
1930}
1931
1932void grep_source_clear_data(struct grep_source *gs)
1933{
1934        switch (gs->type) {
1935        case GREP_SOURCE_FILE:
1936        case GREP_SOURCE_OID:
1937        case GREP_SOURCE_SUBMODULE:
1938                FREE_AND_NULL(gs->buf);
1939                gs->size = 0;
1940                break;
1941        case GREP_SOURCE_BUF:
1942                /* leave user-provided buf intact */
1943                break;
1944        }
1945}
1946
1947static int grep_source_load_oid(struct grep_source *gs)
1948{
1949        enum object_type type;
1950
1951        grep_read_lock();
1952        gs->buf = read_sha1_file(gs->identifier, &type, &gs->size);
1953        grep_read_unlock();
1954
1955        if (!gs->buf)
1956                return error(_("'%s': unable to read %s"),
1957                             gs->name,
1958                             oid_to_hex(gs->identifier));
1959        return 0;
1960}
1961
1962static int grep_source_load_file(struct grep_source *gs)
1963{
1964        const char *filename = gs->identifier;
1965        struct stat st;
1966        char *data;
1967        size_t size;
1968        int i;
1969
1970        if (lstat(filename, &st) < 0) {
1971        err_ret:
1972                if (errno != ENOENT)
1973                        error_errno(_("failed to stat '%s'"), filename);
1974                return -1;
1975        }
1976        if (!S_ISREG(st.st_mode))
1977                return -1;
1978        size = xsize_t(st.st_size);
1979        i = open(filename, O_RDONLY);
1980        if (i < 0)
1981                goto err_ret;
1982        data = xmallocz(size);
1983        if (st.st_size != read_in_full(i, data, size)) {
1984                error_errno(_("'%s': short read"), filename);
1985                close(i);
1986                free(data);
1987                return -1;
1988        }
1989        close(i);
1990
1991        gs->buf = data;
1992        gs->size = size;
1993        return 0;
1994}
1995
1996static int grep_source_load(struct grep_source *gs)
1997{
1998        if (gs->buf)
1999                return 0;
2000
2001        switch (gs->type) {
2002        case GREP_SOURCE_FILE:
2003                return grep_source_load_file(gs);
2004        case GREP_SOURCE_OID:
2005                return grep_source_load_oid(gs);
2006        case GREP_SOURCE_BUF:
2007                return gs->buf ? 0 : -1;
2008        case GREP_SOURCE_SUBMODULE:
2009                break;
2010        }
2011        die("BUG: invalid grep_source type to load");
2012}
2013
2014void grep_source_load_driver(struct grep_source *gs)
2015{
2016        if (gs->driver)
2017                return;
2018
2019        grep_attr_lock();
2020        if (gs->path)
2021                gs->driver = userdiff_find_by_path(gs->path);
2022        if (!gs->driver)
2023                gs->driver = userdiff_find_by_name("default");
2024        grep_attr_unlock();
2025}
2026
2027static int grep_source_is_binary(struct grep_source *gs)
2028{
2029        grep_source_load_driver(gs);
2030        if (gs->driver->binary != -1)
2031                return gs->driver->binary;
2032
2033        if (!grep_source_load(gs))
2034                return buffer_is_binary(gs->buf, gs->size);
2035
2036        return 0;
2037}