grep.con commit grep.c: teach 'git grep --only-matching' (9d8db06)
   1#include "cache.h"
   2#include "config.h"
   3#include "grep.h"
   4#include "userdiff.h"
   5#include "xdiff-interface.h"
   6#include "diff.h"
   7#include "diffcore.h"
   8#include "commit.h"
   9#include "quote.h"
  10
  11static int grep_source_load(struct grep_source *gs);
  12static int grep_source_is_binary(struct grep_source *gs);
  13
  14static struct grep_opt grep_defaults;
  15
  16static void std_output(struct grep_opt *opt, const void *buf, size_t size)
  17{
  18        fwrite(buf, size, 1, stdout);
  19}
  20
  21static void color_set(char *dst, const char *color_bytes)
  22{
  23        xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes);
  24}
  25
  26/*
  27 * Initialize the grep_defaults template with hardcoded defaults.
  28 * We could let the compiler do this, but without C99 initializers
  29 * the code gets unwieldy and unreadable, so...
  30 */
  31void init_grep_defaults(void)
  32{
  33        struct grep_opt *opt = &grep_defaults;
  34        static int run_once;
  35
  36        if (run_once)
  37                return;
  38        run_once++;
  39
  40        memset(opt, 0, sizeof(*opt));
  41        opt->relative = 1;
  42        opt->pathname = 1;
  43        opt->max_depth = -1;
  44        opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
  45        color_set(opt->color_context, "");
  46        color_set(opt->color_filename, "");
  47        color_set(opt->color_function, "");
  48        color_set(opt->color_lineno, "");
  49        color_set(opt->color_columnno, "");
  50        color_set(opt->color_match_context, GIT_COLOR_BOLD_RED);
  51        color_set(opt->color_match_selected, GIT_COLOR_BOLD_RED);
  52        color_set(opt->color_selected, "");
  53        color_set(opt->color_sep, GIT_COLOR_CYAN);
  54        opt->only_matching = 0;
  55        opt->color = -1;
  56        opt->output = std_output;
  57}
  58
  59static int parse_pattern_type_arg(const char *opt, const char *arg)
  60{
  61        if (!strcmp(arg, "default"))
  62                return GREP_PATTERN_TYPE_UNSPECIFIED;
  63        else if (!strcmp(arg, "basic"))
  64                return GREP_PATTERN_TYPE_BRE;
  65        else if (!strcmp(arg, "extended"))
  66                return GREP_PATTERN_TYPE_ERE;
  67        else if (!strcmp(arg, "fixed"))
  68                return GREP_PATTERN_TYPE_FIXED;
  69        else if (!strcmp(arg, "perl"))
  70                return GREP_PATTERN_TYPE_PCRE;
  71        die("bad %s argument: %s", opt, arg);
  72}
  73
  74/*
  75 * Read the configuration file once and store it in
  76 * the grep_defaults template.
  77 */
  78int grep_config(const char *var, const char *value, void *cb)
  79{
  80        struct grep_opt *opt = &grep_defaults;
  81        char *color = NULL;
  82
  83        if (userdiff_config(var, value) < 0)
  84                return -1;
  85
  86        if (!strcmp(var, "grep.extendedregexp")) {
  87                opt->extended_regexp_option = git_config_bool(var, value);
  88                return 0;
  89        }
  90
  91        if (!strcmp(var, "grep.patterntype")) {
  92                opt->pattern_type_option = parse_pattern_type_arg(var, value);
  93                return 0;
  94        }
  95
  96        if (!strcmp(var, "grep.linenumber")) {
  97                opt->linenum = git_config_bool(var, value);
  98                return 0;
  99        }
 100        if (!strcmp(var, "grep.column")) {
 101                opt->columnnum = git_config_bool(var, value);
 102                return 0;
 103        }
 104
 105        if (!strcmp(var, "grep.fullname")) {
 106                opt->relative = !git_config_bool(var, value);
 107                return 0;
 108        }
 109
 110        if (!strcmp(var, "color.grep"))
 111                opt->color = git_config_colorbool(var, value);
 112        else if (!strcmp(var, "color.grep.context"))
 113                color = opt->color_context;
 114        else if (!strcmp(var, "color.grep.filename"))
 115                color = opt->color_filename;
 116        else if (!strcmp(var, "color.grep.function"))
 117                color = opt->color_function;
 118        else if (!strcmp(var, "color.grep.linenumber"))
 119                color = opt->color_lineno;
 120        else if (!strcmp(var, "color.grep.column"))
 121                color = opt->color_columnno;
 122        else if (!strcmp(var, "color.grep.matchcontext"))
 123                color = opt->color_match_context;
 124        else if (!strcmp(var, "color.grep.matchselected"))
 125                color = opt->color_match_selected;
 126        else if (!strcmp(var, "color.grep.selected"))
 127                color = opt->color_selected;
 128        else if (!strcmp(var, "color.grep.separator"))
 129                color = opt->color_sep;
 130        else if (!strcmp(var, "color.grep.match")) {
 131                int rc = 0;
 132                if (!value)
 133                        return config_error_nonbool(var);
 134                rc |= color_parse(value, opt->color_match_context);
 135                rc |= color_parse(value, opt->color_match_selected);
 136                return rc;
 137        }
 138
 139        if (color) {
 140                if (!value)
 141                        return config_error_nonbool(var);
 142                return color_parse(value, color);
 143        }
 144        return 0;
 145}
 146
 147/*
 148 * Initialize one instance of grep_opt and copy the
 149 * default values from the template we read the configuration
 150 * information in an earlier call to git_config(grep_config).
 151 */
 152void grep_init(struct grep_opt *opt, const char *prefix)
 153{
 154        struct grep_opt *def = &grep_defaults;
 155
 156        memset(opt, 0, sizeof(*opt));
 157        opt->prefix = prefix;
 158        opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
 159        opt->pattern_tail = &opt->pattern_list;
 160        opt->header_tail = &opt->header_list;
 161
 162        opt->only_matching = def->only_matching;
 163        opt->color = def->color;
 164        opt->extended_regexp_option = def->extended_regexp_option;
 165        opt->pattern_type_option = def->pattern_type_option;
 166        opt->linenum = def->linenum;
 167        opt->columnnum = def->columnnum;
 168        opt->max_depth = def->max_depth;
 169        opt->pathname = def->pathname;
 170        opt->relative = def->relative;
 171        opt->output = def->output;
 172
 173        color_set(opt->color_context, def->color_context);
 174        color_set(opt->color_filename, def->color_filename);
 175        color_set(opt->color_function, def->color_function);
 176        color_set(opt->color_lineno, def->color_lineno);
 177        color_set(opt->color_columnno, def->color_columnno);
 178        color_set(opt->color_match_context, def->color_match_context);
 179        color_set(opt->color_match_selected, def->color_match_selected);
 180        color_set(opt->color_selected, def->color_selected);
 181        color_set(opt->color_sep, def->color_sep);
 182}
 183
 184static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 185{
 186        /*
 187         * When committing to the pattern type by setting the relevant
 188         * fields in grep_opt it's generally not necessary to zero out
 189         * the fields we're not choosing, since they won't have been
 190         * set by anything. The extended_regexp_option field is the
 191         * only exception to this.
 192         *
 193         * This is because in the process of parsing grep.patternType
 194         * & grep.extendedRegexp we set opt->pattern_type_option and
 195         * opt->extended_regexp_option, respectively. We then
 196         * internally use opt->extended_regexp_option to see if we're
 197         * compiling an ERE. It must be unset if that's not actually
 198         * the case.
 199         */
 200        if (pattern_type != GREP_PATTERN_TYPE_ERE &&
 201            opt->extended_regexp_option)
 202                opt->extended_regexp_option = 0;
 203
 204        switch (pattern_type) {
 205        case GREP_PATTERN_TYPE_UNSPECIFIED:
 206                /* fall through */
 207
 208        case GREP_PATTERN_TYPE_BRE:
 209                break;
 210
 211        case GREP_PATTERN_TYPE_ERE:
 212                opt->extended_regexp_option = 1;
 213                break;
 214
 215        case GREP_PATTERN_TYPE_FIXED:
 216                opt->fixed = 1;
 217                break;
 218
 219        case GREP_PATTERN_TYPE_PCRE:
 220#ifdef USE_LIBPCRE2
 221                opt->pcre2 = 1;
 222#else
 223                /*
 224                 * It's important that pcre1 always be assigned to
 225                 * even when there's no USE_LIBPCRE* defined. We still
 226                 * call the PCRE stub function, it just dies with
 227                 * "cannot use Perl-compatible regexes[...]".
 228                 */
 229                opt->pcre1 = 1;
 230#endif
 231                break;
 232        }
 233}
 234
 235void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 236{
 237        if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
 238                grep_set_pattern_type_option(pattern_type, opt);
 239        else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
 240                grep_set_pattern_type_option(opt->pattern_type_option, opt);
 241        else if (opt->extended_regexp_option)
 242                /*
 243                 * This branch *must* happen after setting from the
 244                 * opt->pattern_type_option above, we don't want
 245                 * grep.extendedRegexp to override grep.patternType!
 246                 */
 247                grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
 248}
 249
 250static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
 251                                        const char *origin, int no,
 252                                        enum grep_pat_token t,
 253                                        enum grep_header_field field)
 254{
 255        struct grep_pat *p = xcalloc(1, sizeof(*p));
 256        p->pattern = xmemdupz(pat, patlen);
 257        p->patternlen = patlen;
 258        p->origin = origin;
 259        p->no = no;
 260        p->token = t;
 261        p->field = field;
 262        return p;
 263}
 264
 265static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
 266{
 267        **tail = p;
 268        *tail = &p->next;
 269        p->next = NULL;
 270
 271        switch (p->token) {
 272        case GREP_PATTERN: /* atom */
 273        case GREP_PATTERN_HEAD:
 274        case GREP_PATTERN_BODY:
 275                for (;;) {
 276                        struct grep_pat *new_pat;
 277                        size_t len = 0;
 278                        char *cp = p->pattern + p->patternlen, *nl = NULL;
 279                        while (++len <= p->patternlen) {
 280                                if (*(--cp) == '\n') {
 281                                        nl = cp;
 282                                        break;
 283                                }
 284                        }
 285                        if (!nl)
 286                                break;
 287                        new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
 288                                                  p->no, p->token, p->field);
 289                        new_pat->next = p->next;
 290                        if (!p->next)
 291                                *tail = &new_pat->next;
 292                        p->next = new_pat;
 293                        *nl = '\0';
 294                        p->patternlen -= len;
 295                }
 296                break;
 297        default:
 298                break;
 299        }
 300}
 301
 302void append_header_grep_pattern(struct grep_opt *opt,
 303                                enum grep_header_field field, const char *pat)
 304{
 305        struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
 306                                             GREP_PATTERN_HEAD, field);
 307        if (field == GREP_HEADER_REFLOG)
 308                opt->use_reflog_filter = 1;
 309        do_append_grep_pat(&opt->header_tail, p);
 310}
 311
 312void append_grep_pattern(struct grep_opt *opt, const char *pat,
 313                         const char *origin, int no, enum grep_pat_token t)
 314{
 315        append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 316}
 317
 318void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
 319                     const char *origin, int no, enum grep_pat_token t)
 320{
 321        struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
 322        do_append_grep_pat(&opt->pattern_tail, p);
 323}
 324
 325struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 326{
 327        struct grep_pat *pat;
 328        struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
 329        *ret = *opt;
 330
 331        ret->pattern_list = NULL;
 332        ret->pattern_tail = &ret->pattern_list;
 333
 334        for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
 335        {
 336                if(pat->token == GREP_PATTERN_HEAD)
 337                        append_header_grep_pattern(ret, pat->field,
 338                                                   pat->pattern);
 339                else
 340                        append_grep_pat(ret, pat->pattern, pat->patternlen,
 341                                        pat->origin, pat->no, pat->token);
 342        }
 343
 344        return ret;
 345}
 346
 347static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 348                const char *error)
 349{
 350        char where[1024];
 351
 352        if (p->no)
 353                xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
 354        else if (p->origin)
 355                xsnprintf(where, sizeof(where), "%s, ", p->origin);
 356        else
 357                where[0] = 0;
 358
 359        die("%s'%s': %s", where, p->pattern, error);
 360}
 361
 362static int is_fixed(const char *s, size_t len)
 363{
 364        size_t i;
 365
 366        for (i = 0; i < len; i++) {
 367                if (is_regex_special(s[i]))
 368                        return 0;
 369        }
 370
 371        return 1;
 372}
 373
 374static int has_null(const char *s, size_t len)
 375{
 376        /*
 377         * regcomp cannot accept patterns with NULs so when using it
 378         * we consider any pattern containing a NUL fixed.
 379         */
 380        if (memchr(s, 0, len))
 381                return 1;
 382
 383        return 0;
 384}
 385
 386#ifdef USE_LIBPCRE1
 387static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 388{
 389        const char *error;
 390        int erroffset;
 391        int options = PCRE_MULTILINE;
 392
 393        if (opt->ignore_case) {
 394                if (has_non_ascii(p->pattern))
 395                        p->pcre1_tables = pcre_maketables();
 396                options |= PCRE_CASELESS;
 397        }
 398        if (is_utf8_locale() && has_non_ascii(p->pattern))
 399                options |= PCRE_UTF8;
 400
 401        p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
 402                                      p->pcre1_tables);
 403        if (!p->pcre1_regexp)
 404                compile_regexp_failed(p, error);
 405
 406        p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
 407        if (!p->pcre1_extra_info && error)
 408                die("%s", error);
 409
 410#ifdef GIT_PCRE1_USE_JIT
 411        pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 412        if (p->pcre1_jit_on == 1) {
 413                p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
 414                if (!p->pcre1_jit_stack)
 415                        die("Couldn't allocate PCRE JIT stack");
 416                pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
 417        } else if (p->pcre1_jit_on != 0) {
 418                BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
 419                    p->pcre1_jit_on);
 420        }
 421#endif
 422}
 423
 424static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 425                regmatch_t *match, int eflags)
 426{
 427        int ovector[30], ret, flags = 0;
 428
 429        if (eflags & REG_NOTBOL)
 430                flags |= PCRE_NOTBOL;
 431
 432#ifdef GIT_PCRE1_USE_JIT
 433        if (p->pcre1_jit_on) {
 434                ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 435                                    eol - line, 0, flags, ovector,
 436                                    ARRAY_SIZE(ovector), p->pcre1_jit_stack);
 437        } else
 438#endif
 439        {
 440                ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 441                                eol - line, 0, flags, ovector,
 442                                ARRAY_SIZE(ovector));
 443        }
 444
 445        if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 446                die("pcre_exec failed with error code %d", ret);
 447        if (ret > 0) {
 448                ret = 0;
 449                match->rm_so = ovector[0];
 450                match->rm_eo = ovector[1];
 451        }
 452
 453        return ret;
 454}
 455
 456static void free_pcre1_regexp(struct grep_pat *p)
 457{
 458        pcre_free(p->pcre1_regexp);
 459#ifdef GIT_PCRE1_USE_JIT
 460        if (p->pcre1_jit_on) {
 461                pcre_free_study(p->pcre1_extra_info);
 462                pcre_jit_stack_free(p->pcre1_jit_stack);
 463        } else
 464#endif
 465        {
 466                pcre_free(p->pcre1_extra_info);
 467        }
 468        pcre_free((void *)p->pcre1_tables);
 469}
 470#else /* !USE_LIBPCRE1 */
 471static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 472{
 473        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 474}
 475
 476static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 477                regmatch_t *match, int eflags)
 478{
 479        return 1;
 480}
 481
 482static void free_pcre1_regexp(struct grep_pat *p)
 483{
 484}
 485#endif /* !USE_LIBPCRE1 */
 486
 487#ifdef USE_LIBPCRE2
 488static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 489{
 490        int error;
 491        PCRE2_UCHAR errbuf[256];
 492        PCRE2_SIZE erroffset;
 493        int options = PCRE2_MULTILINE;
 494        const uint8_t *character_tables = NULL;
 495        int jitret;
 496        int patinforet;
 497        size_t jitsizearg;
 498
 499        assert(opt->pcre2);
 500
 501        p->pcre2_compile_context = NULL;
 502
 503        if (opt->ignore_case) {
 504                if (has_non_ascii(p->pattern)) {
 505                        character_tables = pcre2_maketables(NULL);
 506                        p->pcre2_compile_context = pcre2_compile_context_create(NULL);
 507                        pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
 508                }
 509                options |= PCRE2_CASELESS;
 510        }
 511        if (is_utf8_locale() && has_non_ascii(p->pattern))
 512                options |= PCRE2_UTF;
 513
 514        p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
 515                                         p->patternlen, options, &error, &erroffset,
 516                                         p->pcre2_compile_context);
 517
 518        if (p->pcre2_pattern) {
 519                p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
 520                if (!p->pcre2_match_data)
 521                        die("Couldn't allocate PCRE2 match data");
 522        } else {
 523                pcre2_get_error_message(error, errbuf, sizeof(errbuf));
 524                compile_regexp_failed(p, (const char *)&errbuf);
 525        }
 526
 527        pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
 528        if (p->pcre2_jit_on == 1) {
 529                jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 530                if (jitret)
 531                        die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
 532
 533                /*
 534                 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
 535                 * tells us whether the library itself supports JIT,
 536                 * but to see whether we're going to be actually using
 537                 * JIT we need to extract PCRE2_INFO_JITSIZE from the
 538                 * pattern *after* we do pcre2_jit_compile() above.
 539                 *
 540                 * This is because if the pattern contains the
 541                 * (*NO_JIT) verb (see pcre2syntax(3))
 542                 * pcre2_jit_compile() will exit early with 0. If we
 543                 * then proceed to call pcre2_jit_match() further down
 544                 * the line instead of pcre2_match() we'll either
 545                 * segfault (pre PCRE 10.31) or run into a fatal error
 546                 * (post PCRE2 10.31)
 547                 */
 548                patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
 549                if (patinforet)
 550                        BUG("pcre2_pattern_info() failed: %d", patinforet);
 551                if (jitsizearg == 0) {
 552                        p->pcre2_jit_on = 0;
 553                        return;
 554                }
 555
 556                p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
 557                if (!p->pcre2_jit_stack)
 558                        die("Couldn't allocate PCRE2 JIT stack");
 559                p->pcre2_match_context = pcre2_match_context_create(NULL);
 560                if (!p->pcre2_match_context)
 561                        die("Couldn't allocate PCRE2 match context");
 562                pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
 563        } else if (p->pcre2_jit_on != 0) {
 564                BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
 565                    p->pcre1_jit_on);
 566        }
 567}
 568
 569static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 570                regmatch_t *match, int eflags)
 571{
 572        int ret, flags = 0;
 573        PCRE2_SIZE *ovector;
 574        PCRE2_UCHAR errbuf[256];
 575
 576        if (eflags & REG_NOTBOL)
 577                flags |= PCRE2_NOTBOL;
 578
 579        if (p->pcre2_jit_on)
 580                ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
 581                                      eol - line, 0, flags, p->pcre2_match_data,
 582                                      NULL);
 583        else
 584                ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
 585                                  eol - line, 0, flags, p->pcre2_match_data,
 586                                  NULL);
 587
 588        if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
 589                pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
 590                die("%s failed with error code %d: %s",
 591                    (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
 592                    errbuf);
 593        }
 594        if (ret > 0) {
 595                ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
 596                ret = 0;
 597                match->rm_so = (int)ovector[0];
 598                match->rm_eo = (int)ovector[1];
 599        }
 600
 601        return ret;
 602}
 603
 604static void free_pcre2_pattern(struct grep_pat *p)
 605{
 606        pcre2_compile_context_free(p->pcre2_compile_context);
 607        pcre2_code_free(p->pcre2_pattern);
 608        pcre2_match_data_free(p->pcre2_match_data);
 609        pcre2_jit_stack_free(p->pcre2_jit_stack);
 610        pcre2_match_context_free(p->pcre2_match_context);
 611}
 612#else /* !USE_LIBPCRE2 */
 613static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 614{
 615        /*
 616         * Unreachable until USE_LIBPCRE2 becomes synonymous with
 617         * USE_LIBPCRE. See the sibling comment in
 618         * grep_set_pattern_type_option().
 619         */
 620        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 621}
 622
 623static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 624                regmatch_t *match, int eflags)
 625{
 626        return 1;
 627}
 628
 629static void free_pcre2_pattern(struct grep_pat *p)
 630{
 631}
 632#endif /* !USE_LIBPCRE2 */
 633
 634static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 635{
 636        struct strbuf sb = STRBUF_INIT;
 637        int err;
 638        int regflags = 0;
 639
 640        basic_regex_quote_buf(&sb, p->pattern);
 641        if (opt->ignore_case)
 642                regflags |= REG_ICASE;
 643        err = regcomp(&p->regexp, sb.buf, regflags);
 644        if (opt->debug)
 645                fprintf(stderr, "fixed %s\n", sb.buf);
 646        strbuf_release(&sb);
 647        if (err) {
 648                char errbuf[1024];
 649                regerror(err, &p->regexp, errbuf, sizeof(errbuf));
 650                compile_regexp_failed(p, errbuf);
 651        }
 652}
 653
 654static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 655{
 656        int ascii_only;
 657        int err;
 658        int regflags = REG_NEWLINE;
 659
 660        p->word_regexp = opt->word_regexp;
 661        p->ignore_case = opt->ignore_case;
 662        ascii_only     = !has_non_ascii(p->pattern);
 663
 664        /*
 665         * Even when -F (fixed) asks us to do a non-regexp search, we
 666         * may not be able to correctly case-fold when -i
 667         * (ignore-case) is asked (in which case, we'll synthesize a
 668         * regexp to match the pattern that matches regexp special
 669         * characters literally, while ignoring case differences).  On
 670         * the other hand, even without -F, if the pattern does not
 671         * have any regexp special characters and there is no need for
 672         * case-folding search, we can internally turn it into a
 673         * simple string match using kws.  p->fixed tells us if we
 674         * want to use kws.
 675         */
 676        if (opt->fixed ||
 677            has_null(p->pattern, p->patternlen) ||
 678            is_fixed(p->pattern, p->patternlen))
 679                p->fixed = !p->ignore_case || ascii_only;
 680
 681        if (p->fixed) {
 682                p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
 683                kwsincr(p->kws, p->pattern, p->patternlen);
 684                kwsprep(p->kws);
 685                return;
 686        } else if (opt->fixed) {
 687                /*
 688                 * We come here when the pattern has the non-ascii
 689                 * characters we cannot case-fold, and asked to
 690                 * ignore-case.
 691                 */
 692                compile_fixed_regexp(p, opt);
 693                return;
 694        }
 695
 696        if (opt->pcre2) {
 697                compile_pcre2_pattern(p, opt);
 698                return;
 699        }
 700
 701        if (opt->pcre1) {
 702                compile_pcre1_regexp(p, opt);
 703                return;
 704        }
 705
 706        if (p->ignore_case)
 707                regflags |= REG_ICASE;
 708        if (opt->extended_regexp_option)
 709                regflags |= REG_EXTENDED;
 710        err = regcomp(&p->regexp, p->pattern, regflags);
 711        if (err) {
 712                char errbuf[1024];
 713                regerror(err, &p->regexp, errbuf, 1024);
 714                compile_regexp_failed(p, errbuf);
 715        }
 716}
 717
 718static struct grep_expr *compile_pattern_or(struct grep_pat **);
 719static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
 720{
 721        struct grep_pat *p;
 722        struct grep_expr *x;
 723
 724        p = *list;
 725        if (!p)
 726                return NULL;
 727        switch (p->token) {
 728        case GREP_PATTERN: /* atom */
 729        case GREP_PATTERN_HEAD:
 730        case GREP_PATTERN_BODY:
 731                x = xcalloc(1, sizeof (struct grep_expr));
 732                x->node = GREP_NODE_ATOM;
 733                x->u.atom = p;
 734                *list = p->next;
 735                return x;
 736        case GREP_OPEN_PAREN:
 737                *list = p->next;
 738                x = compile_pattern_or(list);
 739                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 740                        die("unmatched parenthesis");
 741                *list = (*list)->next;
 742                return x;
 743        default:
 744                return NULL;
 745        }
 746}
 747
 748static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 749{
 750        struct grep_pat *p;
 751        struct grep_expr *x;
 752
 753        p = *list;
 754        if (!p)
 755                return NULL;
 756        switch (p->token) {
 757        case GREP_NOT:
 758                if (!p->next)
 759                        die("--not not followed by pattern expression");
 760                *list = p->next;
 761                x = xcalloc(1, sizeof (struct grep_expr));
 762                x->node = GREP_NODE_NOT;
 763                x->u.unary = compile_pattern_not(list);
 764                if (!x->u.unary)
 765                        die("--not followed by non pattern expression");
 766                return x;
 767        default:
 768                return compile_pattern_atom(list);
 769        }
 770}
 771
 772static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 773{
 774        struct grep_pat *p;
 775        struct grep_expr *x, *y, *z;
 776
 777        x = compile_pattern_not(list);
 778        p = *list;
 779        if (p && p->token == GREP_AND) {
 780                if (!p->next)
 781                        die("--and not followed by pattern expression");
 782                *list = p->next;
 783                y = compile_pattern_and(list);
 784                if (!y)
 785                        die("--and not followed by pattern expression");
 786                z = xcalloc(1, sizeof (struct grep_expr));
 787                z->node = GREP_NODE_AND;
 788                z->u.binary.left = x;
 789                z->u.binary.right = y;
 790                return z;
 791        }
 792        return x;
 793}
 794
 795static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 796{
 797        struct grep_pat *p;
 798        struct grep_expr *x, *y, *z;
 799
 800        x = compile_pattern_and(list);
 801        p = *list;
 802        if (x && p && p->token != GREP_CLOSE_PAREN) {
 803                y = compile_pattern_or(list);
 804                if (!y)
 805                        die("not a pattern expression %s", p->pattern);
 806                z = xcalloc(1, sizeof (struct grep_expr));
 807                z->node = GREP_NODE_OR;
 808                z->u.binary.left = x;
 809                z->u.binary.right = y;
 810                return z;
 811        }
 812        return x;
 813}
 814
 815static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 816{
 817        return compile_pattern_or(list);
 818}
 819
 820static void indent(int in)
 821{
 822        while (in-- > 0)
 823                fputc(' ', stderr);
 824}
 825
 826static void dump_grep_pat(struct grep_pat *p)
 827{
 828        switch (p->token) {
 829        case GREP_AND: fprintf(stderr, "*and*"); break;
 830        case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
 831        case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
 832        case GREP_NOT: fprintf(stderr, "*not*"); break;
 833        case GREP_OR: fprintf(stderr, "*or*"); break;
 834
 835        case GREP_PATTERN: fprintf(stderr, "pattern"); break;
 836        case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
 837        case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
 838        }
 839
 840        switch (p->token) {
 841        default: break;
 842        case GREP_PATTERN_HEAD:
 843                fprintf(stderr, "<head %d>", p->field); break;
 844        case GREP_PATTERN_BODY:
 845                fprintf(stderr, "<body>"); break;
 846        }
 847        switch (p->token) {
 848        default: break;
 849        case GREP_PATTERN_HEAD:
 850        case GREP_PATTERN_BODY:
 851        case GREP_PATTERN:
 852                fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
 853                break;
 854        }
 855        fputc('\n', stderr);
 856}
 857
 858static void dump_grep_expression_1(struct grep_expr *x, int in)
 859{
 860        indent(in);
 861        switch (x->node) {
 862        case GREP_NODE_TRUE:
 863                fprintf(stderr, "true\n");
 864                break;
 865        case GREP_NODE_ATOM:
 866                dump_grep_pat(x->u.atom);
 867                break;
 868        case GREP_NODE_NOT:
 869                fprintf(stderr, "(not\n");
 870                dump_grep_expression_1(x->u.unary, in+1);
 871                indent(in);
 872                fprintf(stderr, ")\n");
 873                break;
 874        case GREP_NODE_AND:
 875                fprintf(stderr, "(and\n");
 876                dump_grep_expression_1(x->u.binary.left, in+1);
 877                dump_grep_expression_1(x->u.binary.right, in+1);
 878                indent(in);
 879                fprintf(stderr, ")\n");
 880                break;
 881        case GREP_NODE_OR:
 882                fprintf(stderr, "(or\n");
 883                dump_grep_expression_1(x->u.binary.left, in+1);
 884                dump_grep_expression_1(x->u.binary.right, in+1);
 885                indent(in);
 886                fprintf(stderr, ")\n");
 887                break;
 888        }
 889}
 890
 891static void dump_grep_expression(struct grep_opt *opt)
 892{
 893        struct grep_expr *x = opt->pattern_expression;
 894
 895        if (opt->all_match)
 896                fprintf(stderr, "[all-match]\n");
 897        dump_grep_expression_1(x, 0);
 898        fflush(NULL);
 899}
 900
 901static struct grep_expr *grep_true_expr(void)
 902{
 903        struct grep_expr *z = xcalloc(1, sizeof(*z));
 904        z->node = GREP_NODE_TRUE;
 905        return z;
 906}
 907
 908static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
 909{
 910        struct grep_expr *z = xcalloc(1, sizeof(*z));
 911        z->node = GREP_NODE_OR;
 912        z->u.binary.left = left;
 913        z->u.binary.right = right;
 914        return z;
 915}
 916
 917static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
 918{
 919        struct grep_pat *p;
 920        struct grep_expr *header_expr;
 921        struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
 922        enum grep_header_field fld;
 923
 924        if (!opt->header_list)
 925                return NULL;
 926
 927        for (p = opt->header_list; p; p = p->next) {
 928                if (p->token != GREP_PATTERN_HEAD)
 929                        BUG("a non-header pattern in grep header list.");
 930                if (p->field < GREP_HEADER_FIELD_MIN ||
 931                    GREP_HEADER_FIELD_MAX <= p->field)
 932                        BUG("unknown header field %d", p->field);
 933                compile_regexp(p, opt);
 934        }
 935
 936        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
 937                header_group[fld] = NULL;
 938
 939        for (p = opt->header_list; p; p = p->next) {
 940                struct grep_expr *h;
 941                struct grep_pat *pp = p;
 942
 943                h = compile_pattern_atom(&pp);
 944                if (!h || pp != p->next)
 945                        BUG("malformed header expr");
 946                if (!header_group[p->field]) {
 947                        header_group[p->field] = h;
 948                        continue;
 949                }
 950                header_group[p->field] = grep_or_expr(h, header_group[p->field]);
 951        }
 952
 953        header_expr = NULL;
 954
 955        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
 956                if (!header_group[fld])
 957                        continue;
 958                if (!header_expr)
 959                        header_expr = grep_true_expr();
 960                header_expr = grep_or_expr(header_group[fld], header_expr);
 961        }
 962        return header_expr;
 963}
 964
 965static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
 966{
 967        struct grep_expr *z = x;
 968
 969        while (x) {
 970                assert(x->node == GREP_NODE_OR);
 971                if (x->u.binary.right &&
 972                    x->u.binary.right->node == GREP_NODE_TRUE) {
 973                        x->u.binary.right = y;
 974                        break;
 975                }
 976                x = x->u.binary.right;
 977        }
 978        return z;
 979}
 980
 981static void compile_grep_patterns_real(struct grep_opt *opt)
 982{
 983        struct grep_pat *p;
 984        struct grep_expr *header_expr = prep_header_patterns(opt);
 985
 986        for (p = opt->pattern_list; p; p = p->next) {
 987                switch (p->token) {
 988                case GREP_PATTERN: /* atom */
 989                case GREP_PATTERN_HEAD:
 990                case GREP_PATTERN_BODY:
 991                        compile_regexp(p, opt);
 992                        break;
 993                default:
 994                        opt->extended = 1;
 995                        break;
 996                }
 997        }
 998
 999        if (opt->all_match || header_expr)
1000                opt->extended = 1;
1001        else if (!opt->extended && !opt->debug)
1002                return;
1003
1004        p = opt->pattern_list;
1005        if (p)
1006                opt->pattern_expression = compile_pattern_expr(&p);
1007        if (p)
1008                die("incomplete pattern expression: %s", p->pattern);
1009
1010        if (!header_expr)
1011                return;
1012
1013        if (!opt->pattern_expression)
1014                opt->pattern_expression = header_expr;
1015        else if (opt->all_match)
1016                opt->pattern_expression = grep_splice_or(header_expr,
1017                                                         opt->pattern_expression);
1018        else
1019                opt->pattern_expression = grep_or_expr(opt->pattern_expression,
1020                                                       header_expr);
1021        opt->all_match = 1;
1022}
1023
1024void compile_grep_patterns(struct grep_opt *opt)
1025{
1026        compile_grep_patterns_real(opt);
1027        if (opt->debug)
1028                dump_grep_expression(opt);
1029}
1030
1031static void free_pattern_expr(struct grep_expr *x)
1032{
1033        switch (x->node) {
1034        case GREP_NODE_TRUE:
1035        case GREP_NODE_ATOM:
1036                break;
1037        case GREP_NODE_NOT:
1038                free_pattern_expr(x->u.unary);
1039                break;
1040        case GREP_NODE_AND:
1041        case GREP_NODE_OR:
1042                free_pattern_expr(x->u.binary.left);
1043                free_pattern_expr(x->u.binary.right);
1044                break;
1045        }
1046        free(x);
1047}
1048
1049void free_grep_patterns(struct grep_opt *opt)
1050{
1051        struct grep_pat *p, *n;
1052
1053        for (p = opt->pattern_list; p; p = n) {
1054                n = p->next;
1055                switch (p->token) {
1056                case GREP_PATTERN: /* atom */
1057                case GREP_PATTERN_HEAD:
1058                case GREP_PATTERN_BODY:
1059                        if (p->kws)
1060                                kwsfree(p->kws);
1061                        else if (p->pcre1_regexp)
1062                                free_pcre1_regexp(p);
1063                        else if (p->pcre2_pattern)
1064                                free_pcre2_pattern(p);
1065                        else
1066                                regfree(&p->regexp);
1067                        free(p->pattern);
1068                        break;
1069                default:
1070                        break;
1071                }
1072                free(p);
1073        }
1074
1075        if (!opt->extended)
1076                return;
1077        free_pattern_expr(opt->pattern_expression);
1078}
1079
/*
 * Advance "cp" to the next '\n' or until "*left" bytes have been
 * consumed, whichever comes first.  "*left" is decremented by the
 * number of bytes skipped.  Returns the resulting position.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
        unsigned long remaining;

        for (remaining = *left; remaining && *cp != '\n'; remaining--)
                cp++;
        *left = remaining;
        return cp;
}
1090
/* A "word" character is an alphanumeric character or an underscore. */
static int word_char(char ch)
{
        if (ch == '_')
                return 1;
        return isalnum(ch) ? 1 : 0;
}
1095
1096static void output_color(struct grep_opt *opt, const void *data, size_t size,
1097                         const char *color)
1098{
1099        if (want_color(opt->color) && color && color[0]) {
1100                opt->output(opt, color, strlen(color));
1101                opt->output(opt, data, size);
1102                opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
1103        } else
1104                opt->output(opt, data, size);
1105}
1106
1107static void output_sep(struct grep_opt *opt, char sign)
1108{
1109        if (opt->null_following_name)
1110                opt->output(opt, "\0", 1);
1111        else
1112                output_color(opt, &sign, 1, opt->color_sep);
1113}
1114
1115static void show_name(struct grep_opt *opt, const char *name)
1116{
1117        output_color(opt, name, strlen(name), opt->color_filename);
1118        opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
1119}
1120
1121static int fixmatch(struct grep_pat *p, char *line, char *eol,
1122                    regmatch_t *match)
1123{
1124        struct kwsmatch kwsm;
1125        size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
1126        if (offset == -1) {
1127                match->rm_so = match->rm_eo = -1;
1128                return REG_NOMATCH;
1129        } else {
1130                match->rm_so = offset;
1131                match->rm_eo = match->rm_so + kwsm.size[0];
1132                return 0;
1133        }
1134}
1135
1136static int patmatch(struct grep_pat *p, char *line, char *eol,
1137                    regmatch_t *match, int eflags)
1138{
1139        int hit;
1140
1141        if (p->fixed)
1142                hit = !fixmatch(p, line, eol, match);
1143        else if (p->pcre1_regexp)
1144                hit = !pcre1match(p, line, eol, match, eflags);
1145        else if (p->pcre2_pattern)
1146                hit = !pcre2match(p, line, eol, match, eflags);
1147        else
1148                hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
1149                                   eflags);
1150
1151        return hit;
1152}
1153
/*
 * Scan backwards from "*eol_p" for the last '>' located strictly after
 * "bol".  If one is found, the buffer is cut right after it: the byte
 * following the '>' is overwritten with NUL, "*eol_p" is set to point
 * at that byte, and the overwritten character is returned so that the
 * caller can restore it.  Returns 0 (leaving everything untouched)
 * when no such '>' exists.
 */
static int strip_timestamp(char *bol, char **eol_p)
{
        char *cursor;

        for (cursor = *eol_p - 1; bol < cursor; cursor--) {
                int saved;

                if (*cursor != '>')
                        continue;
                cursor++;
                *eol_p = cursor;
                saved = *cursor;
                *cursor = '\0';
                return saved;
        }
        return 0;
}
1169
/*
 * Header line prefixes, with their lengths, that a GREP_PATTERN_HEAD
 * pattern is matched against.  match_one_pattern() indexes this table
 * with p->field, so the order of the entries must follow the numbering
 * of the header field values.
 */
static struct {
        const char *field;
        size_t len;
} header_field[] = {
        { "author ", 7 },
        { "committer ", 10 },
        { "reflog ", 7 },
};
1178
/*
 * Try to match the single pattern "p" against the line [bol, eol).
 *
 * Header and body patterns only apply in the matching context: a
 * GREP_PATTERN_HEAD pattern is skipped unless ctx is GREP_CONTEXT_HEAD,
 * and a GREP_PATTERN_BODY pattern is skipped when it is.  For a header
 * pattern the line must start with the field's prefix, and the search
 * starts after that prefix.
 *
 * With p->word_regexp, a hit only counts if it is delimited by word
 * boundaries; otherwise the search is retried further into the line.
 *
 * Returns non-zero on a hit, in which case pmatch[0] holds the match
 * offsets relative to the "bol" that was passed in.
 */
static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
                             enum grep_context ctx,
                             regmatch_t *pmatch, int eflags)
{
        int hit = 0;
        int saved_ch = 0;
        /* Remember the original start; "bol" is advanced below. */
        const char *start = bol;

        if ((p->token != GREP_PATTERN) &&
            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
                return 0;

        if (p->token == GREP_PATTERN_HEAD) {
                const char *field;
                size_t len;
                assert(p->field < ARRAY_SIZE(header_field));
                field = header_field[p->field].field;
                len = header_field[p->field].len;
                if (strncmp(bol, field, len))
                        return 0;
                bol += len;
                switch (p->field) {
                case GREP_HEADER_AUTHOR:
                case GREP_HEADER_COMMITTER:
                        /*
                         * Temporarily cut the line after the last '>';
                         * the overwritten byte is restored before
                         * returning.
                         */
                        saved_ch = strip_timestamp(bol, &eol);
                        break;
                default:
                        break;
                }
        }

 again:
        hit = patmatch(p, bol, eol, pmatch, eflags);

        if (hit && p->word_regexp) {
                /* Sanity-check the offsets before indexing with them. */
                if ((pmatch[0].rm_so < 0) ||
                    (eol - bol) < pmatch[0].rm_so ||
                    (pmatch[0].rm_eo < 0) ||
                    (eol - bol) < pmatch[0].rm_eo)
                        die("regexp returned nonsense");

                /* Match beginning must be either beginning of the
                 * line, or at word boundary (i.e. the last char must
                 * not be a word char).  Similarly, match end must be
                 * either end of the line, or at word boundary
                 * (i.e. the next char must not be a word char).
                 */
                if ( ((pmatch[0].rm_so == 0) ||
                      !word_char(bol[pmatch[0].rm_so-1])) &&
                     ((pmatch[0].rm_eo == (eol-bol)) ||
                      !word_char(bol[pmatch[0].rm_eo])) )
                        ;
                else
                        hit = 0;

                /* Words consist of at least one character. */
                if (pmatch->rm_so == pmatch->rm_eo)
                        hit = 0;

                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
                        /* There could be more than one match on the
                         * line, and the first match might not be
                         * strict word match.  But later ones could be!
                         * Forward to the next possible start, i.e. the
                         * next position following a non-word char.
                         */
                        bol = pmatch[0].rm_so + bol + 1;
                        while (word_char(bol[-1]) && bol < eol)
                                bol++;
                        /* We are no longer at the beginning of the line. */
                        eflags |= REG_NOTBOL;
                        if (bol < eol)
                                goto again;
                }
        }
        /* Undo the truncation done by strip_timestamp(). */
        if (p->token == GREP_PATTERN_HEAD && saved_ch)
                *eol = saved_ch;
        if (hit) {
                /* Translate offsets back to the caller's "bol". */
                pmatch[0].rm_so += bol - start;
                pmatch[0].rm_eo += bol - start;
        }
        return hit;
}
1261
/*
 * Evaluate the expression tree "x" against the line [bol, eol) and
 * return non-zero if the line matches.
 *
 * "*col" is lowered to the start offset of any matching atom that
 * begins earlier than the current value (or set, if it is negative).
 * "col" and "icol" trade places below a NOT node.
 *
 * With "collect_hits", the result is also OR'ed into the "hit" member
 * of the visited node; the flag is only passed on to the right operand
 * of OR nodes.
 *
 * Dies on a NULL node or an unknown node type.
 */
static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol,
                           char *eol, enum grep_context ctx, ssize_t *col,
                           ssize_t *icol, int collect_hits)
{
        int h = 0;

        if (!x)
                die("Not a valid grep expression");
        switch (x->node) {
        case GREP_NODE_TRUE:
                h = 1;
                break;
        case GREP_NODE_ATOM:
                {
                        regmatch_t tmp;
                        h = match_one_pattern(x->u.atom, bol, eol, ctx,
                                              &tmp, 0);
                        /* Track the earliest match start seen so far. */
                        if (h && (*col < 0 || tmp.rm_so < *col))
                                *col = tmp.rm_so;
                }
                break;
        case GREP_NODE_NOT:
                /*
                 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
                 */
                h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
                                     0);
                break;
        case GREP_NODE_AND:
                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
                                    icol, 0);
                if (h || opt->columnnum) {
                        /*
                         * Don't short-circuit AND when given --column, since a
                         * NOT earlier in the tree may turn this into an OR. In
                         * this case, see the below comment.
                         */
                        h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
                                             ctx, col, icol, 0);
                }
                break;
        case GREP_NODE_OR:
                if (!(collect_hits || opt->columnnum)) {
                        /*
                         * Don't short-circuit OR when given --column (or
                         * collecting hits) to ensure we don't skip a later
                         * child that would produce an earlier match.
                         */
                        return (match_expr_eval(opt, x->u.binary.left, bol, eol,
                                                ctx, col, icol, 0) ||
                                match_expr_eval(opt, x->u.binary.right, bol,
                                                eol, ctx, col, icol, 0));
                }
                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
                                    icol, 0);
                /*
                 * The left operand was evaluated without collect_hits,
                 * so record its result here.
                 */
                if (collect_hits)
                        x->u.binary.left->hit |= h;
                h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
                                     icol, collect_hits);
                break;
        default:
                die("Unexpected node type (internal error) %d", x->node);
        }
        if (collect_hits)
                x->hit |= h;
        return h;
}
1329
1330static int match_expr(struct grep_opt *opt, char *bol, char *eol,
1331                      enum grep_context ctx, ssize_t *col,
1332                      ssize_t *icol, int collect_hits)
1333{
1334        struct grep_expr *x = opt->pattern_expression;
1335        return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
1336}
1337
1338static int match_line(struct grep_opt *opt, char *bol, char *eol,
1339                      ssize_t *col, ssize_t *icol,
1340                      enum grep_context ctx, int collect_hits)
1341{
1342        struct grep_pat *p;
1343        int hit = 0;
1344
1345        if (opt->extended)
1346                return match_expr(opt, bol, eol, ctx, col, icol,
1347                                  collect_hits);
1348
1349        /* we do not call with collect_hits without being extended */
1350        for (p = opt->pattern_list; p; p = p->next) {
1351                regmatch_t tmp;
1352                if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
1353                        hit |= 1;
1354                        if (!opt->columnnum) {
1355                                /*
1356                                 * Without --column, any single match on a line
1357                                 * is enough to know that it needs to be
1358                                 * printed. With --column, scan _all_ patterns
1359                                 * to find the earliest.
1360                                 */
1361                                break;
1362                        }
1363                        if (*col < 0 || tmp.rm_so < *col)
1364                                *col = tmp.rm_so;
1365                }
1366        }
1367        return hit;
1368}
1369
1370static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
1371                              enum grep_context ctx,
1372                              regmatch_t *pmatch, int eflags)
1373{
1374        regmatch_t match;
1375
1376        if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
1377                return 0;
1378        if (match.rm_so < 0 || match.rm_eo < 0)
1379                return 0;
1380        if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1381                if (match.rm_so > pmatch->rm_so)
1382                        return 1;
1383                if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1384                        return 1;
1385        }
1386        pmatch->rm_so = match.rm_so;
1387        pmatch->rm_eo = match.rm_eo;
1388        return 1;
1389}
1390
1391static int next_match(struct grep_opt *opt, char *bol, char *eol,
1392                      enum grep_context ctx, regmatch_t *pmatch, int eflags)
1393{
1394        struct grep_pat *p;
1395        int hit = 0;
1396
1397        pmatch->rm_so = pmatch->rm_eo = -1;
1398        if (bol < eol) {
1399                for (p = opt->pattern_list; p; p = p->next) {
1400                        switch (p->token) {
1401                        case GREP_PATTERN: /* atom */
1402                        case GREP_PATTERN_HEAD:
1403                        case GREP_PATTERN_BODY:
1404                                hit |= match_next_pattern(p, bol, eol, ctx,
1405                                                          pmatch, eflags);
1406                                break;
1407                        default:
1408                                break;
1409                        }
1410                }
1411        }
1412        return hit;
1413}
1414
1415static void show_line_header(struct grep_opt *opt, const char *name,
1416                             unsigned lno, ssize_t cno, char sign)
1417{
1418        if (opt->heading && opt->last_shown == 0) {
1419                output_color(opt, name, strlen(name), opt->color_filename);
1420                opt->output(opt, "\n", 1);
1421        }
1422        opt->last_shown = lno;
1423
1424        if (!opt->heading && opt->pathname) {
1425                output_color(opt, name, strlen(name), opt->color_filename);
1426                output_sep(opt, sign);
1427        }
1428        if (opt->linenum) {
1429                char buf[32];
1430                xsnprintf(buf, sizeof(buf), "%d", lno);
1431                output_color(opt, buf, strlen(buf), opt->color_lineno);
1432                output_sep(opt, sign);
1433        }
1434        /*
1435         * Treat 'cno' as the 1-indexed offset from the start of a non-context
1436         * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1437         * being called with a context line.
1438         */
1439        if (opt->columnnum && cno) {
1440                char buf[32];
1441                xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
1442                output_color(opt, buf, strlen(buf), opt->color_columnno);
1443                output_sep(opt, sign);
1444        }
1445}
1446
/*
 * Output the line [bol, eol) of file "name".
 *
 * "lno" is the line number, "cno" the column number handed on to
 * show_line_header(), and "sign" the separator character, which also
 * selects the colors (':' selected line, '-' context, '=' function
 * name line).
 *
 * Hunk separators ("--") are emitted first when needed.  With color or
 * --only-matching the individual matches on the line are located; in
 * --only-matching mode each match is printed on its own line with its
 * own header, otherwise the whole line is printed with the matches
 * highlighted.
 *
 * The byte at "eol" is temporarily overwritten with NUL while matches
 * are being searched.
 */
static void show_line(struct grep_opt *opt, char *bol, char *eol,
                      const char *name, unsigned lno, ssize_t cno, char sign)
{
        /* Number of bytes of the line not yet printed. */
        int rest = eol - bol;
        const char *match_color = NULL;
        const char *line_color = NULL;

        if (opt->file_break && opt->last_shown == 0) {
                if (opt->show_hunk_mark)
                        opt->output(opt, "\n", 1);
        } else if (opt->pre_context || opt->post_context || opt->funcbody) {
                if (opt->last_shown == 0) {
                        if (opt->show_hunk_mark) {
                                output_color(opt, "--", 2, opt->color_sep);
                                opt->output(opt, "\n", 1);
                        }
                } else if (lno > opt->last_shown + 1) {
                        /* There is a gap since the last line shown. */
                        output_color(opt, "--", 2, opt->color_sep);
                        opt->output(opt, "\n", 1);
                }
        }
        if (!opt->only_matching) {
                /*
                 * In case the line we're being called with contains more than
                 * one match, leave printing each header to the loop below.
                 */
                show_line_header(opt, name, lno, cno, sign);
        }
        if (opt->color || opt->only_matching) {
                regmatch_t match;
                enum grep_context ctx = GREP_CONTEXT_BODY;
                int ch = *eol;
                int eflags = 0;

                if (opt->color) {
                        if (sign == ':')
                                match_color = opt->color_match_selected;
                        else
                                match_color = opt->color_match_context;
                        if (sign == ':')
                                line_color = opt->color_selected;
                        else if (sign == '-')
                                line_color = opt->color_context;
                        else if (sign == '=')
                                line_color = opt->color_function;
                }
                /* Terminate the line for the matchers; restored below. */
                *eol = '\0';
                while (next_match(opt, bol, eol, ctx, &match, eflags)) {
                        /* An empty match would not advance "bol". */
                        if (match.rm_so == match.rm_eo)
                                break;

                        if (opt->only_matching)
                                show_line_header(opt, name, lno, cno, sign);
                        else
                                output_color(opt, bol, match.rm_so, line_color);
                        output_color(opt, bol + match.rm_so,
                                     match.rm_eo - match.rm_so, match_color);
                        if (opt->only_matching)
                                opt->output(opt, "\n", 1);
                        /* Continue the search right after this match. */
                        bol += match.rm_eo;
                        cno += match.rm_eo;
                        rest -= match.rm_eo;
                        eflags = REG_NOTBOL;
                }
                *eol = ch;
        }
        if (!opt->only_matching) {
                /* Print whatever follows the last match (or the whole line). */
                output_color(opt, bol, rest, line_color);
                opt->output(opt, "\n", 1);
        }
}
1518
#ifndef NO_PTHREADS
/* The lock helpers below only take the mutex when this is non-zero. */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

/* Acquire grep_attr_mutex if locking is enabled. */
static inline void grep_attr_lock(void)
{
        if (grep_use_locks)
                pthread_mutex_lock(&grep_attr_mutex);
}

/* Release grep_attr_mutex if locking is enabled. */
static inline void grep_attr_unlock(void)
{
        if (grep_use_locks)
                pthread_mutex_unlock(&grep_attr_mutex);
}

/*
 * Same as grep_attr_mutex, but protecting the thread-unsafe object db access.
 */
pthread_mutex_t grep_read_mutex;

#else
/* Without pthreads the lock helpers expand to nothing. */
#define grep_attr_lock()
#define grep_attr_unlock()
#endif
1549
1550static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
1551{
1552        xdemitconf_t *xecfg = opt->priv;
1553        if (xecfg && !xecfg->find_func) {
1554                grep_source_load_driver(gs);
1555                if (gs->driver->funcname.pattern) {
1556                        const struct userdiff_funcname *pe = &gs->driver->funcname;
1557                        xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1558                } else {
1559                        xecfg = opt->priv = NULL;
1560                }
1561        }
1562
1563        if (xecfg) {
1564                char buf[1];
1565                return xecfg->find_func(bol, eol - bol, buf, 1,
1566                                        xecfg->find_func_priv) >= 0;
1567        }
1568
1569        if (bol == eol)
1570                return 0;
1571        if (isalpha(*bol) || *bol == '_' || *bol == '$')
1572                return 1;
1573        return 0;
1574}
1575
1576static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1577                               char *bol, unsigned lno)
1578{
1579        while (bol > gs->buf) {
1580                char *eol = --bol;
1581
1582                while (bol > gs->buf && bol[-1] != '\n')
1583                        bol--;
1584                lno--;
1585
1586                if (lno <= opt->last_shown)
1587                        break;
1588
1589                if (match_funcname(opt, gs, bol, eol)) {
1590                        show_line(opt, bol, eol, gs->name, lno, 0, '=');
1591                        break;
1592                }
1593        }
1594}
1595
/* Defined further down in this file; needed by show_pre_context(). */
static int is_empty_line(const char *bol, const char *eol);

/*
 * Show the lines preceding the hit line that starts at "bol" (line
 * number "lno", ending at "end").
 *
 * The range normally starts opt->pre_context lines before the hit, but
 * never at or before a line that has already been shown.  With
 * opt->funcbody the range is extended backwards to the enclosing
 * function name line and to the non-empty lines directly above it.
 * With opt->funcname the nearest preceding function name line is shown
 * as well, even if it lies outside the range.
 */
static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
                             char *bol, char *end, unsigned lno)
{
        unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
        int funcname_needed = !!opt->funcname, comment_needed = 0;

        if (opt->pre_context < lno)
                from = lno - opt->pre_context;
        if (from <= opt->last_shown)
                from = opt->last_shown + 1;
        /* Remember the plain context start; "from" may be widened below. */
        orig_from = from;
        if (opt->funcbody) {
                if (match_funcname(opt, gs, bol, end))
                        comment_needed = 1;
                else
                        funcname_needed = 1;
                /* Search as far back as the last line shown. */
                from = opt->last_shown + 1;
        }

        /* Rewind. */
        while (bol > gs->buf && cur > from) {
                char *next_bol = bol;
                char *eol = --bol;

                while (bol > gs->buf && bol[-1] != '\n')
                        bol--;
                cur--;
                if (comment_needed && (is_empty_line(bol, eol) ||
                                       match_funcname(opt, gs, bol, eol))) {
                        /*
                         * This line ends the run of lines above the
                         * function name line; fall back to the plain
                         * context start.
                         */
                        comment_needed = 0;
                        from = orig_from;
                        if (cur < from) {
                                /* Went one line too far; step forward again. */
                                cur++;
                                bol = next_bol;
                                break;
                        }
                }
                if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
                        funcname_lno = cur;
                        funcname_needed = 0;
                        if (opt->funcbody)
                                comment_needed = 1;
                        else
                                from = orig_from;
                }
        }

        /* We need to look even further back to find a function signature. */
        if (opt->funcname && funcname_needed)
                show_funcname_line(opt, gs, bol, cur);

        /* Back forward. */
        while (cur < lno) {
                char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';

                while (*eol != '\n')
                        eol++;
                show_line(opt, bol, eol, gs->name, cur, 0, sign);
                bol = eol + 1;
                cur++;
        }
}
1660
1661static int should_lookahead(struct grep_opt *opt)
1662{
1663        struct grep_pat *p;
1664
1665        if (opt->extended)
1666                return 0; /* punt for too complex stuff */
1667        if (opt->invert)
1668                return 0;
1669        for (p = opt->pattern_list; p; p = p->next) {
1670                if (p->token != GREP_PATTERN)
1671                        return 0; /* punt for "header only" and stuff */
1672        }
1673        return 1;
1674}
1675
1676static int look_ahead(struct grep_opt *opt,
1677                      unsigned long *left_p,
1678                      unsigned *lno_p,
1679                      char **bol_p)
1680{
1681        unsigned lno = *lno_p;
1682        char *bol = *bol_p;
1683        struct grep_pat *p;
1684        char *sp, *last_bol;
1685        regoff_t earliest = -1;
1686
1687        for (p = opt->pattern_list; p; p = p->next) {
1688                int hit;
1689                regmatch_t m;
1690
1691                hit = patmatch(p, bol, bol + *left_p, &m, 0);
1692                if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1693                        continue;
1694                if (earliest < 0 || m.rm_so < earliest)
1695                        earliest = m.rm_so;
1696        }
1697
1698        if (earliest < 0) {
1699                *bol_p = bol + *left_p;
1700                *left_p = 0;
1701                return 1;
1702        }
1703        for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1704                ; /* find the beginning of the line */
1705        last_bol = sp;
1706
1707        for (sp = bol; sp < last_bol; sp++) {
1708                if (*sp == '\n')
1709                        lno++;
1710        }
1711        *left_p -= last_bol - bol;
1712        *bol_p = last_bol;
1713        *lno_p = lno;
1714        return 0;
1715}
1716
1717static int fill_textconv_grep(struct userdiff_driver *driver,
1718                              struct grep_source *gs)
1719{
1720        struct diff_filespec *df;
1721        char *buf;
1722        size_t size;
1723
1724        if (!driver || !driver->textconv)
1725                return grep_source_load(gs);
1726
1727        /*
1728         * The textconv interface is intimately tied to diff_filespecs, so we
1729         * have to pretend to be one. If we could unify the grep_source
1730         * and diff_filespec structs, this mess could just go away.
1731         */
1732        df = alloc_filespec(gs->path);
1733        switch (gs->type) {
1734        case GREP_SOURCE_OID:
1735                fill_filespec(df, gs->identifier, 1, 0100644);
1736                break;
1737        case GREP_SOURCE_FILE:
1738                fill_filespec(df, &null_oid, 0, 0100644);
1739                break;
1740        default:
1741                BUG("attempt to textconv something without a path?");
1742        }
1743
1744        /*
1745         * fill_textconv is not remotely thread-safe; it may load objects
1746         * behind the scenes, and it modifies the global diff tempfile
1747         * structure.
1748         */
1749        grep_read_lock();
1750        size = fill_textconv(driver, df, &buf);
1751        grep_read_unlock();
1752        free_filespec(df);
1753
1754        /*
1755         * The normal fill_textconv usage by the diff machinery would just keep
1756         * the textconv'd buf separate from the diff_filespec. But much of the
1757         * grep code passes around a grep_source and assumes that its "buf"
1758         * pointer is the beginning of the thing we are searching. So let's
1759         * install our textconv'd version into the grep_source, taking care not
1760         * to leak any existing buffer.
1761         */
1762        grep_source_clear_data(gs);
1763        gs->buf = buf;
1764        gs->size = size;
1765
1766        return 0;
1767}
1768
/*
 * Return 1 if the half-open range [bol, eol) consists solely of
 * whitespace (an empty range counts as well), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
        const char *cursor;

        for (cursor = bol; cursor < eol; cursor++)
                if (!isspace(*cursor))
                        break;
        return cursor == eol;
}
1775
1776static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1777{
1778        char *bol;
1779        char *peek_bol = NULL;
1780        unsigned long left;
1781        unsigned lno = 1;
1782        unsigned last_hit = 0;
1783        int binary_match_only = 0;
1784        unsigned count = 0;
1785        int try_lookahead = 0;
1786        int show_function = 0;
1787        struct userdiff_driver *textconv = NULL;
1788        enum grep_context ctx = GREP_CONTEXT_HEAD;
1789        xdemitconf_t xecfg;
1790
1791        if (!opt->output)
1792                opt->output = std_output;
1793
1794        if (opt->pre_context || opt->post_context || opt->file_break ||
1795            opt->funcbody) {
1796                /* Show hunk marks, except for the first file. */
1797                if (opt->last_shown)
1798                        opt->show_hunk_mark = 1;
1799                /*
1800                 * If we're using threads then we can't easily identify
1801                 * the first file.  Always put hunk marks in that case
1802                 * and skip the very first one later in work_done().
1803                 */
1804                if (opt->output != std_output)
1805                        opt->show_hunk_mark = 1;
1806        }
1807        opt->last_shown = 0;
1808
1809        if (opt->allow_textconv) {
1810                grep_source_load_driver(gs);
1811                /*
1812                 * We might set up the shared textconv cache data here, which
1813                 * is not thread-safe.
1814                 */
1815                grep_attr_lock();
1816                textconv = userdiff_get_textconv(gs->driver);
1817                grep_attr_unlock();
1818        }
1819
1820        /*
1821         * We know the result of a textconv is text, so we only have to care
1822         * about binary handling if we are not using it.
1823         */
1824        if (!textconv) {
1825                switch (opt->binary) {
1826                case GREP_BINARY_DEFAULT:
1827                        if (grep_source_is_binary(gs))
1828                                binary_match_only = 1;
1829                        break;
1830                case GREP_BINARY_NOMATCH:
1831                        if (grep_source_is_binary(gs))
1832                                return 0; /* Assume unmatch */
1833                        break;
1834                case GREP_BINARY_TEXT:
1835                        break;
1836                default:
1837                        BUG("unknown binary handling mode");
1838                }
1839        }
1840
1841        memset(&xecfg, 0, sizeof(xecfg));
1842        opt->priv = &xecfg;
1843
1844        try_lookahead = should_lookahead(opt);
1845
1846        if (fill_textconv_grep(textconv, gs) < 0)
1847                return 0;
1848
1849        bol = gs->buf;
1850        left = gs->size;
1851        while (left) {
1852                char *eol, ch;
1853                int hit;
1854                ssize_t cno;
1855                ssize_t col = -1, icol = -1;
1856
1857                /*
1858                 * look_ahead() skips quickly to the line that possibly
1859                 * has the next hit; don't call it if we need to do
1860                 * something more than just skipping the current line
1861                 * in response to an unmatch for the current line.  E.g.
1862                 * inside a post-context window, we will show the current
1863                 * line as a context around the previous hit when it
1864                 * doesn't hit.
1865                 */
1866                if (try_lookahead
1867                    && !(last_hit
1868                         && (show_function ||
1869                             lno <= last_hit + opt->post_context))
1870                    && look_ahead(opt, &left, &lno, &bol))
1871                        break;
1872                eol = end_of_line(bol, &left);
1873                ch = *eol;
1874                *eol = 0;
1875
1876                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1877                        ctx = GREP_CONTEXT_BODY;
1878
1879                hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
1880                *eol = ch;
1881
1882                if (collect_hits)
1883                        goto next_line;
1884
1885                /* "grep -v -e foo -e bla" should list lines
1886                 * that do not have either, so inversion should
1887                 * be done outside.
1888                 */
1889                if (opt->invert)
1890                        hit = !hit;
1891                if (opt->unmatch_name_only) {
1892                        if (hit)
1893                                return 0;
1894                        goto next_line;
1895                }
1896                if (hit) {
1897                        count++;
1898                        if (opt->status_only)
1899                                return 1;
1900                        if (opt->name_only) {
1901                                show_name(opt, gs->name);
1902                                return 1;
1903                        }
1904                        if (opt->count)
1905                                goto next_line;
1906                        if (binary_match_only) {
1907                                opt->output(opt, "Binary file ", 12);
1908                                output_color(opt, gs->name, strlen(gs->name),
1909                                             opt->color_filename);
1910                                opt->output(opt, " matches\n", 9);
1911                                return 1;
1912                        }
1913                        /* Hit at this line.  If we haven't shown the
1914                         * pre-context lines, we would need to show them.
1915                         */
1916                        if (opt->pre_context || opt->funcbody)
1917                                show_pre_context(opt, gs, bol, eol, lno);
1918                        else if (opt->funcname)
1919                                show_funcname_line(opt, gs, bol, lno);
1920                        cno = opt->invert ? icol : col;
1921                        if (cno < 0) {
1922                                /*
1923                                 * A negative cno indicates that there was no
1924                                 * match on the line. We are thus inverted and
1925                                 * being asked to show all lines that _don't_
1926                                 * match a given expression. Therefore, set cno
1927                                 * to 0 to suggest the whole line matches.
1928                                 */
1929                                cno = 0;
1930                        }
1931                        show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
1932                        last_hit = lno;
1933                        if (opt->funcbody)
1934                                show_function = 1;
1935                        goto next_line;
1936                }
1937                if (show_function && (!peek_bol || peek_bol < bol)) {
1938                        unsigned long peek_left = left;
1939                        char *peek_eol = eol;
1940
1941                        /*
1942                         * Trailing empty lines are not interesting.
1943                         * Peek past them to see if they belong to the
1944                         * body of the current function.
1945                         */
1946                        peek_bol = bol;
1947                        while (is_empty_line(peek_bol, peek_eol)) {
1948                                peek_bol = peek_eol + 1;
1949                                peek_eol = end_of_line(peek_bol, &peek_left);
1950                        }
1951
1952                        if (match_funcname(opt, gs, peek_bol, peek_eol))
1953                                show_function = 0;
1954                }
1955                if (show_function ||
1956                    (last_hit && lno <= last_hit + opt->post_context)) {
1957                        /* If the last hit is within the post context,
1958                         * we need to show this line.
1959                         */
1960                        show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
1961                }
1962
1963        next_line:
1964                bol = eol + 1;
1965                if (!left)
1966                        break;
1967                left--;
1968                lno++;
1969        }
1970
1971        if (collect_hits)
1972                return 0;
1973
1974        if (opt->status_only)
1975                return opt->unmatch_name_only;
1976        if (opt->unmatch_name_only) {
1977                /* We did not see any hit, so we want to show this */
1978                show_name(opt, gs->name);
1979                return 1;
1980        }
1981
1982        xdiff_clear_find_func(&xecfg);
1983        opt->priv = NULL;
1984
1985        /* NEEDSWORK:
1986         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1987         * which feels mostly useless but sometimes useful.  Maybe
1988         * make it another option?  For now suppress them.
1989         */
1990        if (opt->count && count) {
1991                char buf[32];
1992                if (opt->pathname) {
1993                        output_color(opt, gs->name, strlen(gs->name),
1994                                     opt->color_filename);
1995                        output_sep(opt, ':');
1996                }
1997                xsnprintf(buf, sizeof(buf), "%u\n", count);
1998                opt->output(opt, buf, strlen(buf));
1999                return 1;
2000        }
2001        return !!last_hit;
2002}
2003
2004static void clr_hit_marker(struct grep_expr *x)
2005{
2006        /* All-hit markers are meaningful only at the very top level
2007         * OR node.
2008         */
2009        while (1) {
2010                x->hit = 0;
2011                if (x->node != GREP_NODE_OR)
2012                        return;
2013                x->u.binary.left->hit = 0;
2014                x = x->u.binary.right;
2015        }
2016}
2017
2018static int chk_hit_marker(struct grep_expr *x)
2019{
2020        /* Top level nodes have hit markers.  See if they all are hits */
2021        while (1) {
2022                if (x->node != GREP_NODE_OR)
2023                        return x->hit;
2024                if (!x->u.binary.left->hit)
2025                        return 0;
2026                x = x->u.binary.right;
2027        }
2028}
2029
2030int grep_source(struct grep_opt *opt, struct grep_source *gs)
2031{
2032        /*
2033         * we do not have to do the two-pass grep when we do not check
2034         * buffer-wide "all-match".
2035         */
2036        if (!opt->all_match)
2037                return grep_source_1(opt, gs, 0);
2038
2039        /* Otherwise the toplevel "or" terms hit a bit differently.
2040         * We first clear hit markers from them.
2041         */
2042        clr_hit_marker(opt->pattern_expression);
2043        grep_source_1(opt, gs, 1);
2044
2045        if (!chk_hit_marker(opt->pattern_expression))
2046                return 0;
2047
2048        return grep_source_1(opt, gs, 0);
2049}
2050
2051int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
2052{
2053        struct grep_source gs;
2054        int r;
2055
2056        grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
2057        gs.buf = buf;
2058        gs.size = size;
2059
2060        r = grep_source(opt, &gs);
2061
2062        grep_source_clear(&gs);
2063        return r;
2064}
2065
2066void grep_source_init(struct grep_source *gs, enum grep_source_type type,
2067                      const char *name, const char *path,
2068                      const void *identifier)
2069{
2070        gs->type = type;
2071        gs->name = xstrdup_or_null(name);
2072        gs->path = xstrdup_or_null(path);
2073        gs->buf = NULL;
2074        gs->size = 0;
2075        gs->driver = NULL;
2076
2077        switch (type) {
2078        case GREP_SOURCE_FILE:
2079                gs->identifier = xstrdup(identifier);
2080                break;
2081        case GREP_SOURCE_OID:
2082                gs->identifier = oiddup(identifier);
2083                break;
2084        case GREP_SOURCE_BUF:
2085                gs->identifier = NULL;
2086                break;
2087        }
2088}
2089
2090void grep_source_clear(struct grep_source *gs)
2091{
2092        FREE_AND_NULL(gs->name);
2093        FREE_AND_NULL(gs->path);
2094        FREE_AND_NULL(gs->identifier);
2095        grep_source_clear_data(gs);
2096}
2097
2098void grep_source_clear_data(struct grep_source *gs)
2099{
2100        switch (gs->type) {
2101        case GREP_SOURCE_FILE:
2102        case GREP_SOURCE_OID:
2103                FREE_AND_NULL(gs->buf);
2104                gs->size = 0;
2105                break;
2106        case GREP_SOURCE_BUF:
2107                /* leave user-provided buf intact */
2108                break;
2109        }
2110}
2111
2112static int grep_source_load_oid(struct grep_source *gs)
2113{
2114        enum object_type type;
2115
2116        grep_read_lock();
2117        gs->buf = read_object_file(gs->identifier, &type, &gs->size);
2118        grep_read_unlock();
2119
2120        if (!gs->buf)
2121                return error(_("'%s': unable to read %s"),
2122                             gs->name,
2123                             oid_to_hex(gs->identifier));
2124        return 0;
2125}
2126
2127static int grep_source_load_file(struct grep_source *gs)
2128{
2129        const char *filename = gs->identifier;
2130        struct stat st;
2131        char *data;
2132        size_t size;
2133        int i;
2134
2135        if (lstat(filename, &st) < 0) {
2136        err_ret:
2137                if (errno != ENOENT)
2138                        error_errno(_("failed to stat '%s'"), filename);
2139                return -1;
2140        }
2141        if (!S_ISREG(st.st_mode))
2142                return -1;
2143        size = xsize_t(st.st_size);
2144        i = open(filename, O_RDONLY);
2145        if (i < 0)
2146                goto err_ret;
2147        data = xmallocz(size);
2148        if (st.st_size != read_in_full(i, data, size)) {
2149                error_errno(_("'%s': short read"), filename);
2150                close(i);
2151                free(data);
2152                return -1;
2153        }
2154        close(i);
2155
2156        gs->buf = data;
2157        gs->size = size;
2158        return 0;
2159}
2160
2161static int grep_source_load(struct grep_source *gs)
2162{
2163        if (gs->buf)
2164                return 0;
2165
2166        switch (gs->type) {
2167        case GREP_SOURCE_FILE:
2168                return grep_source_load_file(gs);
2169        case GREP_SOURCE_OID:
2170                return grep_source_load_oid(gs);
2171        case GREP_SOURCE_BUF:
2172                return gs->buf ? 0 : -1;
2173        }
2174        BUG("invalid grep_source type to load");
2175}
2176
2177void grep_source_load_driver(struct grep_source *gs)
2178{
2179        if (gs->driver)
2180                return;
2181
2182        grep_attr_lock();
2183        if (gs->path)
2184                gs->driver = userdiff_find_by_path(gs->path);
2185        if (!gs->driver)
2186                gs->driver = userdiff_find_by_name("default");
2187        grep_attr_unlock();
2188}
2189
2190static int grep_source_is_binary(struct grep_source *gs)
2191{
2192        grep_source_load_driver(gs);
2193        if (gs->driver->binary != -1)
2194                return gs->driver->binary;
2195
2196        if (!grep_source_load(gs))
2197                return buffer_is_binary(gs->buf, gs->size);
2198
2199        return 0;
2200}