grep.con commit Merge branch 'as/sequencer-customizable-comment-char' (d6465fb)
   1#include "cache.h"
   2#include "config.h"
   3#include "grep.h"
   4#include "object-store.h"
   5#include "userdiff.h"
   6#include "xdiff-interface.h"
   7#include "diff.h"
   8#include "diffcore.h"
   9#include "commit.h"
  10#include "quote.h"
  11#include "help.h"
  12
  13static int grep_source_load(struct grep_source *gs);
  14static int grep_source_is_binary(struct grep_source *gs);
  15
  16static struct grep_opt grep_defaults;
  17
  18static const char *color_grep_slots[] = {
  19        [GREP_COLOR_CONTEXT]        = "context",
  20        [GREP_COLOR_FILENAME]       = "filename",
  21        [GREP_COLOR_FUNCTION]       = "function",
  22        [GREP_COLOR_LINENO]         = "lineNumber",
  23        [GREP_COLOR_COLUMNNO]       = "column",
  24        [GREP_COLOR_MATCH_CONTEXT]  = "matchContext",
  25        [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
  26        [GREP_COLOR_SELECTED]       = "selected",
  27        [GREP_COLOR_SEP]            = "separator",
  28};
  29
  30static void std_output(struct grep_opt *opt, const void *buf, size_t size)
  31{
  32        fwrite(buf, size, 1, stdout);
  33}
  34
  35static void color_set(char *dst, const char *color_bytes)
  36{
  37        xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes);
  38}
  39
  40/*
  41 * Initialize the grep_defaults template with hardcoded defaults.
  42 * We could let the compiler do this, but without C99 initializers
  43 * the code gets unwieldy and unreadable, so...
  44 */
  45void init_grep_defaults(void)
  46{
  47        struct grep_opt *opt = &grep_defaults;
  48        static int run_once;
  49
  50        if (run_once)
  51                return;
  52        run_once++;
  53
  54        memset(opt, 0, sizeof(*opt));
  55        opt->relative = 1;
  56        opt->pathname = 1;
  57        opt->max_depth = -1;
  58        opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
  59        color_set(opt->colors[GREP_COLOR_CONTEXT], "");
  60        color_set(opt->colors[GREP_COLOR_FILENAME], "");
  61        color_set(opt->colors[GREP_COLOR_FUNCTION], "");
  62        color_set(opt->colors[GREP_COLOR_LINENO], "");
  63        color_set(opt->colors[GREP_COLOR_COLUMNNO], "");
  64        color_set(opt->colors[GREP_COLOR_MATCH_CONTEXT], GIT_COLOR_BOLD_RED);
  65        color_set(opt->colors[GREP_COLOR_MATCH_SELECTED], GIT_COLOR_BOLD_RED);
  66        color_set(opt->colors[GREP_COLOR_SELECTED], "");
  67        color_set(opt->colors[GREP_COLOR_SEP], GIT_COLOR_CYAN);
  68        opt->color = -1;
  69        opt->output = std_output;
  70}
  71
  72static int parse_pattern_type_arg(const char *opt, const char *arg)
  73{
  74        if (!strcmp(arg, "default"))
  75                return GREP_PATTERN_TYPE_UNSPECIFIED;
  76        else if (!strcmp(arg, "basic"))
  77                return GREP_PATTERN_TYPE_BRE;
  78        else if (!strcmp(arg, "extended"))
  79                return GREP_PATTERN_TYPE_ERE;
  80        else if (!strcmp(arg, "fixed"))
  81                return GREP_PATTERN_TYPE_FIXED;
  82        else if (!strcmp(arg, "perl"))
  83                return GREP_PATTERN_TYPE_PCRE;
  84        die("bad %s argument: %s", opt, arg);
  85}
  86
  87define_list_config_array_extra(color_grep_slots, {"match"});
  88
  89/*
  90 * Read the configuration file once and store it in
  91 * the grep_defaults template.
  92 */
  93int grep_config(const char *var, const char *value, void *cb)
  94{
  95        struct grep_opt *opt = &grep_defaults;
  96        const char *slot;
  97
  98        if (userdiff_config(var, value) < 0)
  99                return -1;
 100
 101        if (!strcmp(var, "grep.extendedregexp")) {
 102                opt->extended_regexp_option = git_config_bool(var, value);
 103                return 0;
 104        }
 105
 106        if (!strcmp(var, "grep.patterntype")) {
 107                opt->pattern_type_option = parse_pattern_type_arg(var, value);
 108                return 0;
 109        }
 110
 111        if (!strcmp(var, "grep.linenumber")) {
 112                opt->linenum = git_config_bool(var, value);
 113                return 0;
 114        }
 115        if (!strcmp(var, "grep.column")) {
 116                opt->columnnum = git_config_bool(var, value);
 117                return 0;
 118        }
 119
 120        if (!strcmp(var, "grep.fullname")) {
 121                opt->relative = !git_config_bool(var, value);
 122                return 0;
 123        }
 124
 125        if (!strcmp(var, "color.grep"))
 126                opt->color = git_config_colorbool(var, value);
 127        if (!strcmp(var, "color.grep.match")) {
 128                if (grep_config("color.grep.matchcontext", value, cb) < 0)
 129                        return -1;
 130                if (grep_config("color.grep.matchselected", value, cb) < 0)
 131                        return -1;
 132        } else if (skip_prefix(var, "color.grep.", &slot)) {
 133                int i = LOOKUP_CONFIG(color_grep_slots, slot);
 134                char *color;
 135
 136                if (i < 0)
 137                        return -1;
 138                color = opt->colors[i];
 139                if (!value)
 140                        return config_error_nonbool(var);
 141                return color_parse(value, color);
 142        }
 143        return 0;
 144}
 145
 146/*
 147 * Initialize one instance of grep_opt and copy the
 148 * default values from the template we read the configuration
 149 * information in an earlier call to git_config(grep_config).
 150 */
 151void grep_init(struct grep_opt *opt, const char *prefix)
 152{
 153        struct grep_opt *def = &grep_defaults;
 154        int i;
 155
 156        memset(opt, 0, sizeof(*opt));
 157        opt->prefix = prefix;
 158        opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
 159        opt->pattern_tail = &opt->pattern_list;
 160        opt->header_tail = &opt->header_list;
 161
 162        opt->color = def->color;
 163        opt->extended_regexp_option = def->extended_regexp_option;
 164        opt->pattern_type_option = def->pattern_type_option;
 165        opt->linenum = def->linenum;
 166        opt->columnnum = def->columnnum;
 167        opt->max_depth = def->max_depth;
 168        opt->pathname = def->pathname;
 169        opt->relative = def->relative;
 170        opt->output = def->output;
 171
 172        for (i = 0; i < NR_GREP_COLORS; i++)
 173                color_set(opt->colors[i], def->colors[i]);
 174}
 175
 176static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 177{
 178        /*
 179         * When committing to the pattern type by setting the relevant
 180         * fields in grep_opt it's generally not necessary to zero out
 181         * the fields we're not choosing, since they won't have been
 182         * set by anything. The extended_regexp_option field is the
 183         * only exception to this.
 184         *
 185         * This is because in the process of parsing grep.patternType
 186         * & grep.extendedRegexp we set opt->pattern_type_option and
 187         * opt->extended_regexp_option, respectively. We then
 188         * internally use opt->extended_regexp_option to see if we're
 189         * compiling an ERE. It must be unset if that's not actually
 190         * the case.
 191         */
 192        if (pattern_type != GREP_PATTERN_TYPE_ERE &&
 193            opt->extended_regexp_option)
 194                opt->extended_regexp_option = 0;
 195
 196        switch (pattern_type) {
 197        case GREP_PATTERN_TYPE_UNSPECIFIED:
 198                /* fall through */
 199
 200        case GREP_PATTERN_TYPE_BRE:
 201                break;
 202
 203        case GREP_PATTERN_TYPE_ERE:
 204                opt->extended_regexp_option = 1;
 205                break;
 206
 207        case GREP_PATTERN_TYPE_FIXED:
 208                opt->fixed = 1;
 209                break;
 210
 211        case GREP_PATTERN_TYPE_PCRE:
 212#ifdef USE_LIBPCRE2
 213                opt->pcre2 = 1;
 214#else
 215                /*
 216                 * It's important that pcre1 always be assigned to
 217                 * even when there's no USE_LIBPCRE* defined. We still
 218                 * call the PCRE stub function, it just dies with
 219                 * "cannot use Perl-compatible regexes[...]".
 220                 */
 221                opt->pcre1 = 1;
 222#endif
 223                break;
 224        }
 225}
 226
 227void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 228{
 229        if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
 230                grep_set_pattern_type_option(pattern_type, opt);
 231        else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
 232                grep_set_pattern_type_option(opt->pattern_type_option, opt);
 233        else if (opt->extended_regexp_option)
 234                /*
 235                 * This branch *must* happen after setting from the
 236                 * opt->pattern_type_option above, we don't want
 237                 * grep.extendedRegexp to override grep.patternType!
 238                 */
 239                grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
 240}
 241
 242static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
 243                                        const char *origin, int no,
 244                                        enum grep_pat_token t,
 245                                        enum grep_header_field field)
 246{
 247        struct grep_pat *p = xcalloc(1, sizeof(*p));
 248        p->pattern = xmemdupz(pat, patlen);
 249        p->patternlen = patlen;
 250        p->origin = origin;
 251        p->no = no;
 252        p->token = t;
 253        p->field = field;
 254        return p;
 255}
 256
 257static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
 258{
 259        **tail = p;
 260        *tail = &p->next;
 261        p->next = NULL;
 262
 263        switch (p->token) {
 264        case GREP_PATTERN: /* atom */
 265        case GREP_PATTERN_HEAD:
 266        case GREP_PATTERN_BODY:
 267                for (;;) {
 268                        struct grep_pat *new_pat;
 269                        size_t len = 0;
 270                        char *cp = p->pattern + p->patternlen, *nl = NULL;
 271                        while (++len <= p->patternlen) {
 272                                if (*(--cp) == '\n') {
 273                                        nl = cp;
 274                                        break;
 275                                }
 276                        }
 277                        if (!nl)
 278                                break;
 279                        new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
 280                                                  p->no, p->token, p->field);
 281                        new_pat->next = p->next;
 282                        if (!p->next)
 283                                *tail = &new_pat->next;
 284                        p->next = new_pat;
 285                        *nl = '\0';
 286                        p->patternlen -= len;
 287                }
 288                break;
 289        default:
 290                break;
 291        }
 292}
 293
 294void append_header_grep_pattern(struct grep_opt *opt,
 295                                enum grep_header_field field, const char *pat)
 296{
 297        struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
 298                                             GREP_PATTERN_HEAD, field);
 299        if (field == GREP_HEADER_REFLOG)
 300                opt->use_reflog_filter = 1;
 301        do_append_grep_pat(&opt->header_tail, p);
 302}
 303
 304void append_grep_pattern(struct grep_opt *opt, const char *pat,
 305                         const char *origin, int no, enum grep_pat_token t)
 306{
 307        append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 308}
 309
 310void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
 311                     const char *origin, int no, enum grep_pat_token t)
 312{
 313        struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
 314        do_append_grep_pat(&opt->pattern_tail, p);
 315}
 316
 317struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 318{
 319        struct grep_pat *pat;
 320        struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
 321        *ret = *opt;
 322
 323        ret->pattern_list = NULL;
 324        ret->pattern_tail = &ret->pattern_list;
 325
 326        for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
 327        {
 328                if(pat->token == GREP_PATTERN_HEAD)
 329                        append_header_grep_pattern(ret, pat->field,
 330                                                   pat->pattern);
 331                else
 332                        append_grep_pat(ret, pat->pattern, pat->patternlen,
 333                                        pat->origin, pat->no, pat->token);
 334        }
 335
 336        return ret;
 337}
 338
 339static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 340                const char *error)
 341{
 342        char where[1024];
 343
 344        if (p->no)
 345                xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
 346        else if (p->origin)
 347                xsnprintf(where, sizeof(where), "%s, ", p->origin);
 348        else
 349                where[0] = 0;
 350
 351        die("%s'%s': %s", where, p->pattern, error);
 352}
 353
 354static int is_fixed(const char *s, size_t len)
 355{
 356        size_t i;
 357
 358        for (i = 0; i < len; i++) {
 359                if (is_regex_special(s[i]))
 360                        return 0;
 361        }
 362
 363        return 1;
 364}
 365
 366static int has_null(const char *s, size_t len)
 367{
 368        /*
 369         * regcomp cannot accept patterns with NULs so when using it
 370         * we consider any pattern containing a NUL fixed.
 371         */
 372        if (memchr(s, 0, len))
 373                return 1;
 374
 375        return 0;
 376}
 377
 378#ifdef USE_LIBPCRE1
 379static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 380{
 381        const char *error;
 382        int erroffset;
 383        int options = PCRE_MULTILINE;
 384
 385        if (opt->ignore_case) {
 386                if (has_non_ascii(p->pattern))
 387                        p->pcre1_tables = pcre_maketables();
 388                options |= PCRE_CASELESS;
 389        }
 390        if (is_utf8_locale() && has_non_ascii(p->pattern))
 391                options |= PCRE_UTF8;
 392
 393        p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
 394                                      p->pcre1_tables);
 395        if (!p->pcre1_regexp)
 396                compile_regexp_failed(p, error);
 397
 398        p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
 399        if (!p->pcre1_extra_info && error)
 400                die("%s", error);
 401
 402#ifdef GIT_PCRE1_USE_JIT
 403        pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 404        if (p->pcre1_jit_on == 1) {
 405                p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
 406                if (!p->pcre1_jit_stack)
 407                        die("Couldn't allocate PCRE JIT stack");
 408                pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
 409        } else if (p->pcre1_jit_on != 0) {
 410                BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
 411                    p->pcre1_jit_on);
 412        }
 413#endif
 414}
 415
 416static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 417                regmatch_t *match, int eflags)
 418{
 419        int ovector[30], ret, flags = 0;
 420
 421        if (eflags & REG_NOTBOL)
 422                flags |= PCRE_NOTBOL;
 423
 424#ifdef GIT_PCRE1_USE_JIT
 425        if (p->pcre1_jit_on) {
 426                ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 427                                    eol - line, 0, flags, ovector,
 428                                    ARRAY_SIZE(ovector), p->pcre1_jit_stack);
 429        } else
 430#endif
 431        {
 432                ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 433                                eol - line, 0, flags, ovector,
 434                                ARRAY_SIZE(ovector));
 435        }
 436
 437        if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 438                die("pcre_exec failed with error code %d", ret);
 439        if (ret > 0) {
 440                ret = 0;
 441                match->rm_so = ovector[0];
 442                match->rm_eo = ovector[1];
 443        }
 444
 445        return ret;
 446}
 447
 448static void free_pcre1_regexp(struct grep_pat *p)
 449{
 450        pcre_free(p->pcre1_regexp);
 451#ifdef GIT_PCRE1_USE_JIT
 452        if (p->pcre1_jit_on) {
 453                pcre_free_study(p->pcre1_extra_info);
 454                pcre_jit_stack_free(p->pcre1_jit_stack);
 455        } else
 456#endif
 457        {
 458                pcre_free(p->pcre1_extra_info);
 459        }
 460        pcre_free((void *)p->pcre1_tables);
 461}
 462#else /* !USE_LIBPCRE1 */
 463static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 464{
 465        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 466}
 467
 468static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 469                regmatch_t *match, int eflags)
 470{
 471        return 1;
 472}
 473
 474static void free_pcre1_regexp(struct grep_pat *p)
 475{
 476}
 477#endif /* !USE_LIBPCRE1 */
 478
 479#ifdef USE_LIBPCRE2
 480static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 481{
 482        int error;
 483        PCRE2_UCHAR errbuf[256];
 484        PCRE2_SIZE erroffset;
 485        int options = PCRE2_MULTILINE;
 486        const uint8_t *character_tables = NULL;
 487        int jitret;
 488        int patinforet;
 489        size_t jitsizearg;
 490
 491        assert(opt->pcre2);
 492
 493        p->pcre2_compile_context = NULL;
 494
 495        if (opt->ignore_case) {
 496                if (has_non_ascii(p->pattern)) {
 497                        character_tables = pcre2_maketables(NULL);
 498                        p->pcre2_compile_context = pcre2_compile_context_create(NULL);
 499                        pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
 500                }
 501                options |= PCRE2_CASELESS;
 502        }
 503        if (is_utf8_locale() && has_non_ascii(p->pattern))
 504                options |= PCRE2_UTF;
 505
 506        p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
 507                                         p->patternlen, options, &error, &erroffset,
 508                                         p->pcre2_compile_context);
 509
 510        if (p->pcre2_pattern) {
 511                p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
 512                if (!p->pcre2_match_data)
 513                        die("Couldn't allocate PCRE2 match data");
 514        } else {
 515                pcre2_get_error_message(error, errbuf, sizeof(errbuf));
 516                compile_regexp_failed(p, (const char *)&errbuf);
 517        }
 518
 519        pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
 520        if (p->pcre2_jit_on == 1) {
 521                jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 522                if (jitret)
 523                        die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
 524
 525                /*
 526                 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
 527                 * tells us whether the library itself supports JIT,
 528                 * but to see whether we're going to be actually using
 529                 * JIT we need to extract PCRE2_INFO_JITSIZE from the
 530                 * pattern *after* we do pcre2_jit_compile() above.
 531                 *
 532                 * This is because if the pattern contains the
 533                 * (*NO_JIT) verb (see pcre2syntax(3))
 534                 * pcre2_jit_compile() will exit early with 0. If we
 535                 * then proceed to call pcre2_jit_match() further down
 536                 * the line instead of pcre2_match() we'll either
 537                 * segfault (pre PCRE 10.31) or run into a fatal error
 538                 * (post PCRE2 10.31)
 539                 */
 540                patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
 541                if (patinforet)
 542                        BUG("pcre2_pattern_info() failed: %d", patinforet);
 543                if (jitsizearg == 0) {
 544                        p->pcre2_jit_on = 0;
 545                        return;
 546                }
 547
 548                p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
 549                if (!p->pcre2_jit_stack)
 550                        die("Couldn't allocate PCRE2 JIT stack");
 551                p->pcre2_match_context = pcre2_match_context_create(NULL);
 552                if (!p->pcre2_match_context)
 553                        die("Couldn't allocate PCRE2 match context");
 554                pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
 555        } else if (p->pcre2_jit_on != 0) {
 556                BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
 557                    p->pcre1_jit_on);
 558        }
 559}
 560
 561static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 562                regmatch_t *match, int eflags)
 563{
 564        int ret, flags = 0;
 565        PCRE2_SIZE *ovector;
 566        PCRE2_UCHAR errbuf[256];
 567
 568        if (eflags & REG_NOTBOL)
 569                flags |= PCRE2_NOTBOL;
 570
 571        if (p->pcre2_jit_on)
 572                ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
 573                                      eol - line, 0, flags, p->pcre2_match_data,
 574                                      NULL);
 575        else
 576                ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
 577                                  eol - line, 0, flags, p->pcre2_match_data,
 578                                  NULL);
 579
 580        if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
 581                pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
 582                die("%s failed with error code %d: %s",
 583                    (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
 584                    errbuf);
 585        }
 586        if (ret > 0) {
 587                ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
 588                ret = 0;
 589                match->rm_so = (int)ovector[0];
 590                match->rm_eo = (int)ovector[1];
 591        }
 592
 593        return ret;
 594}
 595
 596static void free_pcre2_pattern(struct grep_pat *p)
 597{
 598        pcre2_compile_context_free(p->pcre2_compile_context);
 599        pcre2_code_free(p->pcre2_pattern);
 600        pcre2_match_data_free(p->pcre2_match_data);
 601        pcre2_jit_stack_free(p->pcre2_jit_stack);
 602        pcre2_match_context_free(p->pcre2_match_context);
 603}
 604#else /* !USE_LIBPCRE2 */
 605static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 606{
 607        /*
 608         * Unreachable until USE_LIBPCRE2 becomes synonymous with
 609         * USE_LIBPCRE. See the sibling comment in
 610         * grep_set_pattern_type_option().
 611         */
 612        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 613}
 614
 615static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 616                regmatch_t *match, int eflags)
 617{
 618        return 1;
 619}
 620
 621static void free_pcre2_pattern(struct grep_pat *p)
 622{
 623}
 624#endif /* !USE_LIBPCRE2 */
 625
 626static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 627{
 628        struct strbuf sb = STRBUF_INIT;
 629        int err;
 630        int regflags = 0;
 631
 632        basic_regex_quote_buf(&sb, p->pattern);
 633        if (opt->ignore_case)
 634                regflags |= REG_ICASE;
 635        err = regcomp(&p->regexp, sb.buf, regflags);
 636        if (opt->debug)
 637                fprintf(stderr, "fixed %s\n", sb.buf);
 638        strbuf_release(&sb);
 639        if (err) {
 640                char errbuf[1024];
 641                regerror(err, &p->regexp, errbuf, sizeof(errbuf));
 642                compile_regexp_failed(p, errbuf);
 643        }
 644}
 645
 646static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 647{
 648        int ascii_only;
 649        int err;
 650        int regflags = REG_NEWLINE;
 651
 652        p->word_regexp = opt->word_regexp;
 653        p->ignore_case = opt->ignore_case;
 654        ascii_only     = !has_non_ascii(p->pattern);
 655
 656        /*
 657         * Even when -F (fixed) asks us to do a non-regexp search, we
 658         * may not be able to correctly case-fold when -i
 659         * (ignore-case) is asked (in which case, we'll synthesize a
 660         * regexp to match the pattern that matches regexp special
 661         * characters literally, while ignoring case differences).  On
 662         * the other hand, even without -F, if the pattern does not
 663         * have any regexp special characters and there is no need for
 664         * case-folding search, we can internally turn it into a
 665         * simple string match using kws.  p->fixed tells us if we
 666         * want to use kws.
 667         */
 668        if (opt->fixed ||
 669            has_null(p->pattern, p->patternlen) ||
 670            is_fixed(p->pattern, p->patternlen))
 671                p->fixed = !p->ignore_case || ascii_only;
 672
 673        if (p->fixed) {
 674                p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
 675                kwsincr(p->kws, p->pattern, p->patternlen);
 676                kwsprep(p->kws);
 677                return;
 678        } else if (opt->fixed) {
 679                /*
 680                 * We come here when the pattern has the non-ascii
 681                 * characters we cannot case-fold, and asked to
 682                 * ignore-case.
 683                 */
 684                compile_fixed_regexp(p, opt);
 685                return;
 686        }
 687
 688        if (opt->pcre2) {
 689                compile_pcre2_pattern(p, opt);
 690                return;
 691        }
 692
 693        if (opt->pcre1) {
 694                compile_pcre1_regexp(p, opt);
 695                return;
 696        }
 697
 698        if (p->ignore_case)
 699                regflags |= REG_ICASE;
 700        if (opt->extended_regexp_option)
 701                regflags |= REG_EXTENDED;
 702        err = regcomp(&p->regexp, p->pattern, regflags);
 703        if (err) {
 704                char errbuf[1024];
 705                regerror(err, &p->regexp, errbuf, 1024);
 706                compile_regexp_failed(p, errbuf);
 707        }
 708}
 709
 710static struct grep_expr *compile_pattern_or(struct grep_pat **);
 711static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
 712{
 713        struct grep_pat *p;
 714        struct grep_expr *x;
 715
 716        p = *list;
 717        if (!p)
 718                return NULL;
 719        switch (p->token) {
 720        case GREP_PATTERN: /* atom */
 721        case GREP_PATTERN_HEAD:
 722        case GREP_PATTERN_BODY:
 723                x = xcalloc(1, sizeof (struct grep_expr));
 724                x->node = GREP_NODE_ATOM;
 725                x->u.atom = p;
 726                *list = p->next;
 727                return x;
 728        case GREP_OPEN_PAREN:
 729                *list = p->next;
 730                x = compile_pattern_or(list);
 731                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 732                        die("unmatched parenthesis");
 733                *list = (*list)->next;
 734                return x;
 735        default:
 736                return NULL;
 737        }
 738}
 739
 740static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 741{
 742        struct grep_pat *p;
 743        struct grep_expr *x;
 744
 745        p = *list;
 746        if (!p)
 747                return NULL;
 748        switch (p->token) {
 749        case GREP_NOT:
 750                if (!p->next)
 751                        die("--not not followed by pattern expression");
 752                *list = p->next;
 753                x = xcalloc(1, sizeof (struct grep_expr));
 754                x->node = GREP_NODE_NOT;
 755                x->u.unary = compile_pattern_not(list);
 756                if (!x->u.unary)
 757                        die("--not followed by non pattern expression");
 758                return x;
 759        default:
 760                return compile_pattern_atom(list);
 761        }
 762}
 763
 764static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 765{
 766        struct grep_pat *p;
 767        struct grep_expr *x, *y, *z;
 768
 769        x = compile_pattern_not(list);
 770        p = *list;
 771        if (p && p->token == GREP_AND) {
 772                if (!p->next)
 773                        die("--and not followed by pattern expression");
 774                *list = p->next;
 775                y = compile_pattern_and(list);
 776                if (!y)
 777                        die("--and not followed by pattern expression");
 778                z = xcalloc(1, sizeof (struct grep_expr));
 779                z->node = GREP_NODE_AND;
 780                z->u.binary.left = x;
 781                z->u.binary.right = y;
 782                return z;
 783        }
 784        return x;
 785}
 786
 787static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 788{
 789        struct grep_pat *p;
 790        struct grep_expr *x, *y, *z;
 791
 792        x = compile_pattern_and(list);
 793        p = *list;
 794        if (x && p && p->token != GREP_CLOSE_PAREN) {
 795                y = compile_pattern_or(list);
 796                if (!y)
 797                        die("not a pattern expression %s", p->pattern);
 798                z = xcalloc(1, sizeof (struct grep_expr));
 799                z->node = GREP_NODE_OR;
 800                z->u.binary.left = x;
 801                z->u.binary.right = y;
 802                return z;
 803        }
 804        return x;
 805}
 806
 807static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 808{
 809        return compile_pattern_or(list);
 810}
 811
 812static void indent(int in)
 813{
 814        while (in-- > 0)
 815                fputc(' ', stderr);
 816}
 817
 818static void dump_grep_pat(struct grep_pat *p)
 819{
 820        switch (p->token) {
 821        case GREP_AND: fprintf(stderr, "*and*"); break;
 822        case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
 823        case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
 824        case GREP_NOT: fprintf(stderr, "*not*"); break;
 825        case GREP_OR: fprintf(stderr, "*or*"); break;
 826
 827        case GREP_PATTERN: fprintf(stderr, "pattern"); break;
 828        case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
 829        case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
 830        }
 831
 832        switch (p->token) {
 833        default: break;
 834        case GREP_PATTERN_HEAD:
 835                fprintf(stderr, "<head %d>", p->field); break;
 836        case GREP_PATTERN_BODY:
 837                fprintf(stderr, "<body>"); break;
 838        }
 839        switch (p->token) {
 840        default: break;
 841        case GREP_PATTERN_HEAD:
 842        case GREP_PATTERN_BODY:
 843        case GREP_PATTERN:
 844                fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
 845                break;
 846        }
 847        fputc('\n', stderr);
 848}
 849
 850static void dump_grep_expression_1(struct grep_expr *x, int in)
 851{
 852        indent(in);
 853        switch (x->node) {
 854        case GREP_NODE_TRUE:
 855                fprintf(stderr, "true\n");
 856                break;
 857        case GREP_NODE_ATOM:
 858                dump_grep_pat(x->u.atom);
 859                break;
 860        case GREP_NODE_NOT:
 861                fprintf(stderr, "(not\n");
 862                dump_grep_expression_1(x->u.unary, in+1);
 863                indent(in);
 864                fprintf(stderr, ")\n");
 865                break;
 866        case GREP_NODE_AND:
 867                fprintf(stderr, "(and\n");
 868                dump_grep_expression_1(x->u.binary.left, in+1);
 869                dump_grep_expression_1(x->u.binary.right, in+1);
 870                indent(in);
 871                fprintf(stderr, ")\n");
 872                break;
 873        case GREP_NODE_OR:
 874                fprintf(stderr, "(or\n");
 875                dump_grep_expression_1(x->u.binary.left, in+1);
 876                dump_grep_expression_1(x->u.binary.right, in+1);
 877                indent(in);
 878                fprintf(stderr, ")\n");
 879                break;
 880        }
 881}
 882
 883static void dump_grep_expression(struct grep_opt *opt)
 884{
 885        struct grep_expr *x = opt->pattern_expression;
 886
 887        if (opt->all_match)
 888                fprintf(stderr, "[all-match]\n");
 889        dump_grep_expression_1(x, 0);
 890        fflush(NULL);
 891}
 892
 893static struct grep_expr *grep_true_expr(void)
 894{
 895        struct grep_expr *z = xcalloc(1, sizeof(*z));
 896        z->node = GREP_NODE_TRUE;
 897        return z;
 898}
 899
 900static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
 901{
 902        struct grep_expr *z = xcalloc(1, sizeof(*z));
 903        z->node = GREP_NODE_OR;
 904        z->u.binary.left = left;
 905        z->u.binary.right = right;
 906        return z;
 907}
 908
 909static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
 910{
 911        struct grep_pat *p;
 912        struct grep_expr *header_expr;
 913        struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
 914        enum grep_header_field fld;
 915
 916        if (!opt->header_list)
 917                return NULL;
 918
 919        for (p = opt->header_list; p; p = p->next) {
 920                if (p->token != GREP_PATTERN_HEAD)
 921                        BUG("a non-header pattern in grep header list.");
 922                if (p->field < GREP_HEADER_FIELD_MIN ||
 923                    GREP_HEADER_FIELD_MAX <= p->field)
 924                        BUG("unknown header field %d", p->field);
 925                compile_regexp(p, opt);
 926        }
 927
 928        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
 929                header_group[fld] = NULL;
 930
 931        for (p = opt->header_list; p; p = p->next) {
 932                struct grep_expr *h;
 933                struct grep_pat *pp = p;
 934
 935                h = compile_pattern_atom(&pp);
 936                if (!h || pp != p->next)
 937                        BUG("malformed header expr");
 938                if (!header_group[p->field]) {
 939                        header_group[p->field] = h;
 940                        continue;
 941                }
 942                header_group[p->field] = grep_or_expr(h, header_group[p->field]);
 943        }
 944
 945        header_expr = NULL;
 946
 947        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
 948                if (!header_group[fld])
 949                        continue;
 950                if (!header_expr)
 951                        header_expr = grep_true_expr();
 952                header_expr = grep_or_expr(header_group[fld], header_expr);
 953        }
 954        return header_expr;
 955}
 956
 957static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
 958{
 959        struct grep_expr *z = x;
 960
 961        while (x) {
 962                assert(x->node == GREP_NODE_OR);
 963                if (x->u.binary.right &&
 964                    x->u.binary.right->node == GREP_NODE_TRUE) {
 965                        x->u.binary.right = y;
 966                        break;
 967                }
 968                x = x->u.binary.right;
 969        }
 970        return z;
 971}
 972
 973static void compile_grep_patterns_real(struct grep_opt *opt)
 974{
 975        struct grep_pat *p;
 976        struct grep_expr *header_expr = prep_header_patterns(opt);
 977
 978        for (p = opt->pattern_list; p; p = p->next) {
 979                switch (p->token) {
 980                case GREP_PATTERN: /* atom */
 981                case GREP_PATTERN_HEAD:
 982                case GREP_PATTERN_BODY:
 983                        compile_regexp(p, opt);
 984                        break;
 985                default:
 986                        opt->extended = 1;
 987                        break;
 988                }
 989        }
 990
 991        if (opt->all_match || header_expr)
 992                opt->extended = 1;
 993        else if (!opt->extended && !opt->debug)
 994                return;
 995
 996        p = opt->pattern_list;
 997        if (p)
 998                opt->pattern_expression = compile_pattern_expr(&p);
 999        if (p)
1000                die("incomplete pattern expression: %s", p->pattern);
1001
1002        if (!header_expr)
1003                return;
1004
1005        if (!opt->pattern_expression)
1006                opt->pattern_expression = header_expr;
1007        else if (opt->all_match)
1008                opt->pattern_expression = grep_splice_or(header_expr,
1009                                                         opt->pattern_expression);
1010        else
1011                opt->pattern_expression = grep_or_expr(opt->pattern_expression,
1012                                                       header_expr);
1013        opt->all_match = 1;
1014}
1015
1016void compile_grep_patterns(struct grep_opt *opt)
1017{
1018        compile_grep_patterns_real(opt);
1019        if (opt->debug)
1020                dump_grep_expression(opt);
1021}
1022
1023static void free_pattern_expr(struct grep_expr *x)
1024{
1025        switch (x->node) {
1026        case GREP_NODE_TRUE:
1027        case GREP_NODE_ATOM:
1028                break;
1029        case GREP_NODE_NOT:
1030                free_pattern_expr(x->u.unary);
1031                break;
1032        case GREP_NODE_AND:
1033        case GREP_NODE_OR:
1034                free_pattern_expr(x->u.binary.left);
1035                free_pattern_expr(x->u.binary.right);
1036                break;
1037        }
1038        free(x);
1039}
1040
1041void free_grep_patterns(struct grep_opt *opt)
1042{
1043        struct grep_pat *p, *n;
1044
1045        for (p = opt->pattern_list; p; p = n) {
1046                n = p->next;
1047                switch (p->token) {
1048                case GREP_PATTERN: /* atom */
1049                case GREP_PATTERN_HEAD:
1050                case GREP_PATTERN_BODY:
1051                        if (p->kws)
1052                                kwsfree(p->kws);
1053                        else if (p->pcre1_regexp)
1054                                free_pcre1_regexp(p);
1055                        else if (p->pcre2_pattern)
1056                                free_pcre2_pattern(p);
1057                        else
1058                                regfree(&p->regexp);
1059                        free(p->pattern);
1060                        break;
1061                default:
1062                        break;
1063                }
1064                free(p);
1065        }
1066
1067        if (!opt->extended)
1068                return;
1069        free_pattern_expr(opt->pattern_expression);
1070}
1071
/*
 * Advance `cp` to the next '\n' or until `*left` bytes have been
 * consumed, whichever comes first.  `*left` is reduced by the number of
 * bytes skipped.  Returns the new position (pointing at the newline when
 * one was found).
 */
static char *end_of_line(char *cp, unsigned long *left)
{
        unsigned long remaining;

        for (remaining = *left; remaining && *cp != '\n'; remaining--)
                cp++;
        *left = remaining;
        return cp;
}
1082
/*
 * Return non-zero when `ch` can be part of a word: an alphanumeric
 * character or an underscore.
 *
 * The argument is converted to unsigned char before classification so
 * that bytes with the high bit set never reach isalnum() as negative
 * values, which the standard <ctype.h> functions do not accept.
 */
static int word_char(char ch)
{
        return isalnum((unsigned char)ch) || ch == '_';
}
1087
1088static void output_color(struct grep_opt *opt, const void *data, size_t size,
1089                         const char *color)
1090{
1091        if (want_color(opt->color) && color && color[0]) {
1092                opt->output(opt, color, strlen(color));
1093                opt->output(opt, data, size);
1094                opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
1095        } else
1096                opt->output(opt, data, size);
1097}
1098
1099static void output_sep(struct grep_opt *opt, char sign)
1100{
1101        if (opt->null_following_name)
1102                opt->output(opt, "\0", 1);
1103        else
1104                output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
1105}
1106
1107static void show_name(struct grep_opt *opt, const char *name)
1108{
1109        output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1110        opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
1111}
1112
1113static int fixmatch(struct grep_pat *p, char *line, char *eol,
1114                    regmatch_t *match)
1115{
1116        struct kwsmatch kwsm;
1117        size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
1118        if (offset == -1) {
1119                match->rm_so = match->rm_eo = -1;
1120                return REG_NOMATCH;
1121        } else {
1122                match->rm_so = offset;
1123                match->rm_eo = match->rm_so + kwsm.size[0];
1124                return 0;
1125        }
1126}
1127
1128static int patmatch(struct grep_pat *p, char *line, char *eol,
1129                    regmatch_t *match, int eflags)
1130{
1131        int hit;
1132
1133        if (p->fixed)
1134                hit = !fixmatch(p, line, eol, match);
1135        else if (p->pcre1_regexp)
1136                hit = !pcre1match(p, line, eol, match, eflags);
1137        else if (p->pcre2_pattern)
1138                hit = !pcre2match(p, line, eol, match, eflags);
1139        else
1140                hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
1141                                   eflags);
1142
1143        return hit;
1144}
1145
/*
 * Cut the line [bol, *eol_p) right after its last '>' character.
 *
 * The search runs backwards from the end and does not examine the very
 * first byte.  When a '>' is found, the byte following it is overwritten
 * with NUL, *eol_p is moved to that byte, and the overwritten character
 * is returned so the caller can restore it later.  Returns 0 and leaves
 * everything untouched when no '>' is found.
 */
static int strip_timestamp(char *bol, char **eol_p)
{
        char *cur;

        for (cur = *eol_p - 1; bol < cur; cur--) {
                if (*cur == '>') {
                        int saved = cur[1];

                        cur[1] = '\0';
                        *eol_p = cur + 1;
                        return saved;
                }
        }
        return 0;
}
1161
/*
 * Line prefixes that introduce the header lines a header pattern can be
 * matched against, with their lengths.  match_one_pattern() indexes this
 * table with the `field` member of a header pattern.
 */
static struct {
        const char *field;
        size_t len;
} header_field[] = {
        { .field = "author ",    .len = 7 },
        { .field = "committer ", .len = 10 },
        { .field = "reflog ",    .len = 7 },
};
1170
/*
 * Try to match the single pattern `p` against the line [bol, eol).
 *
 * A header pattern is only tried in GREP_CONTEXT_HEAD, and a body pattern
 * only outside of it; plain patterns are tried in any context.  For a
 * header pattern the line must start with the field's prefix from
 * header_field[], and only the text after that prefix is searched; for
 * the author and committer fields the searched text is additionally cut
 * after the last '>' (see strip_timestamp()), and the byte overwritten by
 * that cut is restored before returning.
 *
 * With p->word_regexp, a match only counts when it is non-empty and
 * delimited by non-word characters (or the line boundaries); otherwise
 * the search is retried further along the line.
 *
 * Returns non-zero on a match, in which case pmatch[0] holds offsets
 * relative to the `bol` that was passed in.
 */
static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
                             enum grep_context ctx,
                             regmatch_t *pmatch, int eflags)
{
        int hit = 0;
        int saved_ch = 0;
        const char *start = bol; /* original line start, for rebasing offsets */

        /* Header/body-restricted patterns only apply in their own context. */
        if ((p->token != GREP_PATTERN) &&
            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
                return 0;

        if (p->token == GREP_PATTERN_HEAD) {
                const char *field;
                size_t len;
                assert(p->field < ARRAY_SIZE(header_field));
                field = header_field[p->field].field;
                len = header_field[p->field].len;
                /* Not the header line this pattern is about. */
                if (strncmp(bol, field, len))
                        return 0;
                bol += len;
                switch (p->field) {
                case GREP_HEADER_AUTHOR:
                case GREP_HEADER_COMMITTER:
                        /* Temporarily truncates the line; undone below. */
                        saved_ch = strip_timestamp(bol, &eol);
                        break;
                default:
                        break;
                }
        }

 again:
        hit = patmatch(p, bol, eol, pmatch, eflags);

        if (hit && p->word_regexp) {
                /* Sanity-check the offsets before using them as indices. */
                if ((pmatch[0].rm_so < 0) ||
                    (eol - bol) < pmatch[0].rm_so ||
                    (pmatch[0].rm_eo < 0) ||
                    (eol - bol) < pmatch[0].rm_eo)
                        die("regexp returned nonsense");

                /* Match beginning must be either beginning of the
                 * line, or at word boundary (i.e. the last char must
                 * not be a word char).  Similarly, match end must be
                 * either end of the line, or at word boundary
                 * (i.e. the next char must not be a word char).
                 */
                if ( ((pmatch[0].rm_so == 0) ||
                      !word_char(bol[pmatch[0].rm_so-1])) &&
                     ((pmatch[0].rm_eo == (eol-bol)) ||
                      !word_char(bol[pmatch[0].rm_eo])) )
                        ;
                else
                        hit = 0;

                /* Words consist of at least one character. */
                if (pmatch->rm_so == pmatch->rm_eo)
                        hit = 0;

                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
                        /* There could be more than one match on the
                         * line, and the first match might not be
                         * strict word match.  But later ones could be!
                         * Forward to the next possible start, i.e. the
                         * next position following a non-word char.
                         */
                        bol = pmatch[0].rm_so + bol + 1;
                        while (word_char(bol[-1]) && bol < eol)
                                bol++;
                        /* We are no longer at the start of the line. */
                        eflags |= REG_NOTBOL;
                        if (bol < eol)
                                goto again;
                }
        }
        /* Put back the byte strip_timestamp() replaced with NUL. */
        if (p->token == GREP_PATTERN_HEAD && saved_ch)
                *eol = saved_ch;
        if (hit) {
                /* bol may have advanced; report offsets from the original start. */
                pmatch[0].rm_so += bol - start;
                pmatch[0].rm_eo += bol - start;
        }
        return hit;
}
1253
/*
 * Recursively evaluate the expression tree `x` against the line
 * [bol, eol) and return non-zero when it matches.
 *
 * `*col` is lowered to the smallest match start offset among the atoms
 * that hit; `icol` plays the same role beneath a NOT node, where the two
 * pointers trade places.  When `collect_hits` is non-zero, the result is
 * also OR-ed into the `hit` member of the nodes visited along the right
 * spine of an OR chain and of their left operands.
 *
 * Dies when `x` is NULL or has an unknown node type.
 */
static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol,
                           char *eol, enum grep_context ctx, ssize_t *col,
                           ssize_t *icol, int collect_hits)
{
        int h = 0;

        if (!x)
                die("Not a valid grep expression");
        switch (x->node) {
        case GREP_NODE_TRUE:
                h = 1;
                break;
        case GREP_NODE_ATOM:
                {
                        regmatch_t tmp;
                        h = match_one_pattern(x->u.atom, bol, eol, ctx,
                                              &tmp, 0);
                        /* Track the earliest match start seen so far. */
                        if (h && (*col < 0 || tmp.rm_so < *col))
                                *col = tmp.rm_so;
                }
                break;
        case GREP_NODE_NOT:
                /*
                 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
                 */
                h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
                                     0);
                break;
        case GREP_NODE_AND:
                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
                                    icol, 0);
                if (h || opt->columnnum) {
                        /*
                         * Don't short-circuit AND when given --column, since a
                         * NOT earlier in the tree may turn this into an OR. In
                         * this case, see the below comment.
                         */
                        h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
                                             ctx, col, icol, 0);
                }
                break;
        case GREP_NODE_OR:
                if (!(collect_hits || opt->columnnum)) {
                        /*
                         * Don't short-circuit OR when given --column (or
                         * collecting hits) to ensure we don't skip a later
                         * child that would produce an earlier match.
                         */
                        return (match_expr_eval(opt, x->u.binary.left, bol, eol,
                                                ctx, col, icol, 0) ||
                                match_expr_eval(opt, x->u.binary.right, bol,
                                                eol, ctx, col, icol, 0));
                }
                /*
                 * Evaluate both operands.  The left one is evaluated
                 * without hit collection and its hit flag is updated here;
                 * the right one continues collecting on its own.
                 */
                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
                                    icol, 0);
                if (collect_hits)
                        x->u.binary.left->hit |= h;
                h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
                                     icol, collect_hits);
                break;
        default:
                die("Unexpected node type (internal error) %d", x->node);
        }
        if (collect_hits)
                x->hit |= h;
        return h;
}
1321
1322static int match_expr(struct grep_opt *opt, char *bol, char *eol,
1323                      enum grep_context ctx, ssize_t *col,
1324                      ssize_t *icol, int collect_hits)
1325{
1326        struct grep_expr *x = opt->pattern_expression;
1327        return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
1328}
1329
1330static int match_line(struct grep_opt *opt, char *bol, char *eol,
1331                      ssize_t *col, ssize_t *icol,
1332                      enum grep_context ctx, int collect_hits)
1333{
1334        struct grep_pat *p;
1335        int hit = 0;
1336
1337        if (opt->extended)
1338                return match_expr(opt, bol, eol, ctx, col, icol,
1339                                  collect_hits);
1340
1341        /* we do not call with collect_hits without being extended */
1342        for (p = opt->pattern_list; p; p = p->next) {
1343                regmatch_t tmp;
1344                if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
1345                        hit |= 1;
1346                        if (!opt->columnnum) {
1347                                /*
1348                                 * Without --column, any single match on a line
1349                                 * is enough to know that it needs to be
1350                                 * printed. With --column, scan _all_ patterns
1351                                 * to find the earliest.
1352                                 */
1353                                break;
1354                        }
1355                        if (*col < 0 || tmp.rm_so < *col)
1356                                *col = tmp.rm_so;
1357                }
1358        }
1359        return hit;
1360}
1361
1362static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
1363                              enum grep_context ctx,
1364                              regmatch_t *pmatch, int eflags)
1365{
1366        regmatch_t match;
1367
1368        if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
1369                return 0;
1370        if (match.rm_so < 0 || match.rm_eo < 0)
1371                return 0;
1372        if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1373                if (match.rm_so > pmatch->rm_so)
1374                        return 1;
1375                if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1376                        return 1;
1377        }
1378        pmatch->rm_so = match.rm_so;
1379        pmatch->rm_eo = match.rm_eo;
1380        return 1;
1381}
1382
1383static int next_match(struct grep_opt *opt, char *bol, char *eol,
1384                      enum grep_context ctx, regmatch_t *pmatch, int eflags)
1385{
1386        struct grep_pat *p;
1387        int hit = 0;
1388
1389        pmatch->rm_so = pmatch->rm_eo = -1;
1390        if (bol < eol) {
1391                for (p = opt->pattern_list; p; p = p->next) {
1392                        switch (p->token) {
1393                        case GREP_PATTERN: /* atom */
1394                        case GREP_PATTERN_HEAD:
1395                        case GREP_PATTERN_BODY:
1396                                hit |= match_next_pattern(p, bol, eol, ctx,
1397                                                          pmatch, eflags);
1398                                break;
1399                        default:
1400                                break;
1401                        }
1402                }
1403        }
1404        return hit;
1405}
1406
1407static void show_line(struct grep_opt *opt, char *bol, char *eol,
1408                      const char *name, unsigned lno, ssize_t cno, char sign)
1409{
1410        int rest = eol - bol;
1411        const char *match_color, *line_color = NULL;
1412
1413        if (opt->file_break && opt->last_shown == 0) {
1414                if (opt->show_hunk_mark)
1415                        opt->output(opt, "\n", 1);
1416        } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1417                if (opt->last_shown == 0) {
1418                        if (opt->show_hunk_mark) {
1419                                output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
1420                                opt->output(opt, "\n", 1);
1421                        }
1422                } else if (lno > opt->last_shown + 1) {
1423                        output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
1424                        opt->output(opt, "\n", 1);
1425                }
1426        }
1427        if (opt->heading && opt->last_shown == 0) {
1428                output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1429                opt->output(opt, "\n", 1);
1430        }
1431        opt->last_shown = lno;
1432
1433        if (!opt->heading && opt->pathname) {
1434                output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1435                output_sep(opt, sign);
1436        }
1437        if (opt->linenum) {
1438                char buf[32];
1439                xsnprintf(buf, sizeof(buf), "%d", lno);
1440                output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
1441                output_sep(opt, sign);
1442        }
1443        /*
1444         * Treat 'cno' as the 1-indexed offset from the start of a non-context
1445         * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1446         * being called with a context line.
1447         */
1448        if (opt->columnnum && cno) {
1449                char buf[32];
1450                xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
1451                output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
1452                output_sep(opt, sign);
1453        }
1454        if (opt->color) {
1455                regmatch_t match;
1456                enum grep_context ctx = GREP_CONTEXT_BODY;
1457                int ch = *eol;
1458                int eflags = 0;
1459
1460                if (sign == ':')
1461                        match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
1462                else
1463                        match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
1464                if (sign == ':')
1465                        line_color = opt->colors[GREP_COLOR_SELECTED];
1466                else if (sign == '-')
1467                        line_color = opt->colors[GREP_COLOR_CONTEXT];
1468                else if (sign == '=')
1469                        line_color = opt->colors[GREP_COLOR_FUNCTION];
1470                *eol = '\0';
1471                while (next_match(opt, bol, eol, ctx, &match, eflags)) {
1472                        if (match.rm_so == match.rm_eo)
1473                                break;
1474
1475                        output_color(opt, bol, match.rm_so, line_color);
1476                        output_color(opt, bol + match.rm_so,
1477                                     match.rm_eo - match.rm_so, match_color);
1478                        bol += match.rm_eo;
1479                        rest -= match.rm_eo;
1480                        eflags = REG_NOTBOL;
1481                }
1482                *eol = ch;
1483        }
1484        output_color(opt, bol, rest, line_color);
1485        opt->output(opt, "\n", 1);
1486}
1487
#ifndef NO_PTHREADS
/*
 * When non-zero, grep_attr_lock() and grep_attr_unlock() below really
 * take and release grep_attr_mutex; when zero they do nothing.
 */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

/* Take grep_attr_mutex, but only when locking is enabled. */
static inline void grep_attr_lock(void)
{
        if (grep_use_locks)
                pthread_mutex_lock(&grep_attr_mutex);
}

/* Release grep_attr_mutex, but only when locking is enabled. */
static inline void grep_attr_unlock(void)
{
        if (grep_use_locks)
                pthread_mutex_unlock(&grep_attr_mutex);
}

/*
 * Same as grep_attr_mutex, but protecting the thread-unsafe object db access.
 */
pthread_mutex_t grep_read_mutex;

#else
/* Without pthreads the attribute lock helpers expand to nothing. */
#define grep_attr_lock()
#define grep_attr_unlock()
#endif
1518
1519static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
1520{
1521        xdemitconf_t *xecfg = opt->priv;
1522        if (xecfg && !xecfg->find_func) {
1523                grep_source_load_driver(gs);
1524                if (gs->driver->funcname.pattern) {
1525                        const struct userdiff_funcname *pe = &gs->driver->funcname;
1526                        xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1527                } else {
1528                        xecfg = opt->priv = NULL;
1529                }
1530        }
1531
1532        if (xecfg) {
1533                char buf[1];
1534                return xecfg->find_func(bol, eol - bol, buf, 1,
1535                                        xecfg->find_func_priv) >= 0;
1536        }
1537
1538        if (bol == eol)
1539                return 0;
1540        if (isalpha(*bol) || *bol == '_' || *bol == '$')
1541                return 1;
1542        return 0;
1543}
1544
1545static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1546                               char *bol, unsigned lno)
1547{
1548        while (bol > gs->buf) {
1549                char *eol = --bol;
1550
1551                while (bol > gs->buf && bol[-1] != '\n')
1552                        bol--;
1553                lno--;
1554
1555                if (lno <= opt->last_shown)
1556                        break;
1557
1558                if (match_funcname(opt, gs, bol, eol)) {
1559                        show_line(opt, bol, eol, gs->name, lno, 0, '=');
1560                        break;
1561                }
1562        }
1563}
1564
/* Defined further down in this file; needed by show_pre_context(). */
static int is_empty_line(const char *bol, const char *eol);

/*
 * Show the lines leading up to line `lno`, which spans [bol, end).
 *
 * The function first rewinds from `bol` to find where the output should
 * start, then prints forward from there up to (not including) `lno`.
 * How far it rewinds depends on the options:
 *  - normally at most opt->pre_context lines, and never into lines at or
 *    before opt->last_shown;
 *  - with opt->funcname or opt->funcbody, the rewind also looks for a
 *    line accepted by match_funcname(), which is printed with '=' as its
 *    sign instead of '-';
 *  - with opt->funcbody, the rewind may go back over everything not yet
 *    shown.
 *
 * NOTE(review): `comment_needed` presumably means "keep including the
 * lines that precede the function header until an empty line or another
 * function header is hit" -- confirm against the option documentation.
 */
static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
                             char *bol, char *end, unsigned lno)
{
        unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
        int funcname_needed = !!opt->funcname, comment_needed = 0;

        /* First line the plain context window would reach back to. */
        if (opt->pre_context < lno)
                from = lno - opt->pre_context;
        if (from <= opt->last_shown)
                from = opt->last_shown + 1;
        orig_from = from;
        if (opt->funcbody) {
                if (match_funcname(opt, gs, bol, end))
                        comment_needed = 1;
                else
                        funcname_needed = 1;
                /* Allow the rewind to cover everything not shown yet. */
                from = opt->last_shown + 1;
        }

        /* Rewind. */
        while (bol > gs->buf && cur > from) {
                char *next_bol = bol;
                char *eol = --bol;

                while (bol > gs->buf && bol[-1] != '\n')
                        bol--;
                cur--;
                if (comment_needed && (is_empty_line(bol, eol) ||
                                       match_funcname(opt, gs, bol, eol))) {
                        comment_needed = 0;
                        /* Fall back to the plain context window. */
                        from = orig_from;
                        if (cur < from) {
                                /* Stepped past the window: undo this step. */
                                cur++;
                                bol = next_bol;
                                break;
                        }
                }
                if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
                        funcname_lno = cur;
                        funcname_needed = 0;
                        if (opt->funcbody)
                                comment_needed = 1;
                        else
                                from = orig_from;
                }
        }

        /* We need to look even further back to find a function signature. */
        if (opt->funcname && funcname_needed)
                show_funcname_line(opt, gs, bol, cur);

        /* Back forward. */
        while (cur < lno) {
                char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';

                while (*eol != '\n')
                        eol++;
                show_line(opt, bol, eol, gs->name, cur, 0, sign);
                bol = eol + 1;
                cur++;
        }
}
1629
1630static int should_lookahead(struct grep_opt *opt)
1631{
1632        struct grep_pat *p;
1633
1634        if (opt->extended)
1635                return 0; /* punt for too complex stuff */
1636        if (opt->invert)
1637                return 0;
1638        for (p = opt->pattern_list; p; p = p->next) {
1639                if (p->token != GREP_PATTERN)
1640                        return 0; /* punt for "header only" and stuff */
1641        }
1642        return 1;
1643}
1644
1645static int look_ahead(struct grep_opt *opt,
1646                      unsigned long *left_p,
1647                      unsigned *lno_p,
1648                      char **bol_p)
1649{
1650        unsigned lno = *lno_p;
1651        char *bol = *bol_p;
1652        struct grep_pat *p;
1653        char *sp, *last_bol;
1654        regoff_t earliest = -1;
1655
1656        for (p = opt->pattern_list; p; p = p->next) {
1657                int hit;
1658                regmatch_t m;
1659
1660                hit = patmatch(p, bol, bol + *left_p, &m, 0);
1661                if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1662                        continue;
1663                if (earliest < 0 || m.rm_so < earliest)
1664                        earliest = m.rm_so;
1665        }
1666
1667        if (earliest < 0) {
1668                *bol_p = bol + *left_p;
1669                *left_p = 0;
1670                return 1;
1671        }
1672        for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1673                ; /* find the beginning of the line */
1674        last_bol = sp;
1675
1676        for (sp = bol; sp < last_bol; sp++) {
1677                if (*sp == '\n')
1678                        lno++;
1679        }
1680        *left_p -= last_bol - bol;
1681        *bol_p = last_bol;
1682        *lno_p = lno;
1683        return 0;
1684}
1685
1686static int fill_textconv_grep(struct userdiff_driver *driver,
1687                              struct grep_source *gs)
1688{
1689        struct diff_filespec *df;
1690        char *buf;
1691        size_t size;
1692
1693        if (!driver || !driver->textconv)
1694                return grep_source_load(gs);
1695
1696        /*
1697         * The textconv interface is intimately tied to diff_filespecs, so we
1698         * have to pretend to be one. If we could unify the grep_source
1699         * and diff_filespec structs, this mess could just go away.
1700         */
1701        df = alloc_filespec(gs->path);
1702        switch (gs->type) {
1703        case GREP_SOURCE_OID:
1704                fill_filespec(df, gs->identifier, 1, 0100644);
1705                break;
1706        case GREP_SOURCE_FILE:
1707                fill_filespec(df, &null_oid, 0, 0100644);
1708                break;
1709        default:
1710                BUG("attempt to textconv something without a path?");
1711        }
1712
1713        /*
1714         * fill_textconv is not remotely thread-safe; it may load objects
1715         * behind the scenes, and it modifies the global diff tempfile
1716         * structure.
1717         */
1718        grep_read_lock();
1719        size = fill_textconv(driver, df, &buf);
1720        grep_read_unlock();
1721        free_filespec(df);
1722
1723        /*
1724         * The normal fill_textconv usage by the diff machinery would just keep
1725         * the textconv'd buf separate from the diff_filespec. But much of the
1726         * grep code passes around a grep_source and assumes that its "buf"
1727         * pointer is the beginning of the thing we are searching. So let's
1728         * install our textconv'd version into the grep_source, taking care not
1729         * to leak any existing buffer.
1730         */
1731        grep_source_clear_data(gs);
1732        gs->buf = buf;
1733        gs->size = size;
1734
1735        return 0;
1736}
1737
/*
 * Return 1 if the bytes in [bol, eol) are all whitespace (which includes
 * the case bol == eol), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
        const char *p = bol;

        for (; p < eol; p++) {
                if (!isspace(*p))
                        break;
        }
        return p == eol;
}
1744
1745static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1746{
1747        char *bol;
1748        char *peek_bol = NULL;
1749        unsigned long left;
1750        unsigned lno = 1;
1751        unsigned last_hit = 0;
1752        int binary_match_only = 0;
1753        unsigned count = 0;
1754        int try_lookahead = 0;
1755        int show_function = 0;
1756        struct userdiff_driver *textconv = NULL;
1757        enum grep_context ctx = GREP_CONTEXT_HEAD;
1758        xdemitconf_t xecfg;
1759
1760        if (!opt->output)
1761                opt->output = std_output;
1762
1763        if (opt->pre_context || opt->post_context || opt->file_break ||
1764            opt->funcbody) {
1765                /* Show hunk marks, except for the first file. */
1766                if (opt->last_shown)
1767                        opt->show_hunk_mark = 1;
1768                /*
1769                 * If we're using threads then we can't easily identify
1770                 * the first file.  Always put hunk marks in that case
1771                 * and skip the very first one later in work_done().
1772                 */
1773                if (opt->output != std_output)
1774                        opt->show_hunk_mark = 1;
1775        }
1776        opt->last_shown = 0;
1777
1778        if (opt->allow_textconv) {
1779                grep_source_load_driver(gs);
1780                /*
1781                 * We might set up the shared textconv cache data here, which
1782                 * is not thread-safe.
1783                 */
1784                grep_attr_lock();
1785                textconv = userdiff_get_textconv(gs->driver);
1786                grep_attr_unlock();
1787        }
1788
1789        /*
1790         * We know the result of a textconv is text, so we only have to care
1791         * about binary handling if we are not using it.
1792         */
1793        if (!textconv) {
1794                switch (opt->binary) {
1795                case GREP_BINARY_DEFAULT:
1796                        if (grep_source_is_binary(gs))
1797                                binary_match_only = 1;
1798                        break;
1799                case GREP_BINARY_NOMATCH:
1800                        if (grep_source_is_binary(gs))
1801                                return 0; /* Assume unmatch */
1802                        break;
1803                case GREP_BINARY_TEXT:
1804                        break;
1805                default:
1806                        BUG("unknown binary handling mode");
1807                }
1808        }
1809
1810        memset(&xecfg, 0, sizeof(xecfg));
1811        opt->priv = &xecfg;
1812
1813        try_lookahead = should_lookahead(opt);
1814
1815        if (fill_textconv_grep(textconv, gs) < 0)
1816                return 0;
1817
1818        bol = gs->buf;
1819        left = gs->size;
1820        while (left) {
1821                char *eol, ch;
1822                int hit;
1823                ssize_t cno;
1824                ssize_t col = -1, icol = -1;
1825
1826                /*
1827                 * look_ahead() skips quickly to the line that possibly
1828                 * has the next hit; don't call it if we need to do
1829                 * something more than just skipping the current line
1830                 * in response to an unmatch for the current line.  E.g.
1831                 * inside a post-context window, we will show the current
1832                 * line as a context around the previous hit when it
1833                 * doesn't hit.
1834                 */
1835                if (try_lookahead
1836                    && !(last_hit
1837                         && (show_function ||
1838                             lno <= last_hit + opt->post_context))
1839                    && look_ahead(opt, &left, &lno, &bol))
1840                        break;
1841                eol = end_of_line(bol, &left);
1842                ch = *eol;
1843                *eol = 0;
1844
1845                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1846                        ctx = GREP_CONTEXT_BODY;
1847
1848                hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
1849                *eol = ch;
1850
1851                if (collect_hits)
1852                        goto next_line;
1853
1854                /* "grep -v -e foo -e bla" should list lines
1855                 * that do not have either, so inversion should
1856                 * be done outside.
1857                 */
1858                if (opt->invert)
1859                        hit = !hit;
1860                if (opt->unmatch_name_only) {
1861                        if (hit)
1862                                return 0;
1863                        goto next_line;
1864                }
1865                if (hit) {
1866                        count++;
1867                        if (opt->status_only)
1868                                return 1;
1869                        if (opt->name_only) {
1870                                show_name(opt, gs->name);
1871                                return 1;
1872                        }
1873                        if (opt->count)
1874                                goto next_line;
1875                        if (binary_match_only) {
1876                                opt->output(opt, "Binary file ", 12);
1877                                output_color(opt, gs->name, strlen(gs->name),
1878                                             opt->colors[GREP_COLOR_FILENAME]);
1879                                opt->output(opt, " matches\n", 9);
1880                                return 1;
1881                        }
1882                        /* Hit at this line.  If we haven't shown the
1883                         * pre-context lines, we would need to show them.
1884                         */
1885                        if (opt->pre_context || opt->funcbody)
1886                                show_pre_context(opt, gs, bol, eol, lno);
1887                        else if (opt->funcname)
1888                                show_funcname_line(opt, gs, bol, lno);
1889                        cno = opt->invert ? icol : col;
1890                        if (cno < 0) {
1891                                /*
1892                                 * A negative cno indicates that there was no
1893                                 * match on the line. We are thus inverted and
1894                                 * being asked to show all lines that _don't_
1895                                 * match a given expression. Therefore, set cno
1896                                 * to 0 to suggest the whole line matches.
1897                                 */
1898                                cno = 0;
1899                        }
1900                        show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
1901                        last_hit = lno;
1902                        if (opt->funcbody)
1903                                show_function = 1;
1904                        goto next_line;
1905                }
1906                if (show_function && (!peek_bol || peek_bol < bol)) {
1907                        unsigned long peek_left = left;
1908                        char *peek_eol = eol;
1909
1910                        /*
1911                         * Trailing empty lines are not interesting.
1912                         * Peek past them to see if they belong to the
1913                         * body of the current function.
1914                         */
1915                        peek_bol = bol;
1916                        while (is_empty_line(peek_bol, peek_eol)) {
1917                                peek_bol = peek_eol + 1;
1918                                peek_eol = end_of_line(peek_bol, &peek_left);
1919                        }
1920
1921                        if (match_funcname(opt, gs, peek_bol, peek_eol))
1922                                show_function = 0;
1923                }
1924                if (show_function ||
1925                    (last_hit && lno <= last_hit + opt->post_context)) {
1926                        /* If the last hit is within the post context,
1927                         * we need to show this line.
1928                         */
1929                        show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
1930                }
1931
1932        next_line:
1933                bol = eol + 1;
1934                if (!left)
1935                        break;
1936                left--;
1937                lno++;
1938        }
1939
1940        if (collect_hits)
1941                return 0;
1942
1943        if (opt->status_only)
1944                return opt->unmatch_name_only;
1945        if (opt->unmatch_name_only) {
1946                /* We did not see any hit, so we want to show this */
1947                show_name(opt, gs->name);
1948                return 1;
1949        }
1950
1951        xdiff_clear_find_func(&xecfg);
1952        opt->priv = NULL;
1953
1954        /* NEEDSWORK:
1955         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1956         * which feels mostly useless but sometimes useful.  Maybe
1957         * make it another option?  For now suppress them.
1958         */
1959        if (opt->count && count) {
1960                char buf[32];
1961                if (opt->pathname) {
1962                        output_color(opt, gs->name, strlen(gs->name),
1963                                     opt->colors[GREP_COLOR_FILENAME]);
1964                        output_sep(opt, ':');
1965                }
1966                xsnprintf(buf, sizeof(buf), "%u\n", count);
1967                opt->output(opt, buf, strlen(buf));
1968                return 1;
1969        }
1970        return !!last_hit;
1971}
1972
1973static void clr_hit_marker(struct grep_expr *x)
1974{
1975        /* All-hit markers are meaningful only at the very top level
1976         * OR node.
1977         */
1978        while (1) {
1979                x->hit = 0;
1980                if (x->node != GREP_NODE_OR)
1981                        return;
1982                x->u.binary.left->hit = 0;
1983                x = x->u.binary.right;
1984        }
1985}
1986
1987static int chk_hit_marker(struct grep_expr *x)
1988{
1989        /* Top level nodes have hit markers.  See if they all are hits */
1990        while (1) {
1991                if (x->node != GREP_NODE_OR)
1992                        return x->hit;
1993                if (!x->u.binary.left->hit)
1994                        return 0;
1995                x = x->u.binary.right;
1996        }
1997}
1998
1999int grep_source(struct grep_opt *opt, struct grep_source *gs)
2000{
2001        /*
2002         * we do not have to do the two-pass grep when we do not check
2003         * buffer-wide "all-match".
2004         */
2005        if (!opt->all_match)
2006                return grep_source_1(opt, gs, 0);
2007
2008        /* Otherwise the toplevel "or" terms hit a bit differently.
2009         * We first clear hit markers from them.
2010         */
2011        clr_hit_marker(opt->pattern_expression);
2012        grep_source_1(opt, gs, 1);
2013
2014        if (!chk_hit_marker(opt->pattern_expression))
2015                return 0;
2016
2017        return grep_source_1(opt, gs, 0);
2018}
2019
2020int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
2021{
2022        struct grep_source gs;
2023        int r;
2024
2025        grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
2026        gs.buf = buf;
2027        gs.size = size;
2028
2029        r = grep_source(opt, &gs);
2030
2031        grep_source_clear(&gs);
2032        return r;
2033}
2034
2035void grep_source_init(struct grep_source *gs, enum grep_source_type type,
2036                      const char *name, const char *path,
2037                      const void *identifier)
2038{
2039        gs->type = type;
2040        gs->name = xstrdup_or_null(name);
2041        gs->path = xstrdup_or_null(path);
2042        gs->buf = NULL;
2043        gs->size = 0;
2044        gs->driver = NULL;
2045
2046        switch (type) {
2047        case GREP_SOURCE_FILE:
2048                gs->identifier = xstrdup(identifier);
2049                break;
2050        case GREP_SOURCE_OID:
2051                gs->identifier = oiddup(identifier);
2052                break;
2053        case GREP_SOURCE_BUF:
2054                gs->identifier = NULL;
2055                break;
2056        }
2057}
2058
2059void grep_source_clear(struct grep_source *gs)
2060{
2061        FREE_AND_NULL(gs->name);
2062        FREE_AND_NULL(gs->path);
2063        FREE_AND_NULL(gs->identifier);
2064        grep_source_clear_data(gs);
2065}
2066
2067void grep_source_clear_data(struct grep_source *gs)
2068{
2069        switch (gs->type) {
2070        case GREP_SOURCE_FILE:
2071        case GREP_SOURCE_OID:
2072                FREE_AND_NULL(gs->buf);
2073                gs->size = 0;
2074                break;
2075        case GREP_SOURCE_BUF:
2076                /* leave user-provided buf intact */
2077                break;
2078        }
2079}
2080
2081static int grep_source_load_oid(struct grep_source *gs)
2082{
2083        enum object_type type;
2084
2085        grep_read_lock();
2086        gs->buf = read_object_file(gs->identifier, &type, &gs->size);
2087        grep_read_unlock();
2088
2089        if (!gs->buf)
2090                return error(_("'%s': unable to read %s"),
2091                             gs->name,
2092                             oid_to_hex(gs->identifier));
2093        return 0;
2094}
2095
2096static int grep_source_load_file(struct grep_source *gs)
2097{
2098        const char *filename = gs->identifier;
2099        struct stat st;
2100        char *data;
2101        size_t size;
2102        int i;
2103
2104        if (lstat(filename, &st) < 0) {
2105        err_ret:
2106                if (errno != ENOENT)
2107                        error_errno(_("failed to stat '%s'"), filename);
2108                return -1;
2109        }
2110        if (!S_ISREG(st.st_mode))
2111                return -1;
2112        size = xsize_t(st.st_size);
2113        i = open(filename, O_RDONLY);
2114        if (i < 0)
2115                goto err_ret;
2116        data = xmallocz(size);
2117        if (st.st_size != read_in_full(i, data, size)) {
2118                error_errno(_("'%s': short read"), filename);
2119                close(i);
2120                free(data);
2121                return -1;
2122        }
2123        close(i);
2124
2125        gs->buf = data;
2126        gs->size = size;
2127        return 0;
2128}
2129
2130static int grep_source_load(struct grep_source *gs)
2131{
2132        if (gs->buf)
2133                return 0;
2134
2135        switch (gs->type) {
2136        case GREP_SOURCE_FILE:
2137                return grep_source_load_file(gs);
2138        case GREP_SOURCE_OID:
2139                return grep_source_load_oid(gs);
2140        case GREP_SOURCE_BUF:
2141                return gs->buf ? 0 : -1;
2142        }
2143        BUG("invalid grep_source type to load");
2144}
2145
2146void grep_source_load_driver(struct grep_source *gs)
2147{
2148        if (gs->driver)
2149                return;
2150
2151        grep_attr_lock();
2152        if (gs->path)
2153                gs->driver = userdiff_find_by_path(gs->path);
2154        if (!gs->driver)
2155                gs->driver = userdiff_find_by_name("default");
2156        grep_attr_unlock();
2157}
2158
2159static int grep_source_is_binary(struct grep_source *gs)
2160{
2161        grep_source_load_driver(gs);
2162        if (gs->driver->binary != -1)
2163                return gs->driver->binary;
2164
2165        if (!grep_source_load(gs))
2166                return buffer_is_binary(gs->buf, gs->size);
2167
2168        return 0;
2169}