c885101017f6a4505a2525fa83975d1b1e86936d
   1#include "cache.h"
   2#include "config.h"
   3#include "grep.h"
   4#include "userdiff.h"
   5#include "xdiff-interface.h"
   6#include "diff.h"
   7#include "diffcore.h"
   8#include "commit.h"
   9#include "quote.h"
  10
/* Forward declarations of grep_source helpers. */
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs);

/*
 * Template of default option values: filled in by init_grep_defaults()
 * and grep_config(), and read by grep_init().
 */
static struct grep_opt grep_defaults;
  15
  16static void std_output(struct grep_opt *opt, const void *buf, size_t size)
  17{
  18        fwrite(buf, size, 1, stdout);
  19}
  20
  21static void color_set(char *dst, const char *color_bytes)
  22{
  23        xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes);
  24}
  25
  26/*
  27 * Initialize the grep_defaults template with hardcoded defaults.
  28 * We could let the compiler do this, but without C99 initializers
  29 * the code gets unwieldy and unreadable, so...
  30 */
  31void init_grep_defaults(void)
  32{
  33        struct grep_opt *opt = &grep_defaults;
  34        static int run_once;
  35
  36        if (run_once)
  37                return;
  38        run_once++;
  39
  40        memset(opt, 0, sizeof(*opt));
  41        opt->relative = 1;
  42        opt->pathname = 1;
  43        opt->max_depth = -1;
  44        opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
  45        color_set(opt->color_context, "");
  46        color_set(opt->color_filename, "");
  47        color_set(opt->color_function, "");
  48        color_set(opt->color_lineno, "");
  49        color_set(opt->color_columnno, "");
  50        color_set(opt->color_match_context, GIT_COLOR_BOLD_RED);
  51        color_set(opt->color_match_selected, GIT_COLOR_BOLD_RED);
  52        color_set(opt->color_selected, "");
  53        color_set(opt->color_sep, GIT_COLOR_CYAN);
  54        opt->color = -1;
  55        opt->output = std_output;
  56}
  57
  58static int parse_pattern_type_arg(const char *opt, const char *arg)
  59{
  60        if (!strcmp(arg, "default"))
  61                return GREP_PATTERN_TYPE_UNSPECIFIED;
  62        else if (!strcmp(arg, "basic"))
  63                return GREP_PATTERN_TYPE_BRE;
  64        else if (!strcmp(arg, "extended"))
  65                return GREP_PATTERN_TYPE_ERE;
  66        else if (!strcmp(arg, "fixed"))
  67                return GREP_PATTERN_TYPE_FIXED;
  68        else if (!strcmp(arg, "perl"))
  69                return GREP_PATTERN_TYPE_PCRE;
  70        die("bad %s argument: %s", opt, arg);
  71}
  72
  73/*
  74 * Read the configuration file once and store it in
  75 * the grep_defaults template.
  76 */
  77int grep_config(const char *var, const char *value, void *cb)
  78{
  79        struct grep_opt *opt = &grep_defaults;
  80        char *color = NULL;
  81
  82        if (userdiff_config(var, value) < 0)
  83                return -1;
  84
  85        if (!strcmp(var, "grep.extendedregexp")) {
  86                opt->extended_regexp_option = git_config_bool(var, value);
  87                return 0;
  88        }
  89
  90        if (!strcmp(var, "grep.patterntype")) {
  91                opt->pattern_type_option = parse_pattern_type_arg(var, value);
  92                return 0;
  93        }
  94
  95        if (!strcmp(var, "grep.linenumber")) {
  96                opt->linenum = git_config_bool(var, value);
  97                return 0;
  98        }
  99
 100        if (!strcmp(var, "grep.fullname")) {
 101                opt->relative = !git_config_bool(var, value);
 102                return 0;
 103        }
 104
 105        if (!strcmp(var, "color.grep"))
 106                opt->color = git_config_colorbool(var, value);
 107        else if (!strcmp(var, "color.grep.context"))
 108                color = opt->color_context;
 109        else if (!strcmp(var, "color.grep.filename"))
 110                color = opt->color_filename;
 111        else if (!strcmp(var, "color.grep.function"))
 112                color = opt->color_function;
 113        else if (!strcmp(var, "color.grep.linenumber"))
 114                color = opt->color_lineno;
 115        else if (!strcmp(var, "color.grep.matchcontext"))
 116                color = opt->color_match_context;
 117        else if (!strcmp(var, "color.grep.matchselected"))
 118                color = opt->color_match_selected;
 119        else if (!strcmp(var, "color.grep.selected"))
 120                color = opt->color_selected;
 121        else if (!strcmp(var, "color.grep.separator"))
 122                color = opt->color_sep;
 123        else if (!strcmp(var, "color.grep.match")) {
 124                int rc = 0;
 125                if (!value)
 126                        return config_error_nonbool(var);
 127                rc |= color_parse(value, opt->color_match_context);
 128                rc |= color_parse(value, opt->color_match_selected);
 129                return rc;
 130        }
 131
 132        if (color) {
 133                if (!value)
 134                        return config_error_nonbool(var);
 135                return color_parse(value, color);
 136        }
 137        return 0;
 138}
 139
 140/*
 141 * Initialize one instance of grep_opt and copy the
 142 * default values from the template we read the configuration
 143 * information in an earlier call to git_config(grep_config).
 144 */
 145void grep_init(struct grep_opt *opt, const char *prefix)
 146{
 147        struct grep_opt *def = &grep_defaults;
 148
 149        memset(opt, 0, sizeof(*opt));
 150        opt->prefix = prefix;
 151        opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
 152        opt->pattern_tail = &opt->pattern_list;
 153        opt->header_tail = &opt->header_list;
 154
 155        opt->color = def->color;
 156        opt->extended_regexp_option = def->extended_regexp_option;
 157        opt->pattern_type_option = def->pattern_type_option;
 158        opt->linenum = def->linenum;
 159        opt->columnnum = def->columnnum;
 160        opt->max_depth = def->max_depth;
 161        opt->pathname = def->pathname;
 162        opt->relative = def->relative;
 163        opt->output = def->output;
 164
 165        color_set(opt->color_context, def->color_context);
 166        color_set(opt->color_filename, def->color_filename);
 167        color_set(opt->color_function, def->color_function);
 168        color_set(opt->color_lineno, def->color_lineno);
 169        color_set(opt->color_columnno, def->color_columnno);
 170        color_set(opt->color_match_context, def->color_match_context);
 171        color_set(opt->color_match_selected, def->color_match_selected);
 172        color_set(opt->color_selected, def->color_selected);
 173        color_set(opt->color_sep, def->color_sep);
 174}
 175
 176static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 177{
 178        /*
 179         * When committing to the pattern type by setting the relevant
 180         * fields in grep_opt it's generally not necessary to zero out
 181         * the fields we're not choosing, since they won't have been
 182         * set by anything. The extended_regexp_option field is the
 183         * only exception to this.
 184         *
 185         * This is because in the process of parsing grep.patternType
 186         * & grep.extendedRegexp we set opt->pattern_type_option and
 187         * opt->extended_regexp_option, respectively. We then
 188         * internally use opt->extended_regexp_option to see if we're
 189         * compiling an ERE. It must be unset if that's not actually
 190         * the case.
 191         */
 192        if (pattern_type != GREP_PATTERN_TYPE_ERE &&
 193            opt->extended_regexp_option)
 194                opt->extended_regexp_option = 0;
 195
 196        switch (pattern_type) {
 197        case GREP_PATTERN_TYPE_UNSPECIFIED:
 198                /* fall through */
 199
 200        case GREP_PATTERN_TYPE_BRE:
 201                break;
 202
 203        case GREP_PATTERN_TYPE_ERE:
 204                opt->extended_regexp_option = 1;
 205                break;
 206
 207        case GREP_PATTERN_TYPE_FIXED:
 208                opt->fixed = 1;
 209                break;
 210
 211        case GREP_PATTERN_TYPE_PCRE:
 212#ifdef USE_LIBPCRE2
 213                opt->pcre2 = 1;
 214#else
 215                /*
 216                 * It's important that pcre1 always be assigned to
 217                 * even when there's no USE_LIBPCRE* defined. We still
 218                 * call the PCRE stub function, it just dies with
 219                 * "cannot use Perl-compatible regexes[...]".
 220                 */
 221                opt->pcre1 = 1;
 222#endif
 223                break;
 224        }
 225}
 226
 227void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 228{
 229        if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
 230                grep_set_pattern_type_option(pattern_type, opt);
 231        else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
 232                grep_set_pattern_type_option(opt->pattern_type_option, opt);
 233        else if (opt->extended_regexp_option)
 234                /*
 235                 * This branch *must* happen after setting from the
 236                 * opt->pattern_type_option above, we don't want
 237                 * grep.extendedRegexp to override grep.patternType!
 238                 */
 239                grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
 240}
 241
 242static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
 243                                        const char *origin, int no,
 244                                        enum grep_pat_token t,
 245                                        enum grep_header_field field)
 246{
 247        struct grep_pat *p = xcalloc(1, sizeof(*p));
 248        p->pattern = xmemdupz(pat, patlen);
 249        p->patternlen = patlen;
 250        p->origin = origin;
 251        p->no = no;
 252        p->token = t;
 253        p->field = field;
 254        return p;
 255}
 256
 257static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
 258{
 259        **tail = p;
 260        *tail = &p->next;
 261        p->next = NULL;
 262
 263        switch (p->token) {
 264        case GREP_PATTERN: /* atom */
 265        case GREP_PATTERN_HEAD:
 266        case GREP_PATTERN_BODY:
 267                for (;;) {
 268                        struct grep_pat *new_pat;
 269                        size_t len = 0;
 270                        char *cp = p->pattern + p->patternlen, *nl = NULL;
 271                        while (++len <= p->patternlen) {
 272                                if (*(--cp) == '\n') {
 273                                        nl = cp;
 274                                        break;
 275                                }
 276                        }
 277                        if (!nl)
 278                                break;
 279                        new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
 280                                                  p->no, p->token, p->field);
 281                        new_pat->next = p->next;
 282                        if (!p->next)
 283                                *tail = &new_pat->next;
 284                        p->next = new_pat;
 285                        *nl = '\0';
 286                        p->patternlen -= len;
 287                }
 288                break;
 289        default:
 290                break;
 291        }
 292}
 293
 294void append_header_grep_pattern(struct grep_opt *opt,
 295                                enum grep_header_field field, const char *pat)
 296{
 297        struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
 298                                             GREP_PATTERN_HEAD, field);
 299        if (field == GREP_HEADER_REFLOG)
 300                opt->use_reflog_filter = 1;
 301        do_append_grep_pat(&opt->header_tail, p);
 302}
 303
 304void append_grep_pattern(struct grep_opt *opt, const char *pat,
 305                         const char *origin, int no, enum grep_pat_token t)
 306{
 307        append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 308}
 309
 310void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
 311                     const char *origin, int no, enum grep_pat_token t)
 312{
 313        struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
 314        do_append_grep_pat(&opt->pattern_tail, p);
 315}
 316
 317struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 318{
 319        struct grep_pat *pat;
 320        struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
 321        *ret = *opt;
 322
 323        ret->pattern_list = NULL;
 324        ret->pattern_tail = &ret->pattern_list;
 325
 326        for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
 327        {
 328                if(pat->token == GREP_PATTERN_HEAD)
 329                        append_header_grep_pattern(ret, pat->field,
 330                                                   pat->pattern);
 331                else
 332                        append_grep_pat(ret, pat->pattern, pat->patternlen,
 333                                        pat->origin, pat->no, pat->token);
 334        }
 335
 336        return ret;
 337}
 338
 339static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 340                const char *error)
 341{
 342        char where[1024];
 343
 344        if (p->no)
 345                xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
 346        else if (p->origin)
 347                xsnprintf(where, sizeof(where), "%s, ", p->origin);
 348        else
 349                where[0] = 0;
 350
 351        die("%s'%s': %s", where, p->pattern, error);
 352}
 353
 354static int is_fixed(const char *s, size_t len)
 355{
 356        size_t i;
 357
 358        for (i = 0; i < len; i++) {
 359                if (is_regex_special(s[i]))
 360                        return 0;
 361        }
 362
 363        return 1;
 364}
 365
 366static int has_null(const char *s, size_t len)
 367{
 368        /*
 369         * regcomp cannot accept patterns with NULs so when using it
 370         * we consider any pattern containing a NUL fixed.
 371         */
 372        if (memchr(s, 0, len))
 373                return 1;
 374
 375        return 0;
 376}
 377
 378#ifdef USE_LIBPCRE1
 379static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 380{
 381        const char *error;
 382        int erroffset;
 383        int options = PCRE_MULTILINE;
 384
 385        if (opt->ignore_case) {
 386                if (has_non_ascii(p->pattern))
 387                        p->pcre1_tables = pcre_maketables();
 388                options |= PCRE_CASELESS;
 389        }
 390        if (is_utf8_locale() && has_non_ascii(p->pattern))
 391                options |= PCRE_UTF8;
 392
 393        p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
 394                                      p->pcre1_tables);
 395        if (!p->pcre1_regexp)
 396                compile_regexp_failed(p, error);
 397
 398        p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
 399        if (!p->pcre1_extra_info && error)
 400                die("%s", error);
 401
 402#ifdef GIT_PCRE1_USE_JIT
 403        pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 404        if (p->pcre1_jit_on == 1) {
 405                p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
 406                if (!p->pcre1_jit_stack)
 407                        die("Couldn't allocate PCRE JIT stack");
 408                pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
 409        } else if (p->pcre1_jit_on != 0) {
 410                BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
 411                    p->pcre1_jit_on);
 412        }
 413#endif
 414}
 415
 416static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 417                regmatch_t *match, int eflags)
 418{
 419        int ovector[30], ret, flags = 0;
 420
 421        if (eflags & REG_NOTBOL)
 422                flags |= PCRE_NOTBOL;
 423
 424#ifdef GIT_PCRE1_USE_JIT
 425        if (p->pcre1_jit_on) {
 426                ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 427                                    eol - line, 0, flags, ovector,
 428                                    ARRAY_SIZE(ovector), p->pcre1_jit_stack);
 429        } else
 430#endif
 431        {
 432                ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 433                                eol - line, 0, flags, ovector,
 434                                ARRAY_SIZE(ovector));
 435        }
 436
 437        if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 438                die("pcre_exec failed with error code %d", ret);
 439        if (ret > 0) {
 440                ret = 0;
 441                match->rm_so = ovector[0];
 442                match->rm_eo = ovector[1];
 443        }
 444
 445        return ret;
 446}
 447
 448static void free_pcre1_regexp(struct grep_pat *p)
 449{
 450        pcre_free(p->pcre1_regexp);
 451#ifdef GIT_PCRE1_USE_JIT
 452        if (p->pcre1_jit_on) {
 453                pcre_free_study(p->pcre1_extra_info);
 454                pcre_jit_stack_free(p->pcre1_jit_stack);
 455        } else
 456#endif
 457        {
 458                pcre_free(p->pcre1_extra_info);
 459        }
 460        pcre_free((void *)p->pcre1_tables);
 461}
 462#else /* !USE_LIBPCRE1 */
 463static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 464{
 465        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 466}
 467
 468static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 469                regmatch_t *match, int eflags)
 470{
 471        return 1;
 472}
 473
 474static void free_pcre1_regexp(struct grep_pat *p)
 475{
 476}
 477#endif /* !USE_LIBPCRE1 */
 478
 479#ifdef USE_LIBPCRE2
 480static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 481{
 482        int error;
 483        PCRE2_UCHAR errbuf[256];
 484        PCRE2_SIZE erroffset;
 485        int options = PCRE2_MULTILINE;
 486        const uint8_t *character_tables = NULL;
 487        int jitret;
 488        int patinforet;
 489        size_t jitsizearg;
 490
 491        assert(opt->pcre2);
 492
 493        p->pcre2_compile_context = NULL;
 494
 495        if (opt->ignore_case) {
 496                if (has_non_ascii(p->pattern)) {
 497                        character_tables = pcre2_maketables(NULL);
 498                        p->pcre2_compile_context = pcre2_compile_context_create(NULL);
 499                        pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
 500                }
 501                options |= PCRE2_CASELESS;
 502        }
 503        if (is_utf8_locale() && has_non_ascii(p->pattern))
 504                options |= PCRE2_UTF;
 505
 506        p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
 507                                         p->patternlen, options, &error, &erroffset,
 508                                         p->pcre2_compile_context);
 509
 510        if (p->pcre2_pattern) {
 511                p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
 512                if (!p->pcre2_match_data)
 513                        die("Couldn't allocate PCRE2 match data");
 514        } else {
 515                pcre2_get_error_message(error, errbuf, sizeof(errbuf));
 516                compile_regexp_failed(p, (const char *)&errbuf);
 517        }
 518
 519        pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
 520        if (p->pcre2_jit_on == 1) {
 521                jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 522                if (jitret)
 523                        die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
 524
 525                /*
 526                 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
 527                 * tells us whether the library itself supports JIT,
 528                 * but to see whether we're going to be actually using
 529                 * JIT we need to extract PCRE2_INFO_JITSIZE from the
 530                 * pattern *after* we do pcre2_jit_compile() above.
 531                 *
 532                 * This is because if the pattern contains the
 533                 * (*NO_JIT) verb (see pcre2syntax(3))
 534                 * pcre2_jit_compile() will exit early with 0. If we
 535                 * then proceed to call pcre2_jit_match() further down
 536                 * the line instead of pcre2_match() we'll either
 537                 * segfault (pre PCRE 10.31) or run into a fatal error
 538                 * (post PCRE2 10.31)
 539                 */
 540                patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
 541                if (patinforet)
 542                        BUG("pcre2_pattern_info() failed: %d", patinforet);
 543                if (jitsizearg == 0) {
 544                        p->pcre2_jit_on = 0;
 545                        return;
 546                }
 547
 548                p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
 549                if (!p->pcre2_jit_stack)
 550                        die("Couldn't allocate PCRE2 JIT stack");
 551                p->pcre2_match_context = pcre2_match_context_create(NULL);
 552                if (!p->pcre2_match_context)
 553                        die("Couldn't allocate PCRE2 match context");
 554                pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
 555        } else if (p->pcre2_jit_on != 0) {
 556                BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
 557                    p->pcre1_jit_on);
 558        }
 559}
 560
 561static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 562                regmatch_t *match, int eflags)
 563{
 564        int ret, flags = 0;
 565        PCRE2_SIZE *ovector;
 566        PCRE2_UCHAR errbuf[256];
 567
 568        if (eflags & REG_NOTBOL)
 569                flags |= PCRE2_NOTBOL;
 570
 571        if (p->pcre2_jit_on)
 572                ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
 573                                      eol - line, 0, flags, p->pcre2_match_data,
 574                                      NULL);
 575        else
 576                ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
 577                                  eol - line, 0, flags, p->pcre2_match_data,
 578                                  NULL);
 579
 580        if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
 581                pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
 582                die("%s failed with error code %d: %s",
 583                    (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
 584                    errbuf);
 585        }
 586        if (ret > 0) {
 587                ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
 588                ret = 0;
 589                match->rm_so = (int)ovector[0];
 590                match->rm_eo = (int)ovector[1];
 591        }
 592
 593        return ret;
 594}
 595
 596static void free_pcre2_pattern(struct grep_pat *p)
 597{
 598        pcre2_compile_context_free(p->pcre2_compile_context);
 599        pcre2_code_free(p->pcre2_pattern);
 600        pcre2_match_data_free(p->pcre2_match_data);
 601        pcre2_jit_stack_free(p->pcre2_jit_stack);
 602        pcre2_match_context_free(p->pcre2_match_context);
 603}
 604#else /* !USE_LIBPCRE2 */
 605static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 606{
 607        /*
 608         * Unreachable until USE_LIBPCRE2 becomes synonymous with
 609         * USE_LIBPCRE. See the sibling comment in
 610         * grep_set_pattern_type_option().
 611         */
 612        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 613}
 614
 615static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 616                regmatch_t *match, int eflags)
 617{
 618        return 1;
 619}
 620
 621static void free_pcre2_pattern(struct grep_pat *p)
 622{
 623}
 624#endif /* !USE_LIBPCRE2 */
 625
 626static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 627{
 628        struct strbuf sb = STRBUF_INIT;
 629        int err;
 630        int regflags = 0;
 631
 632        basic_regex_quote_buf(&sb, p->pattern);
 633        if (opt->ignore_case)
 634                regflags |= REG_ICASE;
 635        err = regcomp(&p->regexp, sb.buf, regflags);
 636        if (opt->debug)
 637                fprintf(stderr, "fixed %s\n", sb.buf);
 638        strbuf_release(&sb);
 639        if (err) {
 640                char errbuf[1024];
 641                regerror(err, &p->regexp, errbuf, sizeof(errbuf));
 642                compile_regexp_failed(p, errbuf);
 643        }
 644}
 645
 646static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 647{
 648        int ascii_only;
 649        int err;
 650        int regflags = REG_NEWLINE;
 651
 652        p->word_regexp = opt->word_regexp;
 653        p->ignore_case = opt->ignore_case;
 654        ascii_only     = !has_non_ascii(p->pattern);
 655
 656        /*
 657         * Even when -F (fixed) asks us to do a non-regexp search, we
 658         * may not be able to correctly case-fold when -i
 659         * (ignore-case) is asked (in which case, we'll synthesize a
 660         * regexp to match the pattern that matches regexp special
 661         * characters literally, while ignoring case differences).  On
 662         * the other hand, even without -F, if the pattern does not
 663         * have any regexp special characters and there is no need for
 664         * case-folding search, we can internally turn it into a
 665         * simple string match using kws.  p->fixed tells us if we
 666         * want to use kws.
 667         */
 668        if (opt->fixed ||
 669            has_null(p->pattern, p->patternlen) ||
 670            is_fixed(p->pattern, p->patternlen))
 671                p->fixed = !p->ignore_case || ascii_only;
 672
 673        if (p->fixed) {
 674                p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
 675                kwsincr(p->kws, p->pattern, p->patternlen);
 676                kwsprep(p->kws);
 677                return;
 678        } else if (opt->fixed) {
 679                /*
 680                 * We come here when the pattern has the non-ascii
 681                 * characters we cannot case-fold, and asked to
 682                 * ignore-case.
 683                 */
 684                compile_fixed_regexp(p, opt);
 685                return;
 686        }
 687
 688        if (opt->pcre2) {
 689                compile_pcre2_pattern(p, opt);
 690                return;
 691        }
 692
 693        if (opt->pcre1) {
 694                compile_pcre1_regexp(p, opt);
 695                return;
 696        }
 697
 698        if (p->ignore_case)
 699                regflags |= REG_ICASE;
 700        if (opt->extended_regexp_option)
 701                regflags |= REG_EXTENDED;
 702        err = regcomp(&p->regexp, p->pattern, regflags);
 703        if (err) {
 704                char errbuf[1024];
 705                regerror(err, &p->regexp, errbuf, 1024);
 706                compile_regexp_failed(p, errbuf);
 707        }
 708}
 709
 710static struct grep_expr *compile_pattern_or(struct grep_pat **);
 711static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
 712{
 713        struct grep_pat *p;
 714        struct grep_expr *x;
 715
 716        p = *list;
 717        if (!p)
 718                return NULL;
 719        switch (p->token) {
 720        case GREP_PATTERN: /* atom */
 721        case GREP_PATTERN_HEAD:
 722        case GREP_PATTERN_BODY:
 723                x = xcalloc(1, sizeof (struct grep_expr));
 724                x->node = GREP_NODE_ATOM;
 725                x->u.atom = p;
 726                *list = p->next;
 727                return x;
 728        case GREP_OPEN_PAREN:
 729                *list = p->next;
 730                x = compile_pattern_or(list);
 731                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 732                        die("unmatched parenthesis");
 733                *list = (*list)->next;
 734                return x;
 735        default:
 736                return NULL;
 737        }
 738}
 739
 740static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 741{
 742        struct grep_pat *p;
 743        struct grep_expr *x;
 744
 745        p = *list;
 746        if (!p)
 747                return NULL;
 748        switch (p->token) {
 749        case GREP_NOT:
 750                if (!p->next)
 751                        die("--not not followed by pattern expression");
 752                *list = p->next;
 753                x = xcalloc(1, sizeof (struct grep_expr));
 754                x->node = GREP_NODE_NOT;
 755                x->u.unary = compile_pattern_not(list);
 756                if (!x->u.unary)
 757                        die("--not followed by non pattern expression");
 758                return x;
 759        default:
 760                return compile_pattern_atom(list);
 761        }
 762}
 763
 764static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 765{
 766        struct grep_pat *p;
 767        struct grep_expr *x, *y, *z;
 768
 769        x = compile_pattern_not(list);
 770        p = *list;
 771        if (p && p->token == GREP_AND) {
 772                if (!p->next)
 773                        die("--and not followed by pattern expression");
 774                *list = p->next;
 775                y = compile_pattern_and(list);
 776                if (!y)
 777                        die("--and not followed by pattern expression");
 778                z = xcalloc(1, sizeof (struct grep_expr));
 779                z->node = GREP_NODE_AND;
 780                z->u.binary.left = x;
 781                z->u.binary.right = y;
 782                return z;
 783        }
 784        return x;
 785}
 786
 787static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 788{
 789        struct grep_pat *p;
 790        struct grep_expr *x, *y, *z;
 791
 792        x = compile_pattern_and(list);
 793        p = *list;
 794        if (x && p && p->token != GREP_CLOSE_PAREN) {
 795                y = compile_pattern_or(list);
 796                if (!y)
 797                        die("not a pattern expression %s", p->pattern);
 798                z = xcalloc(1, sizeof (struct grep_expr));
 799                z->node = GREP_NODE_OR;
 800                z->u.binary.left = x;
 801                z->u.binary.right = y;
 802                return z;
 803        }
 804        return x;
 805}
 806
 807static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 808{
 809        return compile_pattern_or(list);
 810}
 811
 812static void indent(int in)
 813{
 814        while (in-- > 0)
 815                fputc(' ', stderr);
 816}
 817
 818static void dump_grep_pat(struct grep_pat *p)
 819{
 820        switch (p->token) {
 821        case GREP_AND: fprintf(stderr, "*and*"); break;
 822        case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
 823        case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
 824        case GREP_NOT: fprintf(stderr, "*not*"); break;
 825        case GREP_OR: fprintf(stderr, "*or*"); break;
 826
 827        case GREP_PATTERN: fprintf(stderr, "pattern"); break;
 828        case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
 829        case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
 830        }
 831
 832        switch (p->token) {
 833        default: break;
 834        case GREP_PATTERN_HEAD:
 835                fprintf(stderr, "<head %d>", p->field); break;
 836        case GREP_PATTERN_BODY:
 837                fprintf(stderr, "<body>"); break;
 838        }
 839        switch (p->token) {
 840        default: break;
 841        case GREP_PATTERN_HEAD:
 842        case GREP_PATTERN_BODY:
 843        case GREP_PATTERN:
 844                fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
 845                break;
 846        }
 847        fputc('\n', stderr);
 848}
 849
 850static void dump_grep_expression_1(struct grep_expr *x, int in)
 851{
 852        indent(in);
 853        switch (x->node) {
 854        case GREP_NODE_TRUE:
 855                fprintf(stderr, "true\n");
 856                break;
 857        case GREP_NODE_ATOM:
 858                dump_grep_pat(x->u.atom);
 859                break;
 860        case GREP_NODE_NOT:
 861                fprintf(stderr, "(not\n");
 862                dump_grep_expression_1(x->u.unary, in+1);
 863                indent(in);
 864                fprintf(stderr, ")\n");
 865                break;
 866        case GREP_NODE_AND:
 867                fprintf(stderr, "(and\n");
 868                dump_grep_expression_1(x->u.binary.left, in+1);
 869                dump_grep_expression_1(x->u.binary.right, in+1);
 870                indent(in);
 871                fprintf(stderr, ")\n");
 872                break;
 873        case GREP_NODE_OR:
 874                fprintf(stderr, "(or\n");
 875                dump_grep_expression_1(x->u.binary.left, in+1);
 876                dump_grep_expression_1(x->u.binary.right, in+1);
 877                indent(in);
 878                fprintf(stderr, ")\n");
 879                break;
 880        }
 881}
 882
 883static void dump_grep_expression(struct grep_opt *opt)
 884{
 885        struct grep_expr *x = opt->pattern_expression;
 886
 887        if (opt->all_match)
 888                fprintf(stderr, "[all-match]\n");
 889        dump_grep_expression_1(x, 0);
 890        fflush(NULL);
 891}
 892
 893static struct grep_expr *grep_true_expr(void)
 894{
 895        struct grep_expr *z = xcalloc(1, sizeof(*z));
 896        z->node = GREP_NODE_TRUE;
 897        return z;
 898}
 899
 900static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
 901{
 902        struct grep_expr *z = xcalloc(1, sizeof(*z));
 903        z->node = GREP_NODE_OR;
 904        z->u.binary.left = left;
 905        z->u.binary.right = right;
 906        return z;
 907}
 908
 909static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
 910{
 911        struct grep_pat *p;
 912        struct grep_expr *header_expr;
 913        struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
 914        enum grep_header_field fld;
 915
 916        if (!opt->header_list)
 917                return NULL;
 918
 919        for (p = opt->header_list; p; p = p->next) {
 920                if (p->token != GREP_PATTERN_HEAD)
 921                        BUG("a non-header pattern in grep header list.");
 922                if (p->field < GREP_HEADER_FIELD_MIN ||
 923                    GREP_HEADER_FIELD_MAX <= p->field)
 924                        BUG("unknown header field %d", p->field);
 925                compile_regexp(p, opt);
 926        }
 927
 928        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
 929                header_group[fld] = NULL;
 930
 931        for (p = opt->header_list; p; p = p->next) {
 932                struct grep_expr *h;
 933                struct grep_pat *pp = p;
 934
 935                h = compile_pattern_atom(&pp);
 936                if (!h || pp != p->next)
 937                        BUG("malformed header expr");
 938                if (!header_group[p->field]) {
 939                        header_group[p->field] = h;
 940                        continue;
 941                }
 942                header_group[p->field] = grep_or_expr(h, header_group[p->field]);
 943        }
 944
 945        header_expr = NULL;
 946
 947        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
 948                if (!header_group[fld])
 949                        continue;
 950                if (!header_expr)
 951                        header_expr = grep_true_expr();
 952                header_expr = grep_or_expr(header_group[fld], header_expr);
 953        }
 954        return header_expr;
 955}
 956
 957static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
 958{
 959        struct grep_expr *z = x;
 960
 961        while (x) {
 962                assert(x->node == GREP_NODE_OR);
 963                if (x->u.binary.right &&
 964                    x->u.binary.right->node == GREP_NODE_TRUE) {
 965                        x->u.binary.right = y;
 966                        break;
 967                }
 968                x = x->u.binary.right;
 969        }
 970        return z;
 971}
 972
 973static void compile_grep_patterns_real(struct grep_opt *opt)
 974{
 975        struct grep_pat *p;
 976        struct grep_expr *header_expr = prep_header_patterns(opt);
 977
 978        for (p = opt->pattern_list; p; p = p->next) {
 979                switch (p->token) {
 980                case GREP_PATTERN: /* atom */
 981                case GREP_PATTERN_HEAD:
 982                case GREP_PATTERN_BODY:
 983                        compile_regexp(p, opt);
 984                        break;
 985                default:
 986                        opt->extended = 1;
 987                        break;
 988                }
 989        }
 990
 991        if (opt->all_match || header_expr)
 992                opt->extended = 1;
 993        else if (!opt->extended && !opt->debug)
 994                return;
 995
 996        p = opt->pattern_list;
 997        if (p)
 998                opt->pattern_expression = compile_pattern_expr(&p);
 999        if (p)
1000                die("incomplete pattern expression: %s", p->pattern);
1001
1002        if (!header_expr)
1003                return;
1004
1005        if (!opt->pattern_expression)
1006                opt->pattern_expression = header_expr;
1007        else if (opt->all_match)
1008                opt->pattern_expression = grep_splice_or(header_expr,
1009                                                         opt->pattern_expression);
1010        else
1011                opt->pattern_expression = grep_or_expr(opt->pattern_expression,
1012                                                       header_expr);
1013        opt->all_match = 1;
1014}
1015
1016void compile_grep_patterns(struct grep_opt *opt)
1017{
1018        compile_grep_patterns_real(opt);
1019        if (opt->debug)
1020                dump_grep_expression(opt);
1021}
1022
1023static void free_pattern_expr(struct grep_expr *x)
1024{
1025        switch (x->node) {
1026        case GREP_NODE_TRUE:
1027        case GREP_NODE_ATOM:
1028                break;
1029        case GREP_NODE_NOT:
1030                free_pattern_expr(x->u.unary);
1031                break;
1032        case GREP_NODE_AND:
1033        case GREP_NODE_OR:
1034                free_pattern_expr(x->u.binary.left);
1035                free_pattern_expr(x->u.binary.right);
1036                break;
1037        }
1038        free(x);
1039}
1040
1041void free_grep_patterns(struct grep_opt *opt)
1042{
1043        struct grep_pat *p, *n;
1044
1045        for (p = opt->pattern_list; p; p = n) {
1046                n = p->next;
1047                switch (p->token) {
1048                case GREP_PATTERN: /* atom */
1049                case GREP_PATTERN_HEAD:
1050                case GREP_PATTERN_BODY:
1051                        if (p->kws)
1052                                kwsfree(p->kws);
1053                        else if (p->pcre1_regexp)
1054                                free_pcre1_regexp(p);
1055                        else if (p->pcre2_pattern)
1056                                free_pcre2_pattern(p);
1057                        else
1058                                regfree(&p->regexp);
1059                        free(p->pattern);
1060                        break;
1061                default:
1062                        break;
1063                }
1064                free(p);
1065        }
1066
1067        if (!opt->extended)
1068                return;
1069        free_pattern_expr(opt->pattern_expression);
1070}
1071
/*
 * Advance cp to the next '\n' or, failing that, to the end of the
 * buffer. *left holds the number of bytes available at cp on entry and
 * is reduced by the number of bytes skipped. Returns the new position.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
        unsigned long remaining;

        for (remaining = *left; remaining && *cp != '\n'; remaining--)
                cp++;
        *left = remaining;
        return cp;
}
1082
/* Return 1 if ch can be part of a word (alphanumeric or '_'), else 0. */
static int word_char(char ch)
{
        if (ch == '_')
                return 1;
        return isalnum(ch) ? 1 : 0;
}
1087
1088static void output_color(struct grep_opt *opt, const void *data, size_t size,
1089                         const char *color)
1090{
1091        if (want_color(opt->color) && color && color[0]) {
1092                opt->output(opt, color, strlen(color));
1093                opt->output(opt, data, size);
1094                opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
1095        } else
1096                opt->output(opt, data, size);
1097}
1098
1099static void output_sep(struct grep_opt *opt, char sign)
1100{
1101        if (opt->null_following_name)
1102                opt->output(opt, "\0", 1);
1103        else
1104                output_color(opt, &sign, 1, opt->color_sep);
1105}
1106
1107static void show_name(struct grep_opt *opt, const char *name)
1108{
1109        output_color(opt, name, strlen(name), opt->color_filename);
1110        opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
1111}
1112
1113static int fixmatch(struct grep_pat *p, char *line, char *eol,
1114                    regmatch_t *match)
1115{
1116        struct kwsmatch kwsm;
1117        size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
1118        if (offset == -1) {
1119                match->rm_so = match->rm_eo = -1;
1120                return REG_NOMATCH;
1121        } else {
1122                match->rm_so = offset;
1123                match->rm_eo = match->rm_so + kwsm.size[0];
1124                return 0;
1125        }
1126}
1127
1128static int patmatch(struct grep_pat *p, char *line, char *eol,
1129                    regmatch_t *match, int eflags)
1130{
1131        int hit;
1132
1133        if (p->fixed)
1134                hit = !fixmatch(p, line, eol, match);
1135        else if (p->pcre1_regexp)
1136                hit = !pcre1match(p, line, eol, match, eflags);
1137        else if (p->pcre2_pattern)
1138                hit = !pcre2match(p, line, eol, match, eflags);
1139        else
1140                hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
1141                                   eflags);
1142
1143        return hit;
1144}
1145
/*
 * Cut the line [bol, *eol_p) right after its last '>' character.
 *
 * The search runs backwards from the last byte and does not look at the
 * very first byte. When a '>' is found, the byte following it is
 * overwritten with NUL, *eol_p is moved to that byte, and the
 * overwritten byte is returned so that the caller can restore it.
 * Returns 0 (and changes nothing) when no '>' is found.
 */
static int strip_timestamp(char *bol, char **eol_p)
{
        char *cur;

        for (cur = *eol_p - 1; bol < cur; cur--) {
                if (*cur == '>') {
                        int saved = cur[1];

                        cur[1] = '\0';
                        *eol_p = cur + 1;
                        return saved;
                }
        }
        return 0;
}
1161
/*
 * Line prefixes of the header fields a header pattern can be limited
 * to, in the order used to index the table by a pattern's field value.
 * "len" is the length of the prefix without its terminating NUL.
 */
static struct {
        const char *field;
        size_t len;
} header_field[] = {
        { "author ", sizeof("author ") - 1 },
        { "committer ", sizeof("committer ") - 1 },
        { "reflog ", sizeof("reflog ") - 1 },
};
1170
1171static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
1172                             enum grep_context ctx,
1173                             regmatch_t *pmatch, int eflags)
1174{
1175        int hit = 0;
1176        int saved_ch = 0;
1177        const char *start = bol;
1178
1179        if ((p->token != GREP_PATTERN) &&
1180            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
1181                return 0;
1182
1183        if (p->token == GREP_PATTERN_HEAD) {
1184                const char *field;
1185                size_t len;
1186                assert(p->field < ARRAY_SIZE(header_field));
1187                field = header_field[p->field].field;
1188                len = header_field[p->field].len;
1189                if (strncmp(bol, field, len))
1190                        return 0;
1191                bol += len;
1192                switch (p->field) {
1193                case GREP_HEADER_AUTHOR:
1194                case GREP_HEADER_COMMITTER:
1195                        saved_ch = strip_timestamp(bol, &eol);
1196                        break;
1197                default:
1198                        break;
1199                }
1200        }
1201
1202 again:
1203        hit = patmatch(p, bol, eol, pmatch, eflags);
1204
1205        if (hit && p->word_regexp) {
1206                if ((pmatch[0].rm_so < 0) ||
1207                    (eol - bol) < pmatch[0].rm_so ||
1208                    (pmatch[0].rm_eo < 0) ||
1209                    (eol - bol) < pmatch[0].rm_eo)
1210                        die("regexp returned nonsense");
1211
1212                /* Match beginning must be either beginning of the
1213                 * line, or at word boundary (i.e. the last char must
1214                 * not be a word char).  Similarly, match end must be
1215                 * either end of the line, or at word boundary
1216                 * (i.e. the next char must not be a word char).
1217                 */
1218                if ( ((pmatch[0].rm_so == 0) ||
1219                      !word_char(bol[pmatch[0].rm_so-1])) &&
1220                     ((pmatch[0].rm_eo == (eol-bol)) ||
1221                      !word_char(bol[pmatch[0].rm_eo])) )
1222                        ;
1223                else
1224                        hit = 0;
1225
1226                /* Words consist of at least one character. */
1227                if (pmatch->rm_so == pmatch->rm_eo)
1228                        hit = 0;
1229
1230                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
1231                        /* There could be more than one match on the
1232                         * line, and the first match might not be
1233                         * strict word match.  But later ones could be!
1234                         * Forward to the next possible start, i.e. the
1235                         * next position following a non-word char.
1236                         */
1237                        bol = pmatch[0].rm_so + bol + 1;
1238                        while (word_char(bol[-1]) && bol < eol)
1239                                bol++;
1240                        eflags |= REG_NOTBOL;
1241                        if (bol < eol)
1242                                goto again;
1243                }
1244        }
1245        if (p->token == GREP_PATTERN_HEAD && saved_ch)
1246                *eol = saved_ch;
1247        if (hit) {
1248                pmatch[0].rm_so += bol - start;
1249                pmatch[0].rm_eo += bol - start;
1250        }
1251        return hit;
1252}
1253
1254static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol,
1255                           char *eol, enum grep_context ctx, ssize_t *col,
1256                           ssize_t *icol, int collect_hits)
1257{
1258        int h = 0;
1259
1260        if (!x)
1261                die("Not a valid grep expression");
1262        switch (x->node) {
1263        case GREP_NODE_TRUE:
1264                h = 1;
1265                break;
1266        case GREP_NODE_ATOM:
1267                {
1268                        regmatch_t tmp;
1269                        h = match_one_pattern(x->u.atom, bol, eol, ctx,
1270                                              &tmp, 0);
1271                        if (h && (*col < 0 || tmp.rm_so < *col))
1272                                *col = tmp.rm_so;
1273                }
1274                break;
1275        case GREP_NODE_NOT:
1276                /*
1277                 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1278                 */
1279                h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1280                                     0);
1281                break;
1282        case GREP_NODE_AND:
1283                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1284                                    icol, 0);
1285                if (h || opt->columnnum) {
1286                        /*
1287                         * Don't short-circuit AND when given --column, since a
1288                         * NOT earlier in the tree may turn this into an OR. In
1289                         * this case, see the below comment.
1290                         */
1291                        h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1292                                             ctx, col, icol, 0);
1293                }
1294                break;
1295        case GREP_NODE_OR:
1296                if (!(collect_hits || opt->columnnum)) {
1297                        /*
1298                         * Don't short-circuit OR when given --column (or
1299                         * collecting hits) to ensure we don't skip a later
1300                         * child that would produce an earlier match.
1301                         */
1302                        return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1303                                                ctx, col, icol, 0) ||
1304                                match_expr_eval(opt, x->u.binary.right, bol,
1305                                                eol, ctx, col, icol, 0));
1306                }
1307                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1308                                    icol, 0);
1309                if (collect_hits)
1310                        x->u.binary.left->hit |= h;
1311                h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
1312                                     icol, collect_hits);
1313                break;
1314        default:
1315                die("Unexpected node type (internal error) %d", x->node);
1316        }
1317        if (collect_hits)
1318                x->hit |= h;
1319        return h;
1320}
1321
1322static int match_expr(struct grep_opt *opt, char *bol, char *eol,
1323                      enum grep_context ctx, ssize_t *col,
1324                      ssize_t *icol, int collect_hits)
1325{
1326        struct grep_expr *x = opt->pattern_expression;
1327        return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
1328}
1329
1330static int match_line(struct grep_opt *opt, char *bol, char *eol,
1331                      ssize_t *col, ssize_t *icol,
1332                      enum grep_context ctx, int collect_hits)
1333{
1334        struct grep_pat *p;
1335        int hit = 0;
1336
1337        if (opt->extended)
1338                return match_expr(opt, bol, eol, ctx, col, icol,
1339                                  collect_hits);
1340
1341        /* we do not call with collect_hits without being extended */
1342        for (p = opt->pattern_list; p; p = p->next) {
1343                regmatch_t tmp;
1344                if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
1345                        hit |= 1;
1346                        if (!opt->columnnum) {
1347                                /*
1348                                 * Without --column, any single match on a line
1349                                 * is enough to know that it needs to be
1350                                 * printed. With --column, scan _all_ patterns
1351                                 * to find the earliest.
1352                                 */
1353                                break;
1354                        }
1355                        if (*col < 0 || tmp.rm_so < *col)
1356                                *col = tmp.rm_so;
1357                }
1358        }
1359        return hit;
1360}
1361
1362static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
1363                              enum grep_context ctx,
1364                              regmatch_t *pmatch, int eflags)
1365{
1366        regmatch_t match;
1367
1368        if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
1369                return 0;
1370        if (match.rm_so < 0 || match.rm_eo < 0)
1371                return 0;
1372        if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1373                if (match.rm_so > pmatch->rm_so)
1374                        return 1;
1375                if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1376                        return 1;
1377        }
1378        pmatch->rm_so = match.rm_so;
1379        pmatch->rm_eo = match.rm_eo;
1380        return 1;
1381}
1382
1383static int next_match(struct grep_opt *opt, char *bol, char *eol,
1384                      enum grep_context ctx, regmatch_t *pmatch, int eflags)
1385{
1386        struct grep_pat *p;
1387        int hit = 0;
1388
1389        pmatch->rm_so = pmatch->rm_eo = -1;
1390        if (bol < eol) {
1391                for (p = opt->pattern_list; p; p = p->next) {
1392                        switch (p->token) {
1393                        case GREP_PATTERN: /* atom */
1394                        case GREP_PATTERN_HEAD:
1395                        case GREP_PATTERN_BODY:
1396                                hit |= match_next_pattern(p, bol, eol, ctx,
1397                                                          pmatch, eflags);
1398                                break;
1399                        default:
1400                                break;
1401                        }
1402                }
1403        }
1404        return hit;
1405}
1406
1407static void show_line(struct grep_opt *opt, char *bol, char *eol,
1408                      const char *name, unsigned lno, char sign)
1409{
1410        int rest = eol - bol;
1411        const char *match_color, *line_color = NULL;
1412
1413        if (opt->file_break && opt->last_shown == 0) {
1414                if (opt->show_hunk_mark)
1415                        opt->output(opt, "\n", 1);
1416        } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1417                if (opt->last_shown == 0) {
1418                        if (opt->show_hunk_mark) {
1419                                output_color(opt, "--", 2, opt->color_sep);
1420                                opt->output(opt, "\n", 1);
1421                        }
1422                } else if (lno > opt->last_shown + 1) {
1423                        output_color(opt, "--", 2, opt->color_sep);
1424                        opt->output(opt, "\n", 1);
1425                }
1426        }
1427        if (opt->heading && opt->last_shown == 0) {
1428                output_color(opt, name, strlen(name), opt->color_filename);
1429                opt->output(opt, "\n", 1);
1430        }
1431        opt->last_shown = lno;
1432
1433        if (!opt->heading && opt->pathname) {
1434                output_color(opt, name, strlen(name), opt->color_filename);
1435                output_sep(opt, sign);
1436        }
1437        if (opt->linenum) {
1438                char buf[32];
1439                xsnprintf(buf, sizeof(buf), "%d", lno);
1440                output_color(opt, buf, strlen(buf), opt->color_lineno);
1441                output_sep(opt, sign);
1442        }
1443        if (opt->color) {
1444                regmatch_t match;
1445                enum grep_context ctx = GREP_CONTEXT_BODY;
1446                int ch = *eol;
1447                int eflags = 0;
1448
1449                if (sign == ':')
1450                        match_color = opt->color_match_selected;
1451                else
1452                        match_color = opt->color_match_context;
1453                if (sign == ':')
1454                        line_color = opt->color_selected;
1455                else if (sign == '-')
1456                        line_color = opt->color_context;
1457                else if (sign == '=')
1458                        line_color = opt->color_function;
1459                *eol = '\0';
1460                while (next_match(opt, bol, eol, ctx, &match, eflags)) {
1461                        if (match.rm_so == match.rm_eo)
1462                                break;
1463
1464                        output_color(opt, bol, match.rm_so, line_color);
1465                        output_color(opt, bol + match.rm_so,
1466                                     match.rm_eo - match.rm_so, match_color);
1467                        bol += match.rm_eo;
1468                        rest -= match.rm_eo;
1469                        eflags = REG_NOTBOL;
1470                }
1471                *eol = ch;
1472        }
1473        output_color(opt, bol, rest, line_color);
1474        opt->output(opt, "\n", 1);
1475}
1476
#ifndef NO_PTHREADS
int grep_use_locks;

/*
 * Serializes access to the gitattributes machinery, which is not
 * thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

/* Take grep_attr_mutex, but only while grep_use_locks is set. */
static inline void grep_attr_lock(void)
{
        if (!grep_use_locks)
                return;
        pthread_mutex_lock(&grep_attr_mutex);
}

/* Release grep_attr_mutex, but only while grep_use_locks is set. */
static inline void grep_attr_unlock(void)
{
        if (!grep_use_locks)
                return;
        pthread_mutex_unlock(&grep_attr_mutex);
}

/*
 * Same as grep_attr_mutex, but protecting the thread-unsafe object db
 * access.
 */
pthread_mutex_t grep_read_mutex;

#else
/* Without pthreads the attribute lock operations expand to nothing. */
#define grep_attr_lock()
#define grep_attr_unlock()
#endif
1507
1508static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
1509{
1510        xdemitconf_t *xecfg = opt->priv;
1511        if (xecfg && !xecfg->find_func) {
1512                grep_source_load_driver(gs);
1513                if (gs->driver->funcname.pattern) {
1514                        const struct userdiff_funcname *pe = &gs->driver->funcname;
1515                        xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1516                } else {
1517                        xecfg = opt->priv = NULL;
1518                }
1519        }
1520
1521        if (xecfg) {
1522                char buf[1];
1523                return xecfg->find_func(bol, eol - bol, buf, 1,
1524                                        xecfg->find_func_priv) >= 0;
1525        }
1526
1527        if (bol == eol)
1528                return 0;
1529        if (isalpha(*bol) || *bol == '_' || *bol == '$')
1530                return 1;
1531        return 0;
1532}
1533
1534static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1535                               char *bol, unsigned lno)
1536{
1537        while (bol > gs->buf) {
1538                char *eol = --bol;
1539
1540                while (bol > gs->buf && bol[-1] != '\n')
1541                        bol--;
1542                lno--;
1543
1544                if (lno <= opt->last_shown)
1545                        break;
1546
1547                if (match_funcname(opt, gs, bol, eol)) {
1548                        show_line(opt, bol, eol, gs->name, lno, '=');
1549                        break;
1550                }
1551        }
1552}
1553
1554static int is_empty_line(const char *bol, const char *eol);
1555
1556static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
1557                             char *bol, char *end, unsigned lno)
1558{
1559        unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
1560        int funcname_needed = !!opt->funcname, comment_needed = 0;
1561
1562        if (opt->pre_context < lno)
1563                from = lno - opt->pre_context;
1564        if (from <= opt->last_shown)
1565                from = opt->last_shown + 1;
1566        orig_from = from;
1567        if (opt->funcbody) {
1568                if (match_funcname(opt, gs, bol, end))
1569                        comment_needed = 1;
1570                else
1571                        funcname_needed = 1;
1572                from = opt->last_shown + 1;
1573        }
1574
1575        /* Rewind. */
1576        while (bol > gs->buf && cur > from) {
1577                char *next_bol = bol;
1578                char *eol = --bol;
1579
1580                while (bol > gs->buf && bol[-1] != '\n')
1581                        bol--;
1582                cur--;
1583                if (comment_needed && (is_empty_line(bol, eol) ||
1584                                       match_funcname(opt, gs, bol, eol))) {
1585                        comment_needed = 0;
1586                        from = orig_from;
1587                        if (cur < from) {
1588                                cur++;
1589                                bol = next_bol;
1590                                break;
1591                        }
1592                }
1593                if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
1594                        funcname_lno = cur;
1595                        funcname_needed = 0;
1596                        if (opt->funcbody)
1597                                comment_needed = 1;
1598                        else
1599                                from = orig_from;
1600                }
1601        }
1602
1603        /* We need to look even further back to find a function signature. */
1604        if (opt->funcname && funcname_needed)
1605                show_funcname_line(opt, gs, bol, cur);
1606
1607        /* Back forward. */
1608        while (cur < lno) {
1609                char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
1610
1611                while (*eol != '\n')
1612                        eol++;
1613                show_line(opt, bol, eol, gs->name, cur, sign);
1614                bol = eol + 1;
1615                cur++;
1616        }
1617}
1618
1619static int should_lookahead(struct grep_opt *opt)
1620{
1621        struct grep_pat *p;
1622
1623        if (opt->extended)
1624                return 0; /* punt for too complex stuff */
1625        if (opt->invert)
1626                return 0;
1627        for (p = opt->pattern_list; p; p = p->next) {
1628                if (p->token != GREP_PATTERN)
1629                        return 0; /* punt for "header only" and stuff */
1630        }
1631        return 1;
1632}
1633
1634static int look_ahead(struct grep_opt *opt,
1635                      unsigned long *left_p,
1636                      unsigned *lno_p,
1637                      char **bol_p)
1638{
1639        unsigned lno = *lno_p;
1640        char *bol = *bol_p;
1641        struct grep_pat *p;
1642        char *sp, *last_bol;
1643        regoff_t earliest = -1;
1644
1645        for (p = opt->pattern_list; p; p = p->next) {
1646                int hit;
1647                regmatch_t m;
1648
1649                hit = patmatch(p, bol, bol + *left_p, &m, 0);
1650                if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1651                        continue;
1652                if (earliest < 0 || m.rm_so < earliest)
1653                        earliest = m.rm_so;
1654        }
1655
1656        if (earliest < 0) {
1657                *bol_p = bol + *left_p;
1658                *left_p = 0;
1659                return 1;
1660        }
1661        for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1662                ; /* find the beginning of the line */
1663        last_bol = sp;
1664
1665        for (sp = bol; sp < last_bol; sp++) {
1666                if (*sp == '\n')
1667                        lno++;
1668        }
1669        *left_p -= last_bol - bol;
1670        *bol_p = last_bol;
1671        *lno_p = lno;
1672        return 0;
1673}
1674
1675static int fill_textconv_grep(struct userdiff_driver *driver,
1676                              struct grep_source *gs)
1677{
1678        struct diff_filespec *df;
1679        char *buf;
1680        size_t size;
1681
1682        if (!driver || !driver->textconv)
1683                return grep_source_load(gs);
1684
1685        /*
1686         * The textconv interface is intimately tied to diff_filespecs, so we
1687         * have to pretend to be one. If we could unify the grep_source
1688         * and diff_filespec structs, this mess could just go away.
1689         */
1690        df = alloc_filespec(gs->path);
1691        switch (gs->type) {
1692        case GREP_SOURCE_OID:
1693                fill_filespec(df, gs->identifier, 1, 0100644);
1694                break;
1695        case GREP_SOURCE_FILE:
1696                fill_filespec(df, &null_oid, 0, 0100644);
1697                break;
1698        default:
1699                BUG("attempt to textconv something without a path?");
1700        }
1701
1702        /*
1703         * fill_textconv is not remotely thread-safe; it may load objects
1704         * behind the scenes, and it modifies the global diff tempfile
1705         * structure.
1706         */
1707        grep_read_lock();
1708        size = fill_textconv(driver, df, &buf);
1709        grep_read_unlock();
1710        free_filespec(df);
1711
1712        /*
1713         * The normal fill_textconv usage by the diff machinery would just keep
1714         * the textconv'd buf separate from the diff_filespec. But much of the
1715         * grep code passes around a grep_source and assumes that its "buf"
1716         * pointer is the beginning of the thing we are searching. So let's
1717         * install our textconv'd version into the grep_source, taking care not
1718         * to leak any existing buffer.
1719         */
1720        grep_source_clear_data(gs);
1721        gs->buf = buf;
1722        gs->size = size;
1723
1724        return 0;
1725}
1726
/*
 * Return 1 when every byte in [bol, eol) is whitespace (which includes
 * the case bol == eol), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
        const char *cp;

        for (cp = bol; cp < eol; cp++) {
                if (!isspace(*cp))
                        break;
        }
        return cp == eol;
}
1733
1734static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1735{
1736        char *bol;
1737        char *peek_bol = NULL;
1738        unsigned long left;
1739        unsigned lno = 1;
1740        unsigned last_hit = 0;
1741        int binary_match_only = 0;
1742        unsigned count = 0;
1743        int try_lookahead = 0;
1744        int show_function = 0;
1745        struct userdiff_driver *textconv = NULL;
1746        enum grep_context ctx = GREP_CONTEXT_HEAD;
1747        xdemitconf_t xecfg;
1748
1749        if (!opt->output)
1750                opt->output = std_output;
1751
1752        if (opt->pre_context || opt->post_context || opt->file_break ||
1753            opt->funcbody) {
1754                /* Show hunk marks, except for the first file. */
1755                if (opt->last_shown)
1756                        opt->show_hunk_mark = 1;
1757                /*
1758                 * If we're using threads then we can't easily identify
1759                 * the first file.  Always put hunk marks in that case
1760                 * and skip the very first one later in work_done().
1761                 */
1762                if (opt->output != std_output)
1763                        opt->show_hunk_mark = 1;
1764        }
1765        opt->last_shown = 0;
1766
1767        if (opt->allow_textconv) {
1768                grep_source_load_driver(gs);
1769                /*
1770                 * We might set up the shared textconv cache data here, which
1771                 * is not thread-safe.
1772                 */
1773                grep_attr_lock();
1774                textconv = userdiff_get_textconv(gs->driver);
1775                grep_attr_unlock();
1776        }
1777
1778        /*
1779         * We know the result of a textconv is text, so we only have to care
1780         * about binary handling if we are not using it.
1781         */
1782        if (!textconv) {
1783                switch (opt->binary) {
1784                case GREP_BINARY_DEFAULT:
1785                        if (grep_source_is_binary(gs))
1786                                binary_match_only = 1;
1787                        break;
1788                case GREP_BINARY_NOMATCH:
1789                        if (grep_source_is_binary(gs))
1790                                return 0; /* Assume unmatch */
1791                        break;
1792                case GREP_BINARY_TEXT:
1793                        break;
1794                default:
1795                        BUG("unknown binary handling mode");
1796                }
1797        }
1798
1799        memset(&xecfg, 0, sizeof(xecfg));
1800        opt->priv = &xecfg;
1801
1802        try_lookahead = should_lookahead(opt);
1803
1804        if (fill_textconv_grep(textconv, gs) < 0)
1805                return 0;
1806
1807        bol = gs->buf;
1808        left = gs->size;
1809        while (left) {
1810                char *eol, ch;
1811                int hit;
1812                ssize_t col = -1, icol = -1;
1813
1814                /*
1815                 * look_ahead() skips quickly to the line that possibly
1816                 * has the next hit; don't call it if we need to do
1817                 * something more than just skipping the current line
1818                 * in response to an unmatch for the current line.  E.g.
1819                 * inside a post-context window, we will show the current
1820                 * line as a context around the previous hit when it
1821                 * doesn't hit.
1822                 */
1823                if (try_lookahead
1824                    && !(last_hit
1825                         && (show_function ||
1826                             lno <= last_hit + opt->post_context))
1827                    && look_ahead(opt, &left, &lno, &bol))
1828                        break;
1829                eol = end_of_line(bol, &left);
1830                ch = *eol;
1831                *eol = 0;
1832
1833                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1834                        ctx = GREP_CONTEXT_BODY;
1835
1836                hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
1837                *eol = ch;
1838
1839                if (collect_hits)
1840                        goto next_line;
1841
1842                /* "grep -v -e foo -e bla" should list lines
1843                 * that do not have either, so inversion should
1844                 * be done outside.
1845                 */
1846                if (opt->invert)
1847                        hit = !hit;
1848                if (opt->unmatch_name_only) {
1849                        if (hit)
1850                                return 0;
1851                        goto next_line;
1852                }
1853                if (hit) {
1854                        count++;
1855                        if (opt->status_only)
1856                                return 1;
1857                        if (opt->name_only) {
1858                                show_name(opt, gs->name);
1859                                return 1;
1860                        }
1861                        if (opt->count)
1862                                goto next_line;
1863                        if (binary_match_only) {
1864                                opt->output(opt, "Binary file ", 12);
1865                                output_color(opt, gs->name, strlen(gs->name),
1866                                             opt->color_filename);
1867                                opt->output(opt, " matches\n", 9);
1868                                return 1;
1869                        }
1870                        /* Hit at this line.  If we haven't shown the
1871                         * pre-context lines, we would need to show them.
1872                         */
1873                        if (opt->pre_context || opt->funcbody)
1874                                show_pre_context(opt, gs, bol, eol, lno);
1875                        else if (opt->funcname)
1876                                show_funcname_line(opt, gs, bol, lno);
1877                        show_line(opt, bol, eol, gs->name, lno, ':');
1878                        last_hit = lno;
1879                        if (opt->funcbody)
1880                                show_function = 1;
1881                        goto next_line;
1882                }
1883                if (show_function && (!peek_bol || peek_bol < bol)) {
1884                        unsigned long peek_left = left;
1885                        char *peek_eol = eol;
1886
1887                        /*
1888                         * Trailing empty lines are not interesting.
1889                         * Peek past them to see if they belong to the
1890                         * body of the current function.
1891                         */
1892                        peek_bol = bol;
1893                        while (is_empty_line(peek_bol, peek_eol)) {
1894                                peek_bol = peek_eol + 1;
1895                                peek_eol = end_of_line(peek_bol, &peek_left);
1896                        }
1897
1898                        if (match_funcname(opt, gs, peek_bol, peek_eol))
1899                                show_function = 0;
1900                }
1901                if (show_function ||
1902                    (last_hit && lno <= last_hit + opt->post_context)) {
1903                        /* If the last hit is within the post context,
1904                         * we need to show this line.
1905                         */
1906                        show_line(opt, bol, eol, gs->name, lno, '-');
1907                }
1908
1909        next_line:
1910                bol = eol + 1;
1911                if (!left)
1912                        break;
1913                left--;
1914                lno++;
1915        }
1916
1917        if (collect_hits)
1918                return 0;
1919
1920        if (opt->status_only)
1921                return opt->unmatch_name_only;
1922        if (opt->unmatch_name_only) {
1923                /* We did not see any hit, so we want to show this */
1924                show_name(opt, gs->name);
1925                return 1;
1926        }
1927
1928        xdiff_clear_find_func(&xecfg);
1929        opt->priv = NULL;
1930
1931        /* NEEDSWORK:
1932         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1933         * which feels mostly useless but sometimes useful.  Maybe
1934         * make it another option?  For now suppress them.
1935         */
1936        if (opt->count && count) {
1937                char buf[32];
1938                if (opt->pathname) {
1939                        output_color(opt, gs->name, strlen(gs->name),
1940                                     opt->color_filename);
1941                        output_sep(opt, ':');
1942                }
1943                xsnprintf(buf, sizeof(buf), "%u\n", count);
1944                opt->output(opt, buf, strlen(buf));
1945                return 1;
1946        }
1947        return !!last_hit;
1948}
1949
1950static void clr_hit_marker(struct grep_expr *x)
1951{
1952        /* All-hit markers are meaningful only at the very top level
1953         * OR node.
1954         */
1955        while (1) {
1956                x->hit = 0;
1957                if (x->node != GREP_NODE_OR)
1958                        return;
1959                x->u.binary.left->hit = 0;
1960                x = x->u.binary.right;
1961        }
1962}
1963
1964static int chk_hit_marker(struct grep_expr *x)
1965{
1966        /* Top level nodes have hit markers.  See if they all are hits */
1967        while (1) {
1968                if (x->node != GREP_NODE_OR)
1969                        return x->hit;
1970                if (!x->u.binary.left->hit)
1971                        return 0;
1972                x = x->u.binary.right;
1973        }
1974}
1975
1976int grep_source(struct grep_opt *opt, struct grep_source *gs)
1977{
1978        /*
1979         * we do not have to do the two-pass grep when we do not check
1980         * buffer-wide "all-match".
1981         */
1982        if (!opt->all_match)
1983                return grep_source_1(opt, gs, 0);
1984
1985        /* Otherwise the toplevel "or" terms hit a bit differently.
1986         * We first clear hit markers from them.
1987         */
1988        clr_hit_marker(opt->pattern_expression);
1989        grep_source_1(opt, gs, 1);
1990
1991        if (!chk_hit_marker(opt->pattern_expression))
1992                return 0;
1993
1994        return grep_source_1(opt, gs, 0);
1995}
1996
1997int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
1998{
1999        struct grep_source gs;
2000        int r;
2001
2002        grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
2003        gs.buf = buf;
2004        gs.size = size;
2005
2006        r = grep_source(opt, &gs);
2007
2008        grep_source_clear(&gs);
2009        return r;
2010}
2011
2012void grep_source_init(struct grep_source *gs, enum grep_source_type type,
2013                      const char *name, const char *path,
2014                      const void *identifier)
2015{
2016        gs->type = type;
2017        gs->name = xstrdup_or_null(name);
2018        gs->path = xstrdup_or_null(path);
2019        gs->buf = NULL;
2020        gs->size = 0;
2021        gs->driver = NULL;
2022
2023        switch (type) {
2024        case GREP_SOURCE_FILE:
2025                gs->identifier = xstrdup(identifier);
2026                break;
2027        case GREP_SOURCE_OID:
2028                gs->identifier = oiddup(identifier);
2029                break;
2030        case GREP_SOURCE_BUF:
2031                gs->identifier = NULL;
2032                break;
2033        }
2034}
2035
2036void grep_source_clear(struct grep_source *gs)
2037{
2038        FREE_AND_NULL(gs->name);
2039        FREE_AND_NULL(gs->path);
2040        FREE_AND_NULL(gs->identifier);
2041        grep_source_clear_data(gs);
2042}
2043
2044void grep_source_clear_data(struct grep_source *gs)
2045{
2046        switch (gs->type) {
2047        case GREP_SOURCE_FILE:
2048        case GREP_SOURCE_OID:
2049                FREE_AND_NULL(gs->buf);
2050                gs->size = 0;
2051                break;
2052        case GREP_SOURCE_BUF:
2053                /* leave user-provided buf intact */
2054                break;
2055        }
2056}
2057
2058static int grep_source_load_oid(struct grep_source *gs)
2059{
2060        enum object_type type;
2061
2062        grep_read_lock();
2063        gs->buf = read_object_file(gs->identifier, &type, &gs->size);
2064        grep_read_unlock();
2065
2066        if (!gs->buf)
2067                return error(_("'%s': unable to read %s"),
2068                             gs->name,
2069                             oid_to_hex(gs->identifier));
2070        return 0;
2071}
2072
2073static int grep_source_load_file(struct grep_source *gs)
2074{
2075        const char *filename = gs->identifier;
2076        struct stat st;
2077        char *data;
2078        size_t size;
2079        int i;
2080
2081        if (lstat(filename, &st) < 0) {
2082        err_ret:
2083                if (errno != ENOENT)
2084                        error_errno(_("failed to stat '%s'"), filename);
2085                return -1;
2086        }
2087        if (!S_ISREG(st.st_mode))
2088                return -1;
2089        size = xsize_t(st.st_size);
2090        i = open(filename, O_RDONLY);
2091        if (i < 0)
2092                goto err_ret;
2093        data = xmallocz(size);
2094        if (st.st_size != read_in_full(i, data, size)) {
2095                error_errno(_("'%s': short read"), filename);
2096                close(i);
2097                free(data);
2098                return -1;
2099        }
2100        close(i);
2101
2102        gs->buf = data;
2103        gs->size = size;
2104        return 0;
2105}
2106
2107static int grep_source_load(struct grep_source *gs)
2108{
2109        if (gs->buf)
2110                return 0;
2111
2112        switch (gs->type) {
2113        case GREP_SOURCE_FILE:
2114                return grep_source_load_file(gs);
2115        case GREP_SOURCE_OID:
2116                return grep_source_load_oid(gs);
2117        case GREP_SOURCE_BUF:
2118                return gs->buf ? 0 : -1;
2119        }
2120        BUG("invalid grep_source type to load");
2121}
2122
2123void grep_source_load_driver(struct grep_source *gs)
2124{
2125        if (gs->driver)
2126                return;
2127
2128        grep_attr_lock();
2129        if (gs->path)
2130                gs->driver = userdiff_find_by_path(gs->path);
2131        if (!gs->driver)
2132                gs->driver = userdiff_find_by_name("default");
2133        grep_attr_unlock();
2134}
2135
2136static int grep_source_is_binary(struct grep_source *gs)
2137{
2138        grep_source_load_driver(gs);
2139        if (gs->driver->binary != -1)
2140                return gs->driver->binary;
2141
2142        if (!grep_source_load(gs))
2143                return buffer_is_binary(gs->buf, gs->size);
2144
2145        return 0;
2146}