grep.c on commit sha1-file.c: remove implicit dependency on the_index (58bf2a4)
   1#include "cache.h"
   2#include "config.h"
   3#include "grep.h"
   4#include "object-store.h"
   5#include "userdiff.h"
   6#include "xdiff-interface.h"
   7#include "diff.h"
   8#include "diffcore.h"
   9#include "commit.h"
  10#include "quote.h"
  11#include "help.h"
  12
/* Forward declarations; the definitions are not in this part of the file. */
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs);

/*
 * Template of default option values. It is filled in by
 * init_grep_defaults() and grep_config(), and copied field by field
 * into each caller-owned grep_opt by grep_init().
 */
static struct grep_opt grep_defaults;
  17
/*
 * Configuration names of the color slots, indexed by the GREP_COLOR_*
 * constants. grep_config() looks up the part of a "color.grep.<slot>"
 * key that follows the prefix in this table to find the slot to set.
 */
static const char *color_grep_slots[] = {
        [GREP_COLOR_CONTEXT]        = "context",
        [GREP_COLOR_FILENAME]       = "filename",
        [GREP_COLOR_FUNCTION]       = "function",
        [GREP_COLOR_LINENO]         = "lineNumber",
        [GREP_COLOR_COLUMNNO]       = "column",
        [GREP_COLOR_MATCH_CONTEXT]  = "matchContext",
        [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
        [GREP_COLOR_SELECTED]       = "selected",
        [GREP_COLOR_SEP]            = "separator",
};
  29
  30static void std_output(struct grep_opt *opt, const void *buf, size_t size)
  31{
  32        fwrite(buf, size, 1, stdout);
  33}
  34
  35static void color_set(char *dst, const char *color_bytes)
  36{
  37        xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes);
  38}
  39
  40/*
  41 * Initialize the grep_defaults template with hardcoded defaults.
  42 * We could let the compiler do this, but without C99 initializers
  43 * the code gets unwieldy and unreadable, so...
  44 */
  45void init_grep_defaults(struct repository *repo)
  46{
  47        struct grep_opt *opt = &grep_defaults;
  48        static int run_once;
  49
  50        if (run_once)
  51                return;
  52        run_once++;
  53
  54        memset(opt, 0, sizeof(*opt));
  55        opt->repo = repo;
  56        opt->relative = 1;
  57        opt->pathname = 1;
  58        opt->max_depth = -1;
  59        opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
  60        color_set(opt->colors[GREP_COLOR_CONTEXT], "");
  61        color_set(opt->colors[GREP_COLOR_FILENAME], "");
  62        color_set(opt->colors[GREP_COLOR_FUNCTION], "");
  63        color_set(opt->colors[GREP_COLOR_LINENO], "");
  64        color_set(opt->colors[GREP_COLOR_COLUMNNO], "");
  65        color_set(opt->colors[GREP_COLOR_MATCH_CONTEXT], GIT_COLOR_BOLD_RED);
  66        color_set(opt->colors[GREP_COLOR_MATCH_SELECTED], GIT_COLOR_BOLD_RED);
  67        color_set(opt->colors[GREP_COLOR_SELECTED], "");
  68        color_set(opt->colors[GREP_COLOR_SEP], GIT_COLOR_CYAN);
  69        opt->only_matching = 0;
  70        opt->color = -1;
  71        opt->output = std_output;
  72}
  73
  74static int parse_pattern_type_arg(const char *opt, const char *arg)
  75{
  76        if (!strcmp(arg, "default"))
  77                return GREP_PATTERN_TYPE_UNSPECIFIED;
  78        else if (!strcmp(arg, "basic"))
  79                return GREP_PATTERN_TYPE_BRE;
  80        else if (!strcmp(arg, "extended"))
  81                return GREP_PATTERN_TYPE_ERE;
  82        else if (!strcmp(arg, "fixed"))
  83                return GREP_PATTERN_TYPE_FIXED;
  84        else if (!strcmp(arg, "perl"))
  85                return GREP_PATTERN_TYPE_PCRE;
  86        die("bad %s argument: %s", opt, arg);
  87}
  88
/*
 * NOTE(review): this macro is not defined in this file (presumably it
 * comes from help.h). It appears to publish the color_grep_slots names,
 * plus the extra "match" key handled specially by grep_config(), for
 * config-key listing -- confirm against the macro definition.
 */
define_list_config_array_extra(color_grep_slots, {"match"});
  90
  91/*
  92 * Read the configuration file once and store it in
  93 * the grep_defaults template.
  94 */
  95int grep_config(const char *var, const char *value, void *cb)
  96{
  97        struct grep_opt *opt = &grep_defaults;
  98        const char *slot;
  99
 100        if (userdiff_config(var, value) < 0)
 101                return -1;
 102
 103        if (!strcmp(var, "grep.extendedregexp")) {
 104                opt->extended_regexp_option = git_config_bool(var, value);
 105                return 0;
 106        }
 107
 108        if (!strcmp(var, "grep.patterntype")) {
 109                opt->pattern_type_option = parse_pattern_type_arg(var, value);
 110                return 0;
 111        }
 112
 113        if (!strcmp(var, "grep.linenumber")) {
 114                opt->linenum = git_config_bool(var, value);
 115                return 0;
 116        }
 117        if (!strcmp(var, "grep.column")) {
 118                opt->columnnum = git_config_bool(var, value);
 119                return 0;
 120        }
 121
 122        if (!strcmp(var, "grep.fullname")) {
 123                opt->relative = !git_config_bool(var, value);
 124                return 0;
 125        }
 126
 127        if (!strcmp(var, "color.grep"))
 128                opt->color = git_config_colorbool(var, value);
 129        if (!strcmp(var, "color.grep.match")) {
 130                if (grep_config("color.grep.matchcontext", value, cb) < 0)
 131                        return -1;
 132                if (grep_config("color.grep.matchselected", value, cb) < 0)
 133                        return -1;
 134        } else if (skip_prefix(var, "color.grep.", &slot)) {
 135                int i = LOOKUP_CONFIG(color_grep_slots, slot);
 136                char *color;
 137
 138                if (i < 0)
 139                        return -1;
 140                color = opt->colors[i];
 141                if (!value)
 142                        return config_error_nonbool(var);
 143                return color_parse(value, color);
 144        }
 145        return 0;
 146}
 147
 148/*
 149 * Initialize one instance of grep_opt and copy the
 150 * default values from the template we read the configuration
 151 * information in an earlier call to git_config(grep_config).
 152 */
 153void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix)
 154{
 155        struct grep_opt *def = &grep_defaults;
 156        int i;
 157
 158        memset(opt, 0, sizeof(*opt));
 159        opt->repo = repo;
 160        opt->prefix = prefix;
 161        opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
 162        opt->pattern_tail = &opt->pattern_list;
 163        opt->header_tail = &opt->header_list;
 164
 165        opt->only_matching = def->only_matching;
 166        opt->color = def->color;
 167        opt->extended_regexp_option = def->extended_regexp_option;
 168        opt->pattern_type_option = def->pattern_type_option;
 169        opt->linenum = def->linenum;
 170        opt->columnnum = def->columnnum;
 171        opt->max_depth = def->max_depth;
 172        opt->pathname = def->pathname;
 173        opt->relative = def->relative;
 174        opt->output = def->output;
 175
 176        for (i = 0; i < NR_GREP_COLORS; i++)
 177                color_set(opt->colors[i], def->colors[i]);
 178}
 179
 180static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 181{
 182        /*
 183         * When committing to the pattern type by setting the relevant
 184         * fields in grep_opt it's generally not necessary to zero out
 185         * the fields we're not choosing, since they won't have been
 186         * set by anything. The extended_regexp_option field is the
 187         * only exception to this.
 188         *
 189         * This is because in the process of parsing grep.patternType
 190         * & grep.extendedRegexp we set opt->pattern_type_option and
 191         * opt->extended_regexp_option, respectively. We then
 192         * internally use opt->extended_regexp_option to see if we're
 193         * compiling an ERE. It must be unset if that's not actually
 194         * the case.
 195         */
 196        if (pattern_type != GREP_PATTERN_TYPE_ERE &&
 197            opt->extended_regexp_option)
 198                opt->extended_regexp_option = 0;
 199
 200        switch (pattern_type) {
 201        case GREP_PATTERN_TYPE_UNSPECIFIED:
 202                /* fall through */
 203
 204        case GREP_PATTERN_TYPE_BRE:
 205                break;
 206
 207        case GREP_PATTERN_TYPE_ERE:
 208                opt->extended_regexp_option = 1;
 209                break;
 210
 211        case GREP_PATTERN_TYPE_FIXED:
 212                opt->fixed = 1;
 213                break;
 214
 215        case GREP_PATTERN_TYPE_PCRE:
 216#ifdef USE_LIBPCRE2
 217                opt->pcre2 = 1;
 218#else
 219                /*
 220                 * It's important that pcre1 always be assigned to
 221                 * even when there's no USE_LIBPCRE* defined. We still
 222                 * call the PCRE stub function, it just dies with
 223                 * "cannot use Perl-compatible regexes[...]".
 224                 */
 225                opt->pcre1 = 1;
 226#endif
 227                break;
 228        }
 229}
 230
 231void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 232{
 233        if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
 234                grep_set_pattern_type_option(pattern_type, opt);
 235        else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
 236                grep_set_pattern_type_option(opt->pattern_type_option, opt);
 237        else if (opt->extended_regexp_option)
 238                /*
 239                 * This branch *must* happen after setting from the
 240                 * opt->pattern_type_option above, we don't want
 241                 * grep.extendedRegexp to override grep.patternType!
 242                 */
 243                grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
 244}
 245
 246static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
 247                                        const char *origin, int no,
 248                                        enum grep_pat_token t,
 249                                        enum grep_header_field field)
 250{
 251        struct grep_pat *p = xcalloc(1, sizeof(*p));
 252        p->pattern = xmemdupz(pat, patlen);
 253        p->patternlen = patlen;
 254        p->origin = origin;
 255        p->no = no;
 256        p->token = t;
 257        p->field = field;
 258        return p;
 259}
 260
 261static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
 262{
 263        **tail = p;
 264        *tail = &p->next;
 265        p->next = NULL;
 266
 267        switch (p->token) {
 268        case GREP_PATTERN: /* atom */
 269        case GREP_PATTERN_HEAD:
 270        case GREP_PATTERN_BODY:
 271                for (;;) {
 272                        struct grep_pat *new_pat;
 273                        size_t len = 0;
 274                        char *cp = p->pattern + p->patternlen, *nl = NULL;
 275                        while (++len <= p->patternlen) {
 276                                if (*(--cp) == '\n') {
 277                                        nl = cp;
 278                                        break;
 279                                }
 280                        }
 281                        if (!nl)
 282                                break;
 283                        new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
 284                                                  p->no, p->token, p->field);
 285                        new_pat->next = p->next;
 286                        if (!p->next)
 287                                *tail = &new_pat->next;
 288                        p->next = new_pat;
 289                        *nl = '\0';
 290                        p->patternlen -= len;
 291                }
 292                break;
 293        default:
 294                break;
 295        }
 296}
 297
 298void append_header_grep_pattern(struct grep_opt *opt,
 299                                enum grep_header_field field, const char *pat)
 300{
 301        struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
 302                                             GREP_PATTERN_HEAD, field);
 303        if (field == GREP_HEADER_REFLOG)
 304                opt->use_reflog_filter = 1;
 305        do_append_grep_pat(&opt->header_tail, p);
 306}
 307
 308void append_grep_pattern(struct grep_opt *opt, const char *pat,
 309                         const char *origin, int no, enum grep_pat_token t)
 310{
 311        append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 312}
 313
 314void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
 315                     const char *origin, int no, enum grep_pat_token t)
 316{
 317        struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
 318        do_append_grep_pat(&opt->pattern_tail, p);
 319}
 320
 321struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 322{
 323        struct grep_pat *pat;
 324        struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
 325        *ret = *opt;
 326
 327        ret->pattern_list = NULL;
 328        ret->pattern_tail = &ret->pattern_list;
 329
 330        for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
 331        {
 332                if(pat->token == GREP_PATTERN_HEAD)
 333                        append_header_grep_pattern(ret, pat->field,
 334                                                   pat->pattern);
 335                else
 336                        append_grep_pat(ret, pat->pattern, pat->patternlen,
 337                                        pat->origin, pat->no, pat->token);
 338        }
 339
 340        return ret;
 341}
 342
 343static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 344                const char *error)
 345{
 346        char where[1024];
 347
 348        if (p->no)
 349                xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
 350        else if (p->origin)
 351                xsnprintf(where, sizeof(where), "%s, ", p->origin);
 352        else
 353                where[0] = 0;
 354
 355        die("%s'%s': %s", where, p->pattern, error);
 356}
 357
 358static int is_fixed(const char *s, size_t len)
 359{
 360        size_t i;
 361
 362        for (i = 0; i < len; i++) {
 363                if (is_regex_special(s[i]))
 364                        return 0;
 365        }
 366
 367        return 1;
 368}
 369
 370static int has_null(const char *s, size_t len)
 371{
 372        /*
 373         * regcomp cannot accept patterns with NULs so when using it
 374         * we consider any pattern containing a NUL fixed.
 375         */
 376        if (memchr(s, 0, len))
 377                return 1;
 378
 379        return 0;
 380}
 381
 382#ifdef USE_LIBPCRE1
 383static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 384{
 385        const char *error;
 386        int erroffset;
 387        int options = PCRE_MULTILINE;
 388
 389        if (opt->ignore_case) {
 390                if (has_non_ascii(p->pattern))
 391                        p->pcre1_tables = pcre_maketables();
 392                options |= PCRE_CASELESS;
 393        }
 394        if (is_utf8_locale() && has_non_ascii(p->pattern))
 395                options |= PCRE_UTF8;
 396
 397        p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
 398                                      p->pcre1_tables);
 399        if (!p->pcre1_regexp)
 400                compile_regexp_failed(p, error);
 401
 402        p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
 403        if (!p->pcre1_extra_info && error)
 404                die("%s", error);
 405
 406#ifdef GIT_PCRE1_USE_JIT
 407        pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 408        if (p->pcre1_jit_on == 1) {
 409                p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
 410                if (!p->pcre1_jit_stack)
 411                        die("Couldn't allocate PCRE JIT stack");
 412                pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
 413        } else if (p->pcre1_jit_on != 0) {
 414                BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
 415                    p->pcre1_jit_on);
 416        }
 417#endif
 418}
 419
 420static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 421                regmatch_t *match, int eflags)
 422{
 423        int ovector[30], ret, flags = 0;
 424
 425        if (eflags & REG_NOTBOL)
 426                flags |= PCRE_NOTBOL;
 427
 428#ifdef GIT_PCRE1_USE_JIT
 429        if (p->pcre1_jit_on) {
 430                ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 431                                    eol - line, 0, flags, ovector,
 432                                    ARRAY_SIZE(ovector), p->pcre1_jit_stack);
 433        } else
 434#endif
 435        {
 436                ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 437                                eol - line, 0, flags, ovector,
 438                                ARRAY_SIZE(ovector));
 439        }
 440
 441        if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 442                die("pcre_exec failed with error code %d", ret);
 443        if (ret > 0) {
 444                ret = 0;
 445                match->rm_so = ovector[0];
 446                match->rm_eo = ovector[1];
 447        }
 448
 449        return ret;
 450}
 451
 452static void free_pcre1_regexp(struct grep_pat *p)
 453{
 454        pcre_free(p->pcre1_regexp);
 455#ifdef GIT_PCRE1_USE_JIT
 456        if (p->pcre1_jit_on) {
 457                pcre_free_study(p->pcre1_extra_info);
 458                pcre_jit_stack_free(p->pcre1_jit_stack);
 459        } else
 460#endif
 461        {
 462                pcre_free(p->pcre1_extra_info);
 463        }
 464        pcre_free((void *)p->pcre1_tables);
 465}
 466#else /* !USE_LIBPCRE1 */
 467static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 468{
 469        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 470}
 471
 472static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 473                regmatch_t *match, int eflags)
 474{
 475        return 1;
 476}
 477
 478static void free_pcre1_regexp(struct grep_pat *p)
 479{
 480}
 481#endif /* !USE_LIBPCRE1 */
 482
 483#ifdef USE_LIBPCRE2
 484static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 485{
 486        int error;
 487        PCRE2_UCHAR errbuf[256];
 488        PCRE2_SIZE erroffset;
 489        int options = PCRE2_MULTILINE;
 490        const uint8_t *character_tables = NULL;
 491        int jitret;
 492        int patinforet;
 493        size_t jitsizearg;
 494
 495        assert(opt->pcre2);
 496
 497        p->pcre2_compile_context = NULL;
 498
 499        if (opt->ignore_case) {
 500                if (has_non_ascii(p->pattern)) {
 501                        character_tables = pcre2_maketables(NULL);
 502                        p->pcre2_compile_context = pcre2_compile_context_create(NULL);
 503                        pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
 504                }
 505                options |= PCRE2_CASELESS;
 506        }
 507        if (is_utf8_locale() && has_non_ascii(p->pattern))
 508                options |= PCRE2_UTF;
 509
 510        p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
 511                                         p->patternlen, options, &error, &erroffset,
 512                                         p->pcre2_compile_context);
 513
 514        if (p->pcre2_pattern) {
 515                p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
 516                if (!p->pcre2_match_data)
 517                        die("Couldn't allocate PCRE2 match data");
 518        } else {
 519                pcre2_get_error_message(error, errbuf, sizeof(errbuf));
 520                compile_regexp_failed(p, (const char *)&errbuf);
 521        }
 522
 523        pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
 524        if (p->pcre2_jit_on == 1) {
 525                jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 526                if (jitret)
 527                        die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
 528
 529                /*
 530                 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
 531                 * tells us whether the library itself supports JIT,
 532                 * but to see whether we're going to be actually using
 533                 * JIT we need to extract PCRE2_INFO_JITSIZE from the
 534                 * pattern *after* we do pcre2_jit_compile() above.
 535                 *
 536                 * This is because if the pattern contains the
 537                 * (*NO_JIT) verb (see pcre2syntax(3))
 538                 * pcre2_jit_compile() will exit early with 0. If we
 539                 * then proceed to call pcre2_jit_match() further down
 540                 * the line instead of pcre2_match() we'll either
 541                 * segfault (pre PCRE 10.31) or run into a fatal error
 542                 * (post PCRE2 10.31)
 543                 */
 544                patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
 545                if (patinforet)
 546                        BUG("pcre2_pattern_info() failed: %d", patinforet);
 547                if (jitsizearg == 0) {
 548                        p->pcre2_jit_on = 0;
 549                        return;
 550                }
 551
 552                p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
 553                if (!p->pcre2_jit_stack)
 554                        die("Couldn't allocate PCRE2 JIT stack");
 555                p->pcre2_match_context = pcre2_match_context_create(NULL);
 556                if (!p->pcre2_match_context)
 557                        die("Couldn't allocate PCRE2 match context");
 558                pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
 559        } else if (p->pcre2_jit_on != 0) {
 560                BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
 561                    p->pcre1_jit_on);
 562        }
 563}
 564
 565static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 566                regmatch_t *match, int eflags)
 567{
 568        int ret, flags = 0;
 569        PCRE2_SIZE *ovector;
 570        PCRE2_UCHAR errbuf[256];
 571
 572        if (eflags & REG_NOTBOL)
 573                flags |= PCRE2_NOTBOL;
 574
 575        if (p->pcre2_jit_on)
 576                ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
 577                                      eol - line, 0, flags, p->pcre2_match_data,
 578                                      NULL);
 579        else
 580                ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
 581                                  eol - line, 0, flags, p->pcre2_match_data,
 582                                  NULL);
 583
 584        if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
 585                pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
 586                die("%s failed with error code %d: %s",
 587                    (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
 588                    errbuf);
 589        }
 590        if (ret > 0) {
 591                ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
 592                ret = 0;
 593                match->rm_so = (int)ovector[0];
 594                match->rm_eo = (int)ovector[1];
 595        }
 596
 597        return ret;
 598}
 599
 600static void free_pcre2_pattern(struct grep_pat *p)
 601{
 602        pcre2_compile_context_free(p->pcre2_compile_context);
 603        pcre2_code_free(p->pcre2_pattern);
 604        pcre2_match_data_free(p->pcre2_match_data);
 605        pcre2_jit_stack_free(p->pcre2_jit_stack);
 606        pcre2_match_context_free(p->pcre2_match_context);
 607}
 608#else /* !USE_LIBPCRE2 */
 609static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 610{
 611        /*
 612         * Unreachable until USE_LIBPCRE2 becomes synonymous with
 613         * USE_LIBPCRE. See the sibling comment in
 614         * grep_set_pattern_type_option().
 615         */
 616        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 617}
 618
 619static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 620                regmatch_t *match, int eflags)
 621{
 622        return 1;
 623}
 624
 625static void free_pcre2_pattern(struct grep_pat *p)
 626{
 627}
 628#endif /* !USE_LIBPCRE2 */
 629
 630static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 631{
 632        struct strbuf sb = STRBUF_INIT;
 633        int err;
 634        int regflags = 0;
 635
 636        basic_regex_quote_buf(&sb, p->pattern);
 637        if (opt->ignore_case)
 638                regflags |= REG_ICASE;
 639        err = regcomp(&p->regexp, sb.buf, regflags);
 640        if (opt->debug)
 641                fprintf(stderr, "fixed %s\n", sb.buf);
 642        strbuf_release(&sb);
 643        if (err) {
 644                char errbuf[1024];
 645                regerror(err, &p->regexp, errbuf, sizeof(errbuf));
 646                compile_regexp_failed(p, errbuf);
 647        }
 648}
 649
 650static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 651{
 652        int ascii_only;
 653        int err;
 654        int regflags = REG_NEWLINE;
 655
 656        p->word_regexp = opt->word_regexp;
 657        p->ignore_case = opt->ignore_case;
 658        ascii_only     = !has_non_ascii(p->pattern);
 659
 660        /*
 661         * Even when -F (fixed) asks us to do a non-regexp search, we
 662         * may not be able to correctly case-fold when -i
 663         * (ignore-case) is asked (in which case, we'll synthesize a
 664         * regexp to match the pattern that matches regexp special
 665         * characters literally, while ignoring case differences).  On
 666         * the other hand, even without -F, if the pattern does not
 667         * have any regexp special characters and there is no need for
 668         * case-folding search, we can internally turn it into a
 669         * simple string match using kws.  p->fixed tells us if we
 670         * want to use kws.
 671         */
 672        if (opt->fixed ||
 673            has_null(p->pattern, p->patternlen) ||
 674            is_fixed(p->pattern, p->patternlen))
 675                p->fixed = !p->ignore_case || ascii_only;
 676
 677        if (p->fixed) {
 678                p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
 679                kwsincr(p->kws, p->pattern, p->patternlen);
 680                kwsprep(p->kws);
 681                return;
 682        } else if (opt->fixed) {
 683                /*
 684                 * We come here when the pattern has the non-ascii
 685                 * characters we cannot case-fold, and asked to
 686                 * ignore-case.
 687                 */
 688                compile_fixed_regexp(p, opt);
 689                return;
 690        }
 691
 692        if (opt->pcre2) {
 693                compile_pcre2_pattern(p, opt);
 694                return;
 695        }
 696
 697        if (opt->pcre1) {
 698                compile_pcre1_regexp(p, opt);
 699                return;
 700        }
 701
 702        if (p->ignore_case)
 703                regflags |= REG_ICASE;
 704        if (opt->extended_regexp_option)
 705                regflags |= REG_EXTENDED;
 706        err = regcomp(&p->regexp, p->pattern, regflags);
 707        if (err) {
 708                char errbuf[1024];
 709                regerror(err, &p->regexp, errbuf, 1024);
 710                compile_regexp_failed(p, errbuf);
 711        }
 712}
 713
 714static struct grep_expr *compile_pattern_or(struct grep_pat **);
 715static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
 716{
 717        struct grep_pat *p;
 718        struct grep_expr *x;
 719
 720        p = *list;
 721        if (!p)
 722                return NULL;
 723        switch (p->token) {
 724        case GREP_PATTERN: /* atom */
 725        case GREP_PATTERN_HEAD:
 726        case GREP_PATTERN_BODY:
 727                x = xcalloc(1, sizeof (struct grep_expr));
 728                x->node = GREP_NODE_ATOM;
 729                x->u.atom = p;
 730                *list = p->next;
 731                return x;
 732        case GREP_OPEN_PAREN:
 733                *list = p->next;
 734                x = compile_pattern_or(list);
 735                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 736                        die("unmatched parenthesis");
 737                *list = (*list)->next;
 738                return x;
 739        default:
 740                return NULL;
 741        }
 742}
 743
 744static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 745{
 746        struct grep_pat *p;
 747        struct grep_expr *x;
 748
 749        p = *list;
 750        if (!p)
 751                return NULL;
 752        switch (p->token) {
 753        case GREP_NOT:
 754                if (!p->next)
 755                        die("--not not followed by pattern expression");
 756                *list = p->next;
 757                x = xcalloc(1, sizeof (struct grep_expr));
 758                x->node = GREP_NODE_NOT;
 759                x->u.unary = compile_pattern_not(list);
 760                if (!x->u.unary)
 761                        die("--not followed by non pattern expression");
 762                return x;
 763        default:
 764                return compile_pattern_atom(list);
 765        }
 766}
 767
 768static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 769{
 770        struct grep_pat *p;
 771        struct grep_expr *x, *y, *z;
 772
 773        x = compile_pattern_not(list);
 774        p = *list;
 775        if (p && p->token == GREP_AND) {
 776                if (!p->next)
 777                        die("--and not followed by pattern expression");
 778                *list = p->next;
 779                y = compile_pattern_and(list);
 780                if (!y)
 781                        die("--and not followed by pattern expression");
 782                z = xcalloc(1, sizeof (struct grep_expr));
 783                z->node = GREP_NODE_AND;
 784                z->u.binary.left = x;
 785                z->u.binary.right = y;
 786                return z;
 787        }
 788        return x;
 789}
 790
 791static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 792{
 793        struct grep_pat *p;
 794        struct grep_expr *x, *y, *z;
 795
 796        x = compile_pattern_and(list);
 797        p = *list;
 798        if (x && p && p->token != GREP_CLOSE_PAREN) {
 799                y = compile_pattern_or(list);
 800                if (!y)
 801                        die("not a pattern expression %s", p->pattern);
 802                z = xcalloc(1, sizeof (struct grep_expr));
 803                z->node = GREP_NODE_OR;
 804                z->u.binary.left = x;
 805                z->u.binary.right = y;
 806                return z;
 807        }
 808        return x;
 809}
 810
 811static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 812{
 813        return compile_pattern_or(list);
 814}
 815
 816static void indent(int in)
 817{
 818        while (in-- > 0)
 819                fputc(' ', stderr);
 820}
 821
 822static void dump_grep_pat(struct grep_pat *p)
 823{
 824        switch (p->token) {
 825        case GREP_AND: fprintf(stderr, "*and*"); break;
 826        case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
 827        case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
 828        case GREP_NOT: fprintf(stderr, "*not*"); break;
 829        case GREP_OR: fprintf(stderr, "*or*"); break;
 830
 831        case GREP_PATTERN: fprintf(stderr, "pattern"); break;
 832        case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
 833        case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
 834        }
 835
 836        switch (p->token) {
 837        default: break;
 838        case GREP_PATTERN_HEAD:
 839                fprintf(stderr, "<head %d>", p->field); break;
 840        case GREP_PATTERN_BODY:
 841                fprintf(stderr, "<body>"); break;
 842        }
 843        switch (p->token) {
 844        default: break;
 845        case GREP_PATTERN_HEAD:
 846        case GREP_PATTERN_BODY:
 847        case GREP_PATTERN:
 848                fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
 849                break;
 850        }
 851        fputc('\n', stderr);
 852}
 853
 854static void dump_grep_expression_1(struct grep_expr *x, int in)
 855{
 856        indent(in);
 857        switch (x->node) {
 858        case GREP_NODE_TRUE:
 859                fprintf(stderr, "true\n");
 860                break;
 861        case GREP_NODE_ATOM:
 862                dump_grep_pat(x->u.atom);
 863                break;
 864        case GREP_NODE_NOT:
 865                fprintf(stderr, "(not\n");
 866                dump_grep_expression_1(x->u.unary, in+1);
 867                indent(in);
 868                fprintf(stderr, ")\n");
 869                break;
 870        case GREP_NODE_AND:
 871                fprintf(stderr, "(and\n");
 872                dump_grep_expression_1(x->u.binary.left, in+1);
 873                dump_grep_expression_1(x->u.binary.right, in+1);
 874                indent(in);
 875                fprintf(stderr, ")\n");
 876                break;
 877        case GREP_NODE_OR:
 878                fprintf(stderr, "(or\n");
 879                dump_grep_expression_1(x->u.binary.left, in+1);
 880                dump_grep_expression_1(x->u.binary.right, in+1);
 881                indent(in);
 882                fprintf(stderr, ")\n");
 883                break;
 884        }
 885}
 886
 887static void dump_grep_expression(struct grep_opt *opt)
 888{
 889        struct grep_expr *x = opt->pattern_expression;
 890
 891        if (opt->all_match)
 892                fprintf(stderr, "[all-match]\n");
 893        dump_grep_expression_1(x, 0);
 894        fflush(NULL);
 895}
 896
 897static struct grep_expr *grep_true_expr(void)
 898{
 899        struct grep_expr *z = xcalloc(1, sizeof(*z));
 900        z->node = GREP_NODE_TRUE;
 901        return z;
 902}
 903
 904static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
 905{
 906        struct grep_expr *z = xcalloc(1, sizeof(*z));
 907        z->node = GREP_NODE_OR;
 908        z->u.binary.left = left;
 909        z->u.binary.right = right;
 910        return z;
 911}
 912
 913static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
 914{
 915        struct grep_pat *p;
 916        struct grep_expr *header_expr;
 917        struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
 918        enum grep_header_field fld;
 919
 920        if (!opt->header_list)
 921                return NULL;
 922
 923        for (p = opt->header_list; p; p = p->next) {
 924                if (p->token != GREP_PATTERN_HEAD)
 925                        BUG("a non-header pattern in grep header list.");
 926                if (p->field < GREP_HEADER_FIELD_MIN ||
 927                    GREP_HEADER_FIELD_MAX <= p->field)
 928                        BUG("unknown header field %d", p->field);
 929                compile_regexp(p, opt);
 930        }
 931
 932        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
 933                header_group[fld] = NULL;
 934
 935        for (p = opt->header_list; p; p = p->next) {
 936                struct grep_expr *h;
 937                struct grep_pat *pp = p;
 938
 939                h = compile_pattern_atom(&pp);
 940                if (!h || pp != p->next)
 941                        BUG("malformed header expr");
 942                if (!header_group[p->field]) {
 943                        header_group[p->field] = h;
 944                        continue;
 945                }
 946                header_group[p->field] = grep_or_expr(h, header_group[p->field]);
 947        }
 948
 949        header_expr = NULL;
 950
 951        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
 952                if (!header_group[fld])
 953                        continue;
 954                if (!header_expr)
 955                        header_expr = grep_true_expr();
 956                header_expr = grep_or_expr(header_group[fld], header_expr);
 957        }
 958        return header_expr;
 959}
 960
 961static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
 962{
 963        struct grep_expr *z = x;
 964
 965        while (x) {
 966                assert(x->node == GREP_NODE_OR);
 967                if (x->u.binary.right &&
 968                    x->u.binary.right->node == GREP_NODE_TRUE) {
 969                        x->u.binary.right = y;
 970                        break;
 971                }
 972                x = x->u.binary.right;
 973        }
 974        return z;
 975}
 976
 977static void compile_grep_patterns_real(struct grep_opt *opt)
 978{
 979        struct grep_pat *p;
 980        struct grep_expr *header_expr = prep_header_patterns(opt);
 981
 982        for (p = opt->pattern_list; p; p = p->next) {
 983                switch (p->token) {
 984                case GREP_PATTERN: /* atom */
 985                case GREP_PATTERN_HEAD:
 986                case GREP_PATTERN_BODY:
 987                        compile_regexp(p, opt);
 988                        break;
 989                default:
 990                        opt->extended = 1;
 991                        break;
 992                }
 993        }
 994
 995        if (opt->all_match || header_expr)
 996                opt->extended = 1;
 997        else if (!opt->extended && !opt->debug)
 998                return;
 999
1000        p = opt->pattern_list;
1001        if (p)
1002                opt->pattern_expression = compile_pattern_expr(&p);
1003        if (p)
1004                die("incomplete pattern expression: %s", p->pattern);
1005
1006        if (!header_expr)
1007                return;
1008
1009        if (!opt->pattern_expression)
1010                opt->pattern_expression = header_expr;
1011        else if (opt->all_match)
1012                opt->pattern_expression = grep_splice_or(header_expr,
1013                                                         opt->pattern_expression);
1014        else
1015                opt->pattern_expression = grep_or_expr(opt->pattern_expression,
1016                                                       header_expr);
1017        opt->all_match = 1;
1018}
1019
1020void compile_grep_patterns(struct grep_opt *opt)
1021{
1022        compile_grep_patterns_real(opt);
1023        if (opt->debug)
1024                dump_grep_expression(opt);
1025}
1026
1027static void free_pattern_expr(struct grep_expr *x)
1028{
1029        switch (x->node) {
1030        case GREP_NODE_TRUE:
1031        case GREP_NODE_ATOM:
1032                break;
1033        case GREP_NODE_NOT:
1034                free_pattern_expr(x->u.unary);
1035                break;
1036        case GREP_NODE_AND:
1037        case GREP_NODE_OR:
1038                free_pattern_expr(x->u.binary.left);
1039                free_pattern_expr(x->u.binary.right);
1040                break;
1041        }
1042        free(x);
1043}
1044
1045void free_grep_patterns(struct grep_opt *opt)
1046{
1047        struct grep_pat *p, *n;
1048
1049        for (p = opt->pattern_list; p; p = n) {
1050                n = p->next;
1051                switch (p->token) {
1052                case GREP_PATTERN: /* atom */
1053                case GREP_PATTERN_HEAD:
1054                case GREP_PATTERN_BODY:
1055                        if (p->kws)
1056                                kwsfree(p->kws);
1057                        else if (p->pcre1_regexp)
1058                                free_pcre1_regexp(p);
1059                        else if (p->pcre2_pattern)
1060                                free_pcre2_pattern(p);
1061                        else
1062                                regfree(&p->regexp);
1063                        free(p->pattern);
1064                        break;
1065                default:
1066                        break;
1067                }
1068                free(p);
1069        }
1070
1071        if (!opt->extended)
1072                return;
1073        free_pattern_expr(opt->pattern_expression);
1074}
1075
/*
 * Advance "cp" to the next '\n' or until *left bytes have been
 * consumed, whichever comes first.  *left is decremented by the number
 * of bytes skipped.  Returns the new position (pointing at the newline
 * if one was found).
 */
static char *end_of_line(char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining && *cp != '\n'; remaining--)
		cp++;
	*left = remaining;
	return cp;
}
1086
/* Return 1 if "ch" is alphanumeric or an underscore, 0 otherwise. */
static int word_char(char ch)
{
	if (isalnum(ch))
		return 1;
	return ch == '_';
}
1091
1092static void output_color(struct grep_opt *opt, const void *data, size_t size,
1093                         const char *color)
1094{
1095        if (want_color(opt->color) && color && color[0]) {
1096                opt->output(opt, color, strlen(color));
1097                opt->output(opt, data, size);
1098                opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
1099        } else
1100                opt->output(opt, data, size);
1101}
1102
1103static void output_sep(struct grep_opt *opt, char sign)
1104{
1105        if (opt->null_following_name)
1106                opt->output(opt, "\0", 1);
1107        else
1108                output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
1109}
1110
1111static void show_name(struct grep_opt *opt, const char *name)
1112{
1113        output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1114        opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
1115}
1116
1117static int fixmatch(struct grep_pat *p, char *line, char *eol,
1118                    regmatch_t *match)
1119{
1120        struct kwsmatch kwsm;
1121        size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
1122        if (offset == -1) {
1123                match->rm_so = match->rm_eo = -1;
1124                return REG_NOMATCH;
1125        } else {
1126                match->rm_so = offset;
1127                match->rm_eo = match->rm_so + kwsm.size[0];
1128                return 0;
1129        }
1130}
1131
1132static int patmatch(struct grep_pat *p, char *line, char *eol,
1133                    regmatch_t *match, int eflags)
1134{
1135        int hit;
1136
1137        if (p->fixed)
1138                hit = !fixmatch(p, line, eol, match);
1139        else if (p->pcre1_regexp)
1140                hit = !pcre1match(p, line, eol, match, eflags);
1141        else if (p->pcre2_pattern)
1142                hit = !pcre2match(p, line, eol, match, eflags);
1143        else
1144                hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
1145                                   eflags);
1146
1147        return hit;
1148}
1149
/*
 * Cut the line at the last '>' found strictly after "bol" and before
 * *eol_p: the byte following that '>' is overwritten with NUL and
 * *eol_p is moved to point at it.
 *
 * Returns the byte that was overwritten so the caller can restore it,
 * or 0 when no '>' was found (in which case nothing is modified).
 */
static int strip_timestamp(char *bol, char **eol_p)
{
	char *cur;

	for (cur = *eol_p - 1; bol < cur; cur--) {
		if (*cur == '>') {
			int saved = cur[1];

			*eol_p = cur + 1;
			cur[1] = '\0';
			return saved;
		}
	}
	return 0;
}
1165
/*
 * Line prefixes of the header fields that header patterns can match,
 * indexed by the pattern's "field" value.  "len" is the length of the
 * prefix, including its trailing space.
 */
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	{ .field = "author ",    .len = 7 },
	{ .field = "committer ", .len = 10 },
	{ .field = "reflog ",    .len = 7 },
};
1174
1175static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
1176                             enum grep_context ctx,
1177                             regmatch_t *pmatch, int eflags)
1178{
1179        int hit = 0;
1180        int saved_ch = 0;
1181        const char *start = bol;
1182
1183        if ((p->token != GREP_PATTERN) &&
1184            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
1185                return 0;
1186
1187        if (p->token == GREP_PATTERN_HEAD) {
1188                const char *field;
1189                size_t len;
1190                assert(p->field < ARRAY_SIZE(header_field));
1191                field = header_field[p->field].field;
1192                len = header_field[p->field].len;
1193                if (strncmp(bol, field, len))
1194                        return 0;
1195                bol += len;
1196                switch (p->field) {
1197                case GREP_HEADER_AUTHOR:
1198                case GREP_HEADER_COMMITTER:
1199                        saved_ch = strip_timestamp(bol, &eol);
1200                        break;
1201                default:
1202                        break;
1203                }
1204        }
1205
1206 again:
1207        hit = patmatch(p, bol, eol, pmatch, eflags);
1208
1209        if (hit && p->word_regexp) {
1210                if ((pmatch[0].rm_so < 0) ||
1211                    (eol - bol) < pmatch[0].rm_so ||
1212                    (pmatch[0].rm_eo < 0) ||
1213                    (eol - bol) < pmatch[0].rm_eo)
1214                        die("regexp returned nonsense");
1215
1216                /* Match beginning must be either beginning of the
1217                 * line, or at word boundary (i.e. the last char must
1218                 * not be a word char).  Similarly, match end must be
1219                 * either end of the line, or at word boundary
1220                 * (i.e. the next char must not be a word char).
1221                 */
1222                if ( ((pmatch[0].rm_so == 0) ||
1223                      !word_char(bol[pmatch[0].rm_so-1])) &&
1224                     ((pmatch[0].rm_eo == (eol-bol)) ||
1225                      !word_char(bol[pmatch[0].rm_eo])) )
1226                        ;
1227                else
1228                        hit = 0;
1229
1230                /* Words consist of at least one character. */
1231                if (pmatch->rm_so == pmatch->rm_eo)
1232                        hit = 0;
1233
1234                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
1235                        /* There could be more than one match on the
1236                         * line, and the first match might not be
1237                         * strict word match.  But later ones could be!
1238                         * Forward to the next possible start, i.e. the
1239                         * next position following a non-word char.
1240                         */
1241                        bol = pmatch[0].rm_so + bol + 1;
1242                        while (word_char(bol[-1]) && bol < eol)
1243                                bol++;
1244                        eflags |= REG_NOTBOL;
1245                        if (bol < eol)
1246                                goto again;
1247                }
1248        }
1249        if (p->token == GREP_PATTERN_HEAD && saved_ch)
1250                *eol = saved_ch;
1251        if (hit) {
1252                pmatch[0].rm_so += bol - start;
1253                pmatch[0].rm_eo += bol - start;
1254        }
1255        return hit;
1256}
1257
1258static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol,
1259                           char *eol, enum grep_context ctx, ssize_t *col,
1260                           ssize_t *icol, int collect_hits)
1261{
1262        int h = 0;
1263
1264        if (!x)
1265                die("Not a valid grep expression");
1266        switch (x->node) {
1267        case GREP_NODE_TRUE:
1268                h = 1;
1269                break;
1270        case GREP_NODE_ATOM:
1271                {
1272                        regmatch_t tmp;
1273                        h = match_one_pattern(x->u.atom, bol, eol, ctx,
1274                                              &tmp, 0);
1275                        if (h && (*col < 0 || tmp.rm_so < *col))
1276                                *col = tmp.rm_so;
1277                }
1278                break;
1279        case GREP_NODE_NOT:
1280                /*
1281                 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1282                 */
1283                h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1284                                     0);
1285                break;
1286        case GREP_NODE_AND:
1287                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1288                                    icol, 0);
1289                if (h || opt->columnnum) {
1290                        /*
1291                         * Don't short-circuit AND when given --column, since a
1292                         * NOT earlier in the tree may turn this into an OR. In
1293                         * this case, see the below comment.
1294                         */
1295                        h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1296                                             ctx, col, icol, 0);
1297                }
1298                break;
1299        case GREP_NODE_OR:
1300                if (!(collect_hits || opt->columnnum)) {
1301                        /*
1302                         * Don't short-circuit OR when given --column (or
1303                         * collecting hits) to ensure we don't skip a later
1304                         * child that would produce an earlier match.
1305                         */
1306                        return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1307                                                ctx, col, icol, 0) ||
1308                                match_expr_eval(opt, x->u.binary.right, bol,
1309                                                eol, ctx, col, icol, 0));
1310                }
1311                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1312                                    icol, 0);
1313                if (collect_hits)
1314                        x->u.binary.left->hit |= h;
1315                h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
1316                                     icol, collect_hits);
1317                break;
1318        default:
1319                die("Unexpected node type (internal error) %d", x->node);
1320        }
1321        if (collect_hits)
1322                x->hit |= h;
1323        return h;
1324}
1325
1326static int match_expr(struct grep_opt *opt, char *bol, char *eol,
1327                      enum grep_context ctx, ssize_t *col,
1328                      ssize_t *icol, int collect_hits)
1329{
1330        struct grep_expr *x = opt->pattern_expression;
1331        return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
1332}
1333
1334static int match_line(struct grep_opt *opt, char *bol, char *eol,
1335                      ssize_t *col, ssize_t *icol,
1336                      enum grep_context ctx, int collect_hits)
1337{
1338        struct grep_pat *p;
1339        int hit = 0;
1340
1341        if (opt->extended)
1342                return match_expr(opt, bol, eol, ctx, col, icol,
1343                                  collect_hits);
1344
1345        /* we do not call with collect_hits without being extended */
1346        for (p = opt->pattern_list; p; p = p->next) {
1347                regmatch_t tmp;
1348                if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
1349                        hit |= 1;
1350                        if (!opt->columnnum) {
1351                                /*
1352                                 * Without --column, any single match on a line
1353                                 * is enough to know that it needs to be
1354                                 * printed. With --column, scan _all_ patterns
1355                                 * to find the earliest.
1356                                 */
1357                                break;
1358                        }
1359                        if (*col < 0 || tmp.rm_so < *col)
1360                                *col = tmp.rm_so;
1361                }
1362        }
1363        return hit;
1364}
1365
1366static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
1367                              enum grep_context ctx,
1368                              regmatch_t *pmatch, int eflags)
1369{
1370        regmatch_t match;
1371
1372        if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
1373                return 0;
1374        if (match.rm_so < 0 || match.rm_eo < 0)
1375                return 0;
1376        if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1377                if (match.rm_so > pmatch->rm_so)
1378                        return 1;
1379                if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1380                        return 1;
1381        }
1382        pmatch->rm_so = match.rm_so;
1383        pmatch->rm_eo = match.rm_eo;
1384        return 1;
1385}
1386
1387static int next_match(struct grep_opt *opt, char *bol, char *eol,
1388                      enum grep_context ctx, regmatch_t *pmatch, int eflags)
1389{
1390        struct grep_pat *p;
1391        int hit = 0;
1392
1393        pmatch->rm_so = pmatch->rm_eo = -1;
1394        if (bol < eol) {
1395                for (p = opt->pattern_list; p; p = p->next) {
1396                        switch (p->token) {
1397                        case GREP_PATTERN: /* atom */
1398                        case GREP_PATTERN_HEAD:
1399                        case GREP_PATTERN_BODY:
1400                                hit |= match_next_pattern(p, bol, eol, ctx,
1401                                                          pmatch, eflags);
1402                                break;
1403                        default:
1404                                break;
1405                        }
1406                }
1407        }
1408        return hit;
1409}
1410
1411static void show_line_header(struct grep_opt *opt, const char *name,
1412                             unsigned lno, ssize_t cno, char sign)
1413{
1414        if (opt->heading && opt->last_shown == 0) {
1415                output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1416                opt->output(opt, "\n", 1);
1417        }
1418        opt->last_shown = lno;
1419
1420        if (!opt->heading && opt->pathname) {
1421                output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1422                output_sep(opt, sign);
1423        }
1424        if (opt->linenum) {
1425                char buf[32];
1426                xsnprintf(buf, sizeof(buf), "%d", lno);
1427                output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
1428                output_sep(opt, sign);
1429        }
1430        /*
1431         * Treat 'cno' as the 1-indexed offset from the start of a non-context
1432         * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1433         * being called with a context line.
1434         */
1435        if (opt->columnnum && cno) {
1436                char buf[32];
1437                xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
1438                output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
1439                output_sep(opt, sign);
1440        }
1441}
1442
1443static void show_line(struct grep_opt *opt, char *bol, char *eol,
1444                      const char *name, unsigned lno, ssize_t cno, char sign)
1445{
1446        int rest = eol - bol;
1447        const char *match_color = NULL;
1448        const char *line_color = NULL;
1449
1450        if (opt->file_break && opt->last_shown == 0) {
1451                if (opt->show_hunk_mark)
1452                        opt->output(opt, "\n", 1);
1453        } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1454                if (opt->last_shown == 0) {
1455                        if (opt->show_hunk_mark) {
1456                                output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
1457                                opt->output(opt, "\n", 1);
1458                        }
1459                } else if (lno > opt->last_shown + 1) {
1460                        output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
1461                        opt->output(opt, "\n", 1);
1462                }
1463        }
1464        if (!opt->only_matching) {
1465                /*
1466                 * In case the line we're being called with contains more than
1467                 * one match, leave printing each header to the loop below.
1468                 */
1469                show_line_header(opt, name, lno, cno, sign);
1470        }
1471        if (opt->color || opt->only_matching) {
1472                regmatch_t match;
1473                enum grep_context ctx = GREP_CONTEXT_BODY;
1474                int ch = *eol;
1475                int eflags = 0;
1476
1477                if (opt->color) {
1478                        if (sign == ':')
1479                                match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
1480                        else
1481                                match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
1482                        if (sign == ':')
1483                                line_color = opt->colors[GREP_COLOR_SELECTED];
1484                        else if (sign == '-')
1485                                line_color = opt->colors[GREP_COLOR_CONTEXT];
1486                        else if (sign == '=')
1487                                line_color = opt->colors[GREP_COLOR_FUNCTION];
1488                }
1489                *eol = '\0';
1490                while (next_match(opt, bol, eol, ctx, &match, eflags)) {
1491                        if (match.rm_so == match.rm_eo)
1492                                break;
1493
1494                        if (opt->only_matching)
1495                                show_line_header(opt, name, lno, cno, sign);
1496                        else
1497                                output_color(opt, bol, match.rm_so, line_color);
1498                        output_color(opt, bol + match.rm_so,
1499                                     match.rm_eo - match.rm_so, match_color);
1500                        if (opt->only_matching)
1501                                opt->output(opt, "\n", 1);
1502                        bol += match.rm_eo;
1503                        cno += match.rm_eo;
1504                        rest -= match.rm_eo;
1505                        eflags = REG_NOTBOL;
1506                }
1507                *eol = ch;
1508        }
1509        if (!opt->only_matching) {
1510                output_color(opt, bol, rest, line_color);
1511                opt->output(opt, "\n", 1);
1512        }
1513}
1514
#ifndef NO_PTHREADS
/* Non-zero when the helpers below should actually take the mutex. */
int grep_use_locks;

/*
 * Serializes access to the gitattributes machinery, which is not
 * thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

/* Take grep_attr_mutex; a no-op unless grep_use_locks is set. */
static inline void grep_attr_lock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_lock(&grep_attr_mutex);
}

/* Release grep_attr_mutex; a no-op unless grep_use_locks is set. */
static inline void grep_attr_unlock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_unlock(&grep_attr_mutex);
}

/*
 * Same as grep_attr_mutex, but protecting the thread-unsafe object db
 * access.
 */
pthread_mutex_t grep_read_mutex;

#else
/* Without pthreads the lock helpers compile away to nothing. */
#define grep_attr_lock()
#define grep_attr_unlock()
#endif
1545
1546static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
1547{
1548        xdemitconf_t *xecfg = opt->priv;
1549        if (xecfg && !xecfg->find_func) {
1550                grep_source_load_driver(gs);
1551                if (gs->driver->funcname.pattern) {
1552                        const struct userdiff_funcname *pe = &gs->driver->funcname;
1553                        xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1554                } else {
1555                        xecfg = opt->priv = NULL;
1556                }
1557        }
1558
1559        if (xecfg) {
1560                char buf[1];
1561                return xecfg->find_func(bol, eol - bol, buf, 1,
1562                                        xecfg->find_func_priv) >= 0;
1563        }
1564
1565        if (bol == eol)
1566                return 0;
1567        if (isalpha(*bol) || *bol == '_' || *bol == '$')
1568                return 1;
1569        return 0;
1570}
1571
1572static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1573                               char *bol, unsigned lno)
1574{
1575        while (bol > gs->buf) {
1576                char *eol = --bol;
1577
1578                while (bol > gs->buf && bol[-1] != '\n')
1579                        bol--;
1580                lno--;
1581
1582                if (lno <= opt->last_shown)
1583                        break;
1584
1585                if (match_funcname(opt, gs, bol, eol)) {
1586                        show_line(opt, bol, eol, gs->name, lno, 0, '=');
1587                        break;
1588                }
1589        }
1590}
1591
1592static int is_empty_line(const char *bol, const char *eol);
1593
1594static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
1595                             char *bol, char *end, unsigned lno)
1596{
1597        unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
1598        int funcname_needed = !!opt->funcname, comment_needed = 0;
1599
1600        if (opt->pre_context < lno)
1601                from = lno - opt->pre_context;
1602        if (from <= opt->last_shown)
1603                from = opt->last_shown + 1;
1604        orig_from = from;
1605        if (opt->funcbody) {
1606                if (match_funcname(opt, gs, bol, end))
1607                        comment_needed = 1;
1608                else
1609                        funcname_needed = 1;
1610                from = opt->last_shown + 1;
1611        }
1612
1613        /* Rewind. */
1614        while (bol > gs->buf && cur > from) {
1615                char *next_bol = bol;
1616                char *eol = --bol;
1617
1618                while (bol > gs->buf && bol[-1] != '\n')
1619                        bol--;
1620                cur--;
1621                if (comment_needed && (is_empty_line(bol, eol) ||
1622                                       match_funcname(opt, gs, bol, eol))) {
1623                        comment_needed = 0;
1624                        from = orig_from;
1625                        if (cur < from) {
1626                                cur++;
1627                                bol = next_bol;
1628                                break;
1629                        }
1630                }
1631                if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
1632                        funcname_lno = cur;
1633                        funcname_needed = 0;
1634                        if (opt->funcbody)
1635                                comment_needed = 1;
1636                        else
1637                                from = orig_from;
1638                }
1639        }
1640
1641        /* We need to look even further back to find a function signature. */
1642        if (opt->funcname && funcname_needed)
1643                show_funcname_line(opt, gs, bol, cur);
1644
1645        /* Back forward. */
1646        while (cur < lno) {
1647                char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
1648
1649                while (*eol != '\n')
1650                        eol++;
1651                show_line(opt, bol, eol, gs->name, cur, 0, sign);
1652                bol = eol + 1;
1653                cur++;
1654        }
1655}
1656
1657static int should_lookahead(struct grep_opt *opt)
1658{
1659        struct grep_pat *p;
1660
1661        if (opt->extended)
1662                return 0; /* punt for too complex stuff */
1663        if (opt->invert)
1664                return 0;
1665        for (p = opt->pattern_list; p; p = p->next) {
1666                if (p->token != GREP_PATTERN)
1667                        return 0; /* punt for "header only" and stuff */
1668        }
1669        return 1;
1670}
1671
1672static int look_ahead(struct grep_opt *opt,
1673                      unsigned long *left_p,
1674                      unsigned *lno_p,
1675                      char **bol_p)
1676{
1677        unsigned lno = *lno_p;
1678        char *bol = *bol_p;
1679        struct grep_pat *p;
1680        char *sp, *last_bol;
1681        regoff_t earliest = -1;
1682
1683        for (p = opt->pattern_list; p; p = p->next) {
1684                int hit;
1685                regmatch_t m;
1686
1687                hit = patmatch(p, bol, bol + *left_p, &m, 0);
1688                if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1689                        continue;
1690                if (earliest < 0 || m.rm_so < earliest)
1691                        earliest = m.rm_so;
1692        }
1693
1694        if (earliest < 0) {
1695                *bol_p = bol + *left_p;
1696                *left_p = 0;
1697                return 1;
1698        }
1699        for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1700                ; /* find the beginning of the line */
1701        last_bol = sp;
1702
1703        for (sp = bol; sp < last_bol; sp++) {
1704                if (*sp == '\n')
1705                        lno++;
1706        }
1707        *left_p -= last_bol - bol;
1708        *bol_p = last_bol;
1709        *lno_p = lno;
1710        return 0;
1711}
1712
1713static int fill_textconv_grep(struct repository *r,
1714                              struct userdiff_driver *driver,
1715                              struct grep_source *gs)
1716{
1717        struct diff_filespec *df;
1718        char *buf;
1719        size_t size;
1720
1721        if (!driver || !driver->textconv)
1722                return grep_source_load(gs);
1723
1724        /*
1725         * The textconv interface is intimately tied to diff_filespecs, so we
1726         * have to pretend to be one. If we could unify the grep_source
1727         * and diff_filespec structs, this mess could just go away.
1728         */
1729        df = alloc_filespec(gs->path);
1730        switch (gs->type) {
1731        case GREP_SOURCE_OID:
1732                fill_filespec(df, gs->identifier, 1, 0100644);
1733                break;
1734        case GREP_SOURCE_FILE:
1735                fill_filespec(df, &null_oid, 0, 0100644);
1736                break;
1737        default:
1738                BUG("attempt to textconv something without a path?");
1739        }
1740
1741        /*
1742         * fill_textconv is not remotely thread-safe; it may load objects
1743         * behind the scenes, and it modifies the global diff tempfile
1744         * structure.
1745         */
1746        grep_read_lock();
1747        size = fill_textconv(r, driver, df, &buf);
1748        grep_read_unlock();
1749        free_filespec(df);
1750
1751        /*
1752         * The normal fill_textconv usage by the diff machinery would just keep
1753         * the textconv'd buf separate from the diff_filespec. But much of the
1754         * grep code passes around a grep_source and assumes that its "buf"
1755         * pointer is the beginning of the thing we are searching. So let's
1756         * install our textconv'd version into the grep_source, taking care not
1757         * to leak any existing buffer.
1758         */
1759        grep_source_clear_data(gs);
1760        gs->buf = buf;
1761        gs->size = size;
1762
1763        return 0;
1764}
1765
/*
 * Return 1 if every byte in [bol, eol) is whitespace (which includes the
 * case bol == eol), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
	const char *p = bol;

	for (; p < eol; p++) {
		if (!isspace(*p))
			break;
	}
	return p == eol;
}
1772
1773static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1774{
1775        char *bol;
1776        char *peek_bol = NULL;
1777        unsigned long left;
1778        unsigned lno = 1;
1779        unsigned last_hit = 0;
1780        int binary_match_only = 0;
1781        unsigned count = 0;
1782        int try_lookahead = 0;
1783        int show_function = 0;
1784        struct userdiff_driver *textconv = NULL;
1785        enum grep_context ctx = GREP_CONTEXT_HEAD;
1786        xdemitconf_t xecfg;
1787
1788        if (!opt->output)
1789                opt->output = std_output;
1790
1791        if (opt->pre_context || opt->post_context || opt->file_break ||
1792            opt->funcbody) {
1793                /* Show hunk marks, except for the first file. */
1794                if (opt->last_shown)
1795                        opt->show_hunk_mark = 1;
1796                /*
1797                 * If we're using threads then we can't easily identify
1798                 * the first file.  Always put hunk marks in that case
1799                 * and skip the very first one later in work_done().
1800                 */
1801                if (opt->output != std_output)
1802                        opt->show_hunk_mark = 1;
1803        }
1804        opt->last_shown = 0;
1805
1806        if (opt->allow_textconv) {
1807                grep_source_load_driver(gs);
1808                /*
1809                 * We might set up the shared textconv cache data here, which
1810                 * is not thread-safe.
1811                 */
1812                grep_attr_lock();
1813                textconv = userdiff_get_textconv(gs->driver);
1814                grep_attr_unlock();
1815        }
1816
1817        /*
1818         * We know the result of a textconv is text, so we only have to care
1819         * about binary handling if we are not using it.
1820         */
1821        if (!textconv) {
1822                switch (opt->binary) {
1823                case GREP_BINARY_DEFAULT:
1824                        if (grep_source_is_binary(gs))
1825                                binary_match_only = 1;
1826                        break;
1827                case GREP_BINARY_NOMATCH:
1828                        if (grep_source_is_binary(gs))
1829                                return 0; /* Assume unmatch */
1830                        break;
1831                case GREP_BINARY_TEXT:
1832                        break;
1833                default:
1834                        BUG("unknown binary handling mode");
1835                }
1836        }
1837
1838        memset(&xecfg, 0, sizeof(xecfg));
1839        opt->priv = &xecfg;
1840
1841        try_lookahead = should_lookahead(opt);
1842
1843        if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
1844                return 0;
1845
1846        bol = gs->buf;
1847        left = gs->size;
1848        while (left) {
1849                char *eol, ch;
1850                int hit;
1851                ssize_t cno;
1852                ssize_t col = -1, icol = -1;
1853
1854                /*
1855                 * look_ahead() skips quickly to the line that possibly
1856                 * has the next hit; don't call it if we need to do
1857                 * something more than just skipping the current line
1858                 * in response to an unmatch for the current line.  E.g.
1859                 * inside a post-context window, we will show the current
1860                 * line as a context around the previous hit when it
1861                 * doesn't hit.
1862                 */
1863                if (try_lookahead
1864                    && !(last_hit
1865                         && (show_function ||
1866                             lno <= last_hit + opt->post_context))
1867                    && look_ahead(opt, &left, &lno, &bol))
1868                        break;
1869                eol = end_of_line(bol, &left);
1870                ch = *eol;
1871                *eol = 0;
1872
1873                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1874                        ctx = GREP_CONTEXT_BODY;
1875
1876                hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
1877                *eol = ch;
1878
1879                if (collect_hits)
1880                        goto next_line;
1881
1882                /* "grep -v -e foo -e bla" should list lines
1883                 * that do not have either, so inversion should
1884                 * be done outside.
1885                 */
1886                if (opt->invert)
1887                        hit = !hit;
1888                if (opt->unmatch_name_only) {
1889                        if (hit)
1890                                return 0;
1891                        goto next_line;
1892                }
1893                if (hit) {
1894                        count++;
1895                        if (opt->status_only)
1896                                return 1;
1897                        if (opt->name_only) {
1898                                show_name(opt, gs->name);
1899                                return 1;
1900                        }
1901                        if (opt->count)
1902                                goto next_line;
1903                        if (binary_match_only) {
1904                                opt->output(opt, "Binary file ", 12);
1905                                output_color(opt, gs->name, strlen(gs->name),
1906                                             opt->colors[GREP_COLOR_FILENAME]);
1907                                opt->output(opt, " matches\n", 9);
1908                                return 1;
1909                        }
1910                        /* Hit at this line.  If we haven't shown the
1911                         * pre-context lines, we would need to show them.
1912                         */
1913                        if (opt->pre_context || opt->funcbody)
1914                                show_pre_context(opt, gs, bol, eol, lno);
1915                        else if (opt->funcname)
1916                                show_funcname_line(opt, gs, bol, lno);
1917                        cno = opt->invert ? icol : col;
1918                        if (cno < 0) {
1919                                /*
1920                                 * A negative cno indicates that there was no
1921                                 * match on the line. We are thus inverted and
1922                                 * being asked to show all lines that _don't_
1923                                 * match a given expression. Therefore, set cno
1924                                 * to 0 to suggest the whole line matches.
1925                                 */
1926                                cno = 0;
1927                        }
1928                        show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
1929                        last_hit = lno;
1930                        if (opt->funcbody)
1931                                show_function = 1;
1932                        goto next_line;
1933                }
1934                if (show_function && (!peek_bol || peek_bol < bol)) {
1935                        unsigned long peek_left = left;
1936                        char *peek_eol = eol;
1937
1938                        /*
1939                         * Trailing empty lines are not interesting.
1940                         * Peek past them to see if they belong to the
1941                         * body of the current function.
1942                         */
1943                        peek_bol = bol;
1944                        while (is_empty_line(peek_bol, peek_eol)) {
1945                                peek_bol = peek_eol + 1;
1946                                peek_eol = end_of_line(peek_bol, &peek_left);
1947                        }
1948
1949                        if (match_funcname(opt, gs, peek_bol, peek_eol))
1950                                show_function = 0;
1951                }
1952                if (show_function ||
1953                    (last_hit && lno <= last_hit + opt->post_context)) {
1954                        /* If the last hit is within the post context,
1955                         * we need to show this line.
1956                         */
1957                        show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
1958                }
1959
1960        next_line:
1961                bol = eol + 1;
1962                if (!left)
1963                        break;
1964                left--;
1965                lno++;
1966        }
1967
1968        if (collect_hits)
1969                return 0;
1970
1971        if (opt->status_only)
1972                return opt->unmatch_name_only;
1973        if (opt->unmatch_name_only) {
1974                /* We did not see any hit, so we want to show this */
1975                show_name(opt, gs->name);
1976                return 1;
1977        }
1978
1979        xdiff_clear_find_func(&xecfg);
1980        opt->priv = NULL;
1981
1982        /* NEEDSWORK:
1983         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1984         * which feels mostly useless but sometimes useful.  Maybe
1985         * make it another option?  For now suppress them.
1986         */
1987        if (opt->count && count) {
1988                char buf[32];
1989                if (opt->pathname) {
1990                        output_color(opt, gs->name, strlen(gs->name),
1991                                     opt->colors[GREP_COLOR_FILENAME]);
1992                        output_sep(opt, ':');
1993                }
1994                xsnprintf(buf, sizeof(buf), "%u\n", count);
1995                opt->output(opt, buf, strlen(buf));
1996                return 1;
1997        }
1998        return !!last_hit;
1999}
2000
2001static void clr_hit_marker(struct grep_expr *x)
2002{
2003        /* All-hit markers are meaningful only at the very top level
2004         * OR node.
2005         */
2006        while (1) {
2007                x->hit = 0;
2008                if (x->node != GREP_NODE_OR)
2009                        return;
2010                x->u.binary.left->hit = 0;
2011                x = x->u.binary.right;
2012        }
2013}
2014
2015static int chk_hit_marker(struct grep_expr *x)
2016{
2017        /* Top level nodes have hit markers.  See if they all are hits */
2018        while (1) {
2019                if (x->node != GREP_NODE_OR)
2020                        return x->hit;
2021                if (!x->u.binary.left->hit)
2022                        return 0;
2023                x = x->u.binary.right;
2024        }
2025}
2026
2027int grep_source(struct grep_opt *opt, struct grep_source *gs)
2028{
2029        /*
2030         * we do not have to do the two-pass grep when we do not check
2031         * buffer-wide "all-match".
2032         */
2033        if (!opt->all_match)
2034                return grep_source_1(opt, gs, 0);
2035
2036        /* Otherwise the toplevel "or" terms hit a bit differently.
2037         * We first clear hit markers from them.
2038         */
2039        clr_hit_marker(opt->pattern_expression);
2040        grep_source_1(opt, gs, 1);
2041
2042        if (!chk_hit_marker(opt->pattern_expression))
2043                return 0;
2044
2045        return grep_source_1(opt, gs, 0);
2046}
2047
2048int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
2049{
2050        struct grep_source gs;
2051        int r;
2052
2053        grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
2054        gs.buf = buf;
2055        gs.size = size;
2056
2057        r = grep_source(opt, &gs);
2058
2059        grep_source_clear(&gs);
2060        return r;
2061}
2062
2063void grep_source_init(struct grep_source *gs, enum grep_source_type type,
2064                      const char *name, const char *path,
2065                      const void *identifier)
2066{
2067        gs->type = type;
2068        gs->name = xstrdup_or_null(name);
2069        gs->path = xstrdup_or_null(path);
2070        gs->buf = NULL;
2071        gs->size = 0;
2072        gs->driver = NULL;
2073
2074        switch (type) {
2075        case GREP_SOURCE_FILE:
2076                gs->identifier = xstrdup(identifier);
2077                break;
2078        case GREP_SOURCE_OID:
2079                gs->identifier = oiddup(identifier);
2080                break;
2081        case GREP_SOURCE_BUF:
2082                gs->identifier = NULL;
2083                break;
2084        }
2085}
2086
2087void grep_source_clear(struct grep_source *gs)
2088{
2089        FREE_AND_NULL(gs->name);
2090        FREE_AND_NULL(gs->path);
2091        FREE_AND_NULL(gs->identifier);
2092        grep_source_clear_data(gs);
2093}
2094
2095void grep_source_clear_data(struct grep_source *gs)
2096{
2097        switch (gs->type) {
2098        case GREP_SOURCE_FILE:
2099        case GREP_SOURCE_OID:
2100                FREE_AND_NULL(gs->buf);
2101                gs->size = 0;
2102                break;
2103        case GREP_SOURCE_BUF:
2104                /* leave user-provided buf intact */
2105                break;
2106        }
2107}
2108
2109static int grep_source_load_oid(struct grep_source *gs)
2110{
2111        enum object_type type;
2112
2113        grep_read_lock();
2114        gs->buf = read_object_file(gs->identifier, &type, &gs->size);
2115        grep_read_unlock();
2116
2117        if (!gs->buf)
2118                return error(_("'%s': unable to read %s"),
2119                             gs->name,
2120                             oid_to_hex(gs->identifier));
2121        return 0;
2122}
2123
2124static int grep_source_load_file(struct grep_source *gs)
2125{
2126        const char *filename = gs->identifier;
2127        struct stat st;
2128        char *data;
2129        size_t size;
2130        int i;
2131
2132        if (lstat(filename, &st) < 0) {
2133        err_ret:
2134                if (errno != ENOENT)
2135                        error_errno(_("failed to stat '%s'"), filename);
2136                return -1;
2137        }
2138        if (!S_ISREG(st.st_mode))
2139                return -1;
2140        size = xsize_t(st.st_size);
2141        i = open(filename, O_RDONLY);
2142        if (i < 0)
2143                goto err_ret;
2144        data = xmallocz(size);
2145        if (st.st_size != read_in_full(i, data, size)) {
2146                error_errno(_("'%s': short read"), filename);
2147                close(i);
2148                free(data);
2149                return -1;
2150        }
2151        close(i);
2152
2153        gs->buf = data;
2154        gs->size = size;
2155        return 0;
2156}
2157
2158static int grep_source_load(struct grep_source *gs)
2159{
2160        if (gs->buf)
2161                return 0;
2162
2163        switch (gs->type) {
2164        case GREP_SOURCE_FILE:
2165                return grep_source_load_file(gs);
2166        case GREP_SOURCE_OID:
2167                return grep_source_load_oid(gs);
2168        case GREP_SOURCE_BUF:
2169                return gs->buf ? 0 : -1;
2170        }
2171        BUG("invalid grep_source type to load");
2172}
2173
2174void grep_source_load_driver(struct grep_source *gs)
2175{
2176        if (gs->driver)
2177                return;
2178
2179        grep_attr_lock();
2180        if (gs->path)
2181                gs->driver = userdiff_find_by_path(gs->path);
2182        if (!gs->driver)
2183                gs->driver = userdiff_find_by_name("default");
2184        grep_attr_unlock();
2185}
2186
2187static int grep_source_is_binary(struct grep_source *gs)
2188{
2189        grep_source_load_driver(gs);
2190        if (gs->driver->binary != -1)
2191                return gs->driver->binary;
2192
2193        if (!grep_source_load(gs))
2194                return buffer_is_binary(gs->buf, gs->size);
2195
2196        return 0;
2197}