memset(opt, 0, sizeof(*opt));
opt->relative = 1;
opt->pathname = 1;
- opt->regflags = REG_NEWLINE;
opt->max_depth = -1;
opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
color_set(opt->color_context, "");
opt->linenum = def->linenum;
opt->max_depth = def->max_depth;
opt->pathname = def->pathname;
- opt->regflags = def->regflags;
opt->relative = def->relative;
opt->output = def->output;
static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
{
+ /*
+ * When committing to the pattern type by setting the relevant
+ * fields in grep_opt it's generally not necessary to zero out
+ * the fields we're not choosing, since they won't have been
+ * set by anything. The extended_regexp_option field is the
+ * only exception to this.
+ *
+ * This is because in the process of parsing grep.patternType
+ * & grep.extendedRegexp we set opt->pattern_type_option and
+ * opt->extended_regexp_option, respectively. We then
+ * internally use opt->extended_regexp_option to see if we're
+ * compiling an ERE. It must be unset if that's not actually
+ * the case.
+ */
+ if (pattern_type != GREP_PATTERN_TYPE_ERE &&
+ opt->extended_regexp_option)
+ opt->extended_regexp_option = 0;
+
switch (pattern_type) {
case GREP_PATTERN_TYPE_UNSPECIFIED:
/* fall through */
case GREP_PATTERN_TYPE_BRE:
- opt->fixed = 0;
- opt->pcre1 = 0;
- opt->pcre2 = 0;
break;
case GREP_PATTERN_TYPE_ERE:
- opt->fixed = 0;
- opt->pcre1 = 0;
- opt->pcre2 = 0;
- opt->regflags |= REG_EXTENDED;
+ opt->extended_regexp_option = 1;
break;
case GREP_PATTERN_TYPE_FIXED:
opt->fixed = 1;
- opt->pcre1 = 0;
- opt->pcre2 = 0;
break;
case GREP_PATTERN_TYPE_PCRE:
- opt->fixed = 0;
#ifdef USE_LIBPCRE2
- opt->pcre1 = 0;
opt->pcre2 = 1;
#else
/*
* "cannot use Perl-compatible regexes[...]".
*/
opt->pcre1 = 1;
- opt->pcre2 = 0;
#endif
break;
}
else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
grep_set_pattern_type_option(opt->pattern_type_option, opt);
else if (opt->extended_regexp_option)
+ /*
+ * This branch *must* come after the one above that sets the
+ * type from opt->pattern_type_option: we don't want
+ * grep.extendedRegexp to override grep.patternType!
+ */
grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
}
if (!p->pcre1_regexp)
compile_regexp_failed(p, error);
- p->pcre1_extra_info = pcre_study(p->pcre1_regexp, PCRE_STUDY_JIT_COMPILE, &error);
+ p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
if (!p->pcre1_extra_info && error)
die("%s", error);
int options = PCRE2_MULTILINE;
const uint8_t *character_tables = NULL;
int jitret;
+ int patinforet;
+ size_t jitsizearg;
assert(opt->pcre2);
jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
if (jitret)
die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
+
+ /*
+ * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
+ * tells us whether the library itself supports JIT,
+ * but to see whether we're going to be actually using
+ * JIT we need to extract PCRE2_INFO_JITSIZE from the
+ * pattern *after* we do pcre2_jit_compile() above.
+ *
+ * This is because if the pattern contains the
+ * (*NO_JIT) verb (see pcre2syntax(3))
+ * pcre2_jit_compile() will exit early with 0. If we
+ * then proceed to call pcre2_jit_match() further down
+ * the line instead of pcre2_match() we'll either
+ * segfault (pre PCRE2 10.31) or run into a fatal error
+ * (post PCRE2 10.31).
+ */
+ patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
+ if (patinforet)
+ BUG("pcre2_pattern_info() failed: %d", patinforet);
+ if (jitsizearg == 0) {
+ p->pcre2_jit_on = 0;
+ return;
+ }
+
p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
if (!p->pcre2_jit_stack)
die("Couldn't allocate PCRE2 JIT stack");
{
struct strbuf sb = STRBUF_INIT;
int err;
- int regflags = opt->regflags;
+ int regflags = 0;
basic_regex_quote_buf(&sb, p->pattern);
if (opt->ignore_case)
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
- int icase, ascii_only;
+ int ascii_only;
int err;
+ int regflags = REG_NEWLINE;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
- icase = opt->regflags & REG_ICASE || p->ignore_case;
ascii_only = !has_non_ascii(p->pattern);
/*
if (opt->fixed ||
has_null(p->pattern, p->patternlen) ||
is_fixed(p->pattern, p->patternlen))
- p->fixed = !icase || ascii_only;
+ p->fixed = !p->ignore_case || ascii_only;
if (p->fixed) {
- p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
+ p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
kwsincr(p->kws, p->pattern, p->patternlen);
kwsprep(p->kws);
return;
return;
}
- err = regcomp(&p->regexp, p->pattern, opt->regflags);
+ if (p->ignore_case)
+ regflags |= REG_ICASE;
+ if (opt->extended_regexp_option)
+ regflags |= REG_EXTENDED;
+ err = regcomp(&p->regexp, p->pattern, regflags);
if (err) {
char errbuf[1024];
regerror(err, &p->regexp, errbuf, 1024);
}
}
+static int is_empty_line(const char *bol, const char *eol);
+
static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
char *bol, char *end, unsigned lno)
{
- unsigned cur = lno, from = 1, funcname_lno = 0;
- int funcname_needed = !!opt->funcname;
-
- if (opt->funcbody && !match_funcname(opt, gs, bol, end))
- funcname_needed = 2;
+ unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
+ int funcname_needed = !!opt->funcname, comment_needed = 0;
if (opt->pre_context < lno)
from = lno - opt->pre_context;
if (from <= opt->last_shown)
from = opt->last_shown + 1;
+ orig_from = from;
+ if (opt->funcbody) {
+ if (match_funcname(opt, gs, bol, end))
+ comment_needed = 1;
+ else
+ funcname_needed = 1;
+ from = opt->last_shown + 1;
+ }
/* Rewind. */
- while (bol > gs->buf &&
- cur > (funcname_needed == 2 ? opt->last_shown + 1 : from)) {
+ while (bol > gs->buf && cur > from) {
+ char *next_bol = bol;
char *eol = --bol;
while (bol > gs->buf && bol[-1] != '\n')
bol--;
cur--;
+ if (comment_needed && (is_empty_line(bol, eol) ||
+ match_funcname(opt, gs, bol, eol))) {
+ comment_needed = 0;
+ from = orig_from;
+ if (cur < from) {
+ cur++;
+ bol = next_bol;
+ break;
+ }
+ }
if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
funcname_lno = cur;
funcname_needed = 0;
+ if (opt->funcbody)
+ comment_needed = 1;
+ else
+ from = orig_from;
}
}
return 0;
if (opt->status_only)
- return 0;
+ return opt->unmatch_name_only;
if (opt->unmatch_name_only) {
/* We did not see any hit, so we want to show this */
show_name(opt, gs->name);
case GREP_SOURCE_FILE:
gs->identifier = xstrdup(identifier);
break;
- case GREP_SOURCE_SUBMODULE:
- if (!identifier) {
- gs->identifier = NULL;
- break;
- }
- /*
- * FALL THROUGH
- * If the identifier is non-NULL (in the submodule case) it
- * will be a SHA1 that needs to be copied.
- */
case GREP_SOURCE_OID:
gs->identifier = oiddup(identifier);
break;
switch (gs->type) {
case GREP_SOURCE_FILE:
case GREP_SOURCE_OID:
- case GREP_SOURCE_SUBMODULE:
FREE_AND_NULL(gs->buf);
gs->size = 0;
break;
return grep_source_load_oid(gs);
case GREP_SOURCE_BUF:
return gs->buf ? 0 : -1;
- case GREP_SOURCE_SUBMODULE:
- break;
}
die("BUG: invalid grep_source type to load");
}