#include "xdiff-interface.h"
#include "diff.h"
#include "diffcore.h"
+#include "commit.h"
+#include "quote.h"
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs);
int erroffset;
int options = PCRE_MULTILINE;
- if (opt->ignore_case)
+ if (opt->ignore_case) {
+ if (has_non_ascii(p->pattern))
+ p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
+ }
+ if (is_utf8_locale() && has_non_ascii(p->pattern))
+ options |= PCRE_UTF8;
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
- NULL);
+ p->pcre_tables);
if (!p->pcre_regexp)
compile_regexp_failed(p, error);
{
pcre_free(p->pcre_regexp);
pcre_free(p->pcre_extra_info);
+ pcre_free((void *)p->pcre_tables);
}
#else /* !USE_LIBPCRE */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
return 1;
}
+static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int err;
+ int regflags;
+
+ basic_regex_quote_buf(&sb, p->pattern);
+ regflags = opt->regflags & ~REG_EXTENDED;
+ if (opt->ignore_case)
+ regflags |= REG_ICASE;
+ err = regcomp(&p->regexp, sb.buf, regflags);
+ if (opt->debug)
+ fprintf(stderr, "fixed %s\n", sb.buf);
+ strbuf_release(&sb);
+ if (err) {
+ char errbuf[1024];
+ regerror(err, &p->regexp, errbuf, sizeof(errbuf));
+ regfree(&p->regexp);
+ compile_regexp_failed(p, errbuf);
+ }
+}
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
+ int icase, ascii_only;
int err;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
+ icase = opt->regflags & REG_ICASE || p->ignore_case;
+ ascii_only = !has_non_ascii(p->pattern);
+ /*
+ * Even when -F (fixed) asks us to do a non-regexp search, we
+ * may not be able to correctly case-fold when -i
+ * (ignore-case) is asked (in which case, we'll synthesize a
+ * regexp to match the pattern that matches regexp special
+ * characters literally, while ignoring case differences). On
+ * the other hand, even without -F, if the pattern does not
+ * have any regexp special characters and there is no need for
+ * case-folding search, we can internally turn it into a
+ * simple string match using kws. p->fixed tells us if we
+ * want to use kws.
+ */
if (opt->fixed || is_fixed(p->pattern, p->patternlen))
- p->fixed = 1;
+ p->fixed = !icase || ascii_only;
else
p->fixed = 0;
if (p->fixed) {
- if (opt->regflags & REG_ICASE || p->ignore_case)
- p->kws = kwsalloc(tolower_trans_tbl);
- else
- p->kws = kwsalloc(NULL);
+ p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
kwsincr(p->kws, p->pattern, p->patternlen);
kwsprep(p->kws);
return;
+ } else if (opt->fixed) {
+ /*
+ * We come here when the pattern has the non-ascii
+ * characters we cannot case-fold, and asked to
+ * ignore-case.
+ */
+ compile_fixed_regexp(p, opt);
+ return;
}
if (opt->pcre) {
for (p = opt->header_list; p; p = p->next) {
if (p->token != GREP_PATTERN_HEAD)
- die("bug: a non-header pattern in grep header list.");
+ die("BUG: a non-header pattern in grep header list.");
if (p->field < GREP_HEADER_FIELD_MIN ||
GREP_HEADER_FIELD_MAX <= p->field)
- die("bug: unknown header field %d", p->field);
+ die("BUG: unknown header field %d", p->field);
compile_regexp(p, opt);
}
h = compile_pattern_atom(&pp);
if (!h || pp != p->next)
- die("bug: malformed header expr");
+ die("BUG: malformed header expr");
if (!header_group[p->field]) {
header_group[p->field] = h;
continue;
case GREP_BINARY_TEXT:
break;
default:
- die("bug: unknown binary handling mode");
+ die("BUG: unknown binary handling mode");
}
}