#include "xdiff-interface.h"
#include "diff.h"
#include "diffcore.h"
+ #include "commit.h"
+ #include "quote.h"
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs);
int erroffset;
int options = PCRE_MULTILINE;
- if (opt->ignore_case)
+ if (opt->ignore_case) {
+ if (has_non_ascii(p->pattern))
+ p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
+ }
+ if (is_utf8_locale() && has_non_ascii(p->pattern))
+ options |= PCRE_UTF8;
p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
- NULL);
+ p->pcre_tables);
if (!p->pcre_regexp)
compile_regexp_failed(p, error);
{
pcre_free(p->pcre_regexp);
pcre_free(p->pcre_extra_info);
+ pcre_free((void *)p->pcre_tables);
}
#else /* !USE_LIBPCRE */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
return 1;
}
+ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
+ {
+ struct strbuf sb = STRBUF_INIT;
+ int err;
+ int regflags;
+
+ basic_regex_quote_buf(&sb, p->pattern);
+ regflags = opt->regflags & ~REG_EXTENDED;
+ if (opt->ignore_case)
+ regflags |= REG_ICASE;
+ err = regcomp(&p->regexp, sb.buf, regflags);
+ if (opt->debug)
+ fprintf(stderr, "fixed %s\n", sb.buf);
+ strbuf_release(&sb);
+ if (err) {
+ char errbuf[1024];
+ regerror(err, &p->regexp, errbuf, sizeof(errbuf));
+ regfree(&p->regexp);
+ compile_regexp_failed(p, errbuf);
+ }
+ }
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
+ int icase, ascii_only;
int err;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
-
+ icase = opt->regflags & REG_ICASE || p->ignore_case;
+ ascii_only = !has_non_ascii(p->pattern);
+
+ /*
+ * Even when -F (fixed) asks us to do a non-regexp search, we
+ * may not be able to correctly case-fold when -i
+ * (ignore-case) is asked (in which case, we'll synthesize a
+ * regexp to match the pattern that matches regexp special
+ * characters literally, while ignoring case differences). On
+ * the other hand, even without -F, if the pattern does not
+ * have any regexp special characters and there is no need for
+ * case-folding search, we can internally turn it into a
+ * simple string match using kws. p->fixed tells us if we
+ * want to use kws.
+ */
if (opt->fixed || is_fixed(p->pattern, p->patternlen))
- p->fixed = 1;
+ p->fixed = !icase || ascii_only;
else
p->fixed = 0;
if (p->fixed) {
- if (opt->regflags & REG_ICASE || p->ignore_case)
- p->kws = kwsalloc(tolower_trans_tbl);
- else
- p->kws = kwsalloc(NULL);
+ p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
kwsincr(p->kws, p->pattern, p->patternlen);
kwsprep(p->kws);
return;
+ } else if (opt->fixed) {
+ /*
+ * We come here when the pattern has the non-ascii
+ * characters we cannot case-fold, and asked to
+ * ignore-case.
+ */
+ compile_fixed_regexp(p, opt);
+ return;
}
if (opt->pcre) {
return 0;
}
/*
 * Tell whether the half-open byte range [bol, eol) is blank.
 *
 * Returns 1 when the range is empty or holds only whitespace, and 0
 * otherwise (including when bol is already past eol).
 */
static int is_empty_line(const char *bol, const char *eol)
{
	/*
	 * The standard <ctype.h> classifiers are only defined for EOF and
	 * for values representable as unsigned char; a plain char holding
	 * a byte >= 0x80 may be negative, so convert before classifying.
	 */
	while (bol < eol && isspace((unsigned char)*bol))
		bol++;
	return bol == eol;
}
+
static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
{
char *bol;
+ char *peek_bol = NULL;
unsigned long left;
unsigned lno = 1;
unsigned last_hit = 0;
show_function = 1;
goto next_line;
}
- if (show_function && match_funcname(opt, gs, bol, eol))
- show_function = 0;
+ if (show_function && (!peek_bol || peek_bol < bol)) {
+ unsigned long peek_left = left;
+ char *peek_eol = eol;
+
+ /*
+ * Trailing empty lines are not interesting.
+ * Peek past them to see if they belong to the
+ * body of the current function.
+ */
+ peek_bol = bol;
+ while (is_empty_line(peek_bol, peek_eol)) {
+ peek_bol = peek_eol + 1;
+ peek_eol = end_of_line(peek_bol, &peek_left);
+ }
+
+ if (match_funcname(opt, gs, peek_bol, peek_eol))
+ show_function = 0;
+ }
if (show_function ||
(last_hit && lno <= last_hit + opt->post_context)) {
/* If the last hit is within the post context,
if (lstat(filename, &st) < 0) {
err_ret:
if (errno != ENOENT)
- error(_("'%s': %s"), filename, strerror(errno));
+ error_errno(_("failed to stat '%s'"), filename);
return -1;
}
if (!S_ISREG(st.st_mode))
i = open(filename, O_RDONLY);
if (i < 0)
goto err_ret;
- data = xmalloc(size + 1);
+ data = xmallocz(size);
if (st.st_size != read_in_full(i, data, size)) {
- error(_("'%s': short read %s"), filename, strerror(errno));
+ error_errno(_("'%s': short read"), filename);
close(i);
free(data);
return -1;
}
close(i);
- data[size] = 0;
gs->buf = data;
gs->size = size;
free(to_free);
}
+void sq_quotef(struct strbuf *dst, const char *fmt, ...)
+{
+ struct strbuf src = STRBUF_INIT;
+
+ va_list ap;
+ va_start(ap, fmt);
+ strbuf_vaddf(&src, fmt, ap);
+ va_end(ap);
+
+ sq_quote_buf(dst, src.buf);
+ strbuf_release(&src);
+}
+
void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
{
int i;
}
strbuf_addch(sb, '"');
}
+
/*
 * Append to sb a POSIX basic regular expression (BRE) that matches the
 * NUL-terminated string src literally.
 *
 * Characters that are special in a BRE are prefixed with a backslash:
 * '[', '.', '\\' and '*' anywhere, '^' only as the first character and
 * '$' only as the last one.  Everything else is copied unchanged.
 */
void basic_regex_quote_buf(struct strbuf *sb, const char *src)
{
	char c;

	if (*src == '^') {
		/* only beginning '^' is special and needs quoting */
		strbuf_addch(sb, '\\');
		strbuf_addch(sb, *src++);
	} else if (*src == '*') {
		/*
		 * A '*' that opens the expression is not special, no
		 * quoting.  This must not be applied after a quoted '^':
		 * "\^" is an ordinary character, so an unquoted '*'
		 * following it would act as a repetition operator and
		 * "^*" would turn into "zero or more carets".  In that
		 * case the '*' is left to the loop below, which quotes it.
		 */
		strbuf_addch(sb, *src++);
	}

	while ((c = *src++) != '\0') {
		switch (c) {
		case '[':
		case '.':
		case '\\':
		case '*':
			strbuf_addch(sb, '\\');
			strbuf_addch(sb, c);
			break;

		case '$':
			/* only the end '$' is special and needs quoting */
			if (*src == '\0')
				strbuf_addch(sb, '\\');
			strbuf_addch(sb, c);
			break;

		default:
			strbuf_addch(sb, c);
			break;
		}
	}
}
* sq_quote_buf() writes to an existing buffer of specified size; it
* will return the number of characters that would have been written
* excluding the final null regardless of the buffer size.
+ *
+ * sq_quotef() quotes the entire formatted string as a single result.
*/
extern void sq_quote_buf(struct strbuf *, const char *src);
extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+extern void sq_quotef(struct strbuf *, const char *fmt, ...);
/* This unwraps what sq_quote() produces in place, but returns
* NULL if the input does not look like what sq_quote would have
extern void perl_quote_buf(struct strbuf *sb, const char *src);
extern void python_quote_buf(struct strbuf *sb, const char *src);
extern void tcl_quote_buf(struct strbuf *sb, const char *src);
+ extern void basic_regex_quote_buf(struct strbuf *sb, const char *src);
#endif
--- /dev/null
- int main(int argc, char **argv)
+#include "git-compat-util.h"
++#include "gettext.h"
+
- exit(0);
/*
 * One flag name accepted on the command line.
 *
 * "exec" records which POSIX call consumes the flag: zero for a
 * regcomp() compile flag, non-zero for a regexec() execution flag.
 * The two families share numeric values on some platforms, so they
 * must never be ORed into the same word.
 */
struct reg_flag {
	const char *name;
	int flag;
	int exec;
};

static struct reg_flag reg_flags[] = {
	{ "EXTENDED",	REG_EXTENDED,	0 },
	{ "NEWLINE",	REG_NEWLINE,	0 },
	{ "ICASE",	REG_ICASE,	0 },
	{ "NOTBOL",	REG_NOTBOL,	1 },
#ifdef REG_STARTEND
	{ "STARTEND",	REG_STARTEND,	1 },
#endif
	{ NULL, 0, 0 }
};

/*
 * Probe the platform regex library for the bug referenced below: a
 * negated bracket expression compiled with REG_NEWLINE must not match
 * a newline.  Returns 0 when the library behaves; every failure is
 * reported through die().
 */
static int test_regex_bug(void)
{
	const char *pat = "[^={} \t]+";
	const char *str = "={}\nfred";
	regex_t r;
	regmatch_t m[1];

	if (regcomp(&r, pat, REG_EXTENDED | REG_NEWLINE))
		die("failed regcomp() for pattern '%s'", pat);
	if (regexec(&r, str, 1, m, 0))
		die("no match of pattern '%s' to string '%s'", pat, str);

	/* http://sourceware.org/bugzilla/show_bug.cgi?id=3957 */
	if (m[0].rm_so == 3) /* matches '\n' when it should not */
		die("regex bug confirmed: re-build git with NO_REGEX=1");

	regfree(&r);
	return 0;
}

/*
 * Usage:
 *   test-regex --bug
 *   test-regex <pattern> <string> [<options>]
 *
 * The first form runs test_regex_bug().  The second compiles
 * <pattern>, matches it against <string>, and returns 0 on a match or
 * 1 when there is none.  Each option is a name from reg_flags[];
 * compile flags go to regcomp() and execution flags go to regexec().
 */
int main(int argc, char **argv)
{
	const char *pat;
	const char *str;
	int cflags = 0;
	int eflags = 0;
	int ret;
	regex_t r;
	regmatch_t m[1];

	if (argc == 2 && !strcmp(argv[1], "--bug"))
		return test_regex_bug();
	else if (argc < 3)
		usage("test-regex --bug\n"
		      "test-regex <pattern> <string> [<options>]");

	argv++;
	pat = *argv++;
	str = *argv++;
	while (*argv) {
		struct reg_flag *rf;
		for (rf = reg_flags; rf->name; rf++)
			if (!strcmp(*argv, rf->name)) {
				if (rf->exec)
					eflags |= rf->flag;
				else
					cflags |= rf->flag;
				break;
			}
		if (!rf->name)
			die("do not recognize %s", *argv);
		argv++;
	}
	git_setup_gettext();

	if (regcomp(&r, pat, cflags))
		die("failed regcomp() for pattern '%s'", pat);

#ifdef REG_STARTEND
	/*
	 * With REG_STARTEND, regexec() reads the range to search from
	 * m[0] instead of relying on the NUL terminator, so it has to be
	 * filled in before the call; cover the whole string.
	 */
	if (eflags & REG_STARTEND) {
		m[0].rm_so = 0;
		m[0].rm_eo = (regoff_t)strlen(str);
	}
#endif

	ret = regexec(&r, str, 1, m, eflags) ? 1 : 0;
	regfree(&r);
	return ret;
}