From: Junio C Hamano Date: Tue, 12 Jan 2010 08:56:15 +0000 (-0800) Subject: Merge branch 'jc/maint-1.6.4-grep-lookahead' into jc/maint-grep-lookahead X-Git-Tag: v1.7.0-rc0~64^2~4 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/e2d2e383d87dcbcd296ca9f21f3c8b7ef853c928?ds=inline;hp=-c Merge branch 'jc/maint-1.6.4-grep-lookahead' into jc/maint-grep-lookahead * jc/maint-1.6.4-grep-lookahead: grep: optimize built-in grep by skipping lines that do not hit This needs to be an evil merge as fixmatch() changed signature since 5183bf6 (grep: Allow case insensitive search of fixed-strings, 2009-11-06). Signed-off-by: Junio C Hamano --- e2d2e383d87dcbcd296ca9f21f3c8b7ef853c928 diff --combined grep.c index bdadf2c0cc,03ffcd4042..62723da134 --- a/grep.c +++ b/grep.c @@@ -41,7 -41,6 +41,7 @@@ static void compile_regexp(struct grep_ int err; p->word_regexp = opt->word_regexp; + p->ignore_case = opt->ignore_case; if (opt->fixed || is_fixed(p->pattern)) p->fixed = 1; @@@ -263,15 -262,9 +263,15 @@@ static void show_name(struct grep_opt * printf("%s%c", name, opt->null_following_name ? 
'\0' : '\n'); } -static int fixmatch(const char *pattern, char *line, regmatch_t *match) + +static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match) { - char *hit = strstr(line, pattern); + char *hit; + if (ignore_case) + hit = strcasestr(line, pattern); + else + hit = strstr(line, pattern); + if (!hit) { match->rm_so = match->rm_eo = -1; return REG_NOMATCH; @@@ -333,7 -326,7 +333,7 @@@ static int match_one_pattern(struct gre again: if (p->fixed) - hit = !fixmatch(p->pattern, bol, pmatch); + hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch); else hit = !regexec(&p->regexp, bol, 1, pmatch, eflags); @@@ -615,6 -608,65 +615,65 @@@ static void show_pre_context(struct gre } } + static int should_lookahead(struct grep_opt *opt) + { + struct grep_pat *p; + + if (opt->extended) + return 0; /* punt for too complex stuff */ + if (opt->invert) + return 0; + for (p = opt->pattern_list; p; p = p->next) { + if (p->token != GREP_PATTERN) + return 0; /* punt for "header only" and stuff */ + } + return 1; + } + + static int look_ahead(struct grep_opt *opt, + unsigned long *left_p, + unsigned *lno_p, + char **bol_p) + { + unsigned lno = *lno_p; + char *bol = *bol_p; + struct grep_pat *p; + char *sp, *last_bol; + regoff_t earliest = -1; + + for (p = opt->pattern_list; p; p = p->next) { + int hit; + regmatch_t m; + + if (p->fixed) - hit = !fixmatch(p->pattern, bol, &m); ++ hit = !fixmatch(p->pattern, bol, p->ignore_case, &m); + else + hit = !regexec(&p->regexp, bol, 1, &m, 0); + if (!hit || m.rm_so < 0 || m.rm_eo < 0) + continue; + if (earliest < 0 || m.rm_so < earliest) + earliest = m.rm_so; + } + + if (earliest < 0) { + *bol_p = bol + *left_p; + *left_p = 0; + return 1; + } + for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--) + ; /* find the beginning of the line */ + last_bol = sp; + + for (sp = bol; sp < last_bol; sp++) { + if (*sp == '\n') + lno++; + } + *left_p -= last_bol - bol; + *bol_p = last_bol; + *lno_p = lno; + return 0; + } 
+ static int grep_buffer_1(struct grep_opt *opt, const char *name, char *buf, unsigned long size, int collect_hits) { @@@ -624,6 -676,7 +683,7 @@@ unsigned last_hit = 0; int binary_match_only = 0; unsigned count = 0; + int try_lookahead = 0; enum grep_context ctx = GREP_CONTEXT_HEAD; xdemitconf_t xecfg; @@@ -652,11 -705,26 +712,26 @@@ opt->priv = &xecfg; } } + try_lookahead = should_lookahead(opt); while (left) { char *eol, ch; int hit; + /* + * look_ahead() skips quickly to the line that possibly + * has the next hit; don't call it if we need to do + * something more than just skipping the current line + * in response to an unmatch for the current line. E.g. + * inside a post-context window, we will show the current + * line as a context around the previous hit when it + * doesn't hit. + */ + if (try_lookahead + && !(last_hit + && lno <= last_hit + opt->post_context) + && look_ahead(opt, &left, &lno, &bol)) + break; eol = end_of_line(bol, &left); ch = *eol; *eol = 0;