Merge branch 'jc/maint-1.6.4-grep-lookahead' into jc/maint-grep-lookahead
author Junio C Hamano <gitster@pobox.com>
Tue, 12 Jan 2010 08:56:15 +0000 (00:56 -0800)
committer Junio C Hamano <gitster@pobox.com>
Tue, 12 Jan 2010 08:58:13 +0000 (00:58 -0800)
* jc/maint-1.6.4-grep-lookahead:
grep: optimize built-in grep by skipping lines that do not hit

This needs to be an evil merge as fixmatch() changed signature since
5183bf6 (grep: Allow case insensitive search of fixed-strings,
2009-11-06).

Signed-off-by: Junio C Hamano <gitster@pobox.com>
1  2 
grep.c
diff --combined grep.c
index bdadf2c0ccfafc18011beee3bc05754860392523,03ffcd4042e95b521e2aed03ecf2ae6d65fef41c..62723da1340d8b1e7ddbe00544dadbfc5abf4c8c
--- 1/grep.c
--- 2/grep.c
+++ b/grep.c
@@@ -41,7 -41,6 +41,7 @@@ static void compile_regexp(struct grep_
        int err;
  
        p->word_regexp = opt->word_regexp;
 +      p->ignore_case = opt->ignore_case;
  
        if (opt->fixed || is_fixed(p->pattern))
                p->fixed = 1;
@@@ -263,15 -262,9 +263,15 @@@ static void show_name(struct grep_opt *
        printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
  }
  
 -static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 +
 +static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match)
  {
 -      char *hit = strstr(line, pattern);
 +      char *hit;
 +      if (ignore_case)
 +              hit = strcasestr(line, pattern);
 +      else
 +              hit = strstr(line, pattern);
 +
        if (!hit) {
                match->rm_so = match->rm_eo = -1;
                return REG_NOMATCH;
@@@ -333,7 -326,7 +333,7 @@@ static int match_one_pattern(struct gre
  
   again:
        if (p->fixed)
 -              hit = !fixmatch(p->pattern, bol, pmatch);
 +              hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch);
        else
                hit = !regexec(&p->regexp, bol, 1, pmatch, eflags);
  
@@@ -615,6 -608,65 +615,65 @@@ static void show_pre_context(struct gre
        }
  }
  
 -                      hit = !fixmatch(p->pattern, bol, &m);
+ static int should_lookahead(struct grep_opt *opt)
+ {
+       struct grep_pat *p;
+       if (opt->extended)
+               return 0; /* punt for too complex stuff */
+       if (opt->invert)
+               return 0;
+       for (p = opt->pattern_list; p; p = p->next) {
+               if (p->token != GREP_PATTERN)
+                       return 0; /* punt for "header only" and stuff */
+       }
+       return 1;
+ }
+ static int look_ahead(struct grep_opt *opt,
+                     unsigned long *left_p,
+                     unsigned *lno_p,
+                     char **bol_p)
+ {
+       unsigned lno = *lno_p;
+       char *bol = *bol_p;
+       struct grep_pat *p;
+       char *sp, *last_bol;
+       regoff_t earliest = -1;
+       for (p = opt->pattern_list; p; p = p->next) {
+               int hit;
+               regmatch_t m;
+               if (p->fixed)
++                      hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
+               else
+                       hit = !regexec(&p->regexp, bol, 1, &m, 0);
+               if (!hit || m.rm_so < 0 || m.rm_eo < 0)
+                       continue;
+               if (earliest < 0 || m.rm_so < earliest)
+                       earliest = m.rm_so;
+       }
+       if (earliest < 0) {
+               *bol_p = bol + *left_p;
+               *left_p = 0;
+               return 1;
+       }
+       for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
+               ; /* find the beginning of the line */
+       last_bol = sp;
+       for (sp = bol; sp < last_bol; sp++) {
+               if (*sp == '\n')
+                       lno++;
+       }
+       *left_p -= last_bol - bol;
+       *bol_p = last_bol;
+       *lno_p = lno;
+       return 0;
+ }
  static int grep_buffer_1(struct grep_opt *opt, const char *name,
                         char *buf, unsigned long size, int collect_hits)
  {
        unsigned last_hit = 0;
        int binary_match_only = 0;
        unsigned count = 0;
+       int try_lookahead = 0;
        enum grep_context ctx = GREP_CONTEXT_HEAD;
        xdemitconf_t xecfg;
  
                        opt->priv = &xecfg;
                }
        }
+       try_lookahead = should_lookahead(opt);
  
        while (left) {
                char *eol, ch;
                int hit;
  
+               /*
+                * look_ahead() skips quickly to the line that possibly
+                * has the next hit; don't call it if we need to do
+                * something more than just skipping the current line
+                * in response to an unmatch for the current line.  E.g.
+                * inside a post-context window, we will show the current
+                * line as a context around the previous hit when it
+                * doesn't hit.
+                */
+               if (try_lookahead
+                   && !(last_hit
+                        && lno <= last_hit + opt->post_context)
+                   && look_ahead(opt, &left, &lno, &bol))
+                       break;
                eol = end_of_line(bol, &left);
                ch = *eol;
                *eol = 0;