tree_entry_interesting: do basedir compare on wildcard patterns when possible
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Sat, 24 Nov 2012 04:33:51 +0000 (11:33 +0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 26 Nov 2012 19:16:34 +0000 (11:16 -0800)
Currently we treat "*.c" and "path/to/*.c" the same way, which means
we check all possible paths in the repo against "path/to/*.c". One can
see that "path/elsewhere/foo.c" obviously cannot match "path/to/*.c",
so we only need to check the paths _inside_ "path/to/" against that
pattern.

This patch checks the leading fixed part of a pathspec against the base
directory and exits early if possible. We could optimize even further
in the "path/to/something*.c" case (i.e. check the fixed part against
name_entry as well), but that is more complicated and probably would
not gain us much.

-O2 build on linux-2.6, without and with this patch respectively:

$ time git rev-list --quiet HEAD -- 'drivers/*.c'

real 1m9.484s
user 1m9.128s
sys 0m0.181s

$ time ~/w/git/git rev-list --quiet HEAD -- 'drivers/*.c'

real 0m15.710s
user 0m15.564s
sys 0m0.107s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
tree-walk.c
index 585899ea24c8f51e58e737e361bf64497ee3d3f6..6e30ef9d048c62c11a92aa5b0ee6df2d227776e6 100644 (file)
@@ -572,6 +572,54 @@ static int match_dir_prefix(const char *base,
        return 0;
 }
 
+/*
+ * Compare base against the leading non-wildcard part of item->match
+ * (item->nowildcard_len must be > 0). Return non-zero if base matches;
+ * on a match *matched holds the number of leading bytes compared equal.
+ */
+static int match_wildcard_base(const struct pathspec_item *item,
+                              const char *base, int baselen,
+                              int *matched)
+{
+       const char *match = item->match;
+       /* the wildcard part is not considered in this function */
+       int matchlen = item->nowildcard_len;
+
+       if (baselen) {
+               int dirlen;
+               /*
+                * base is at least as long as the non-wildcard part:
+                * the result is decided by comparing matchlen bytes.
+                */
+               if (baselen >= matchlen) {
+                       *matched = matchlen;
+                       return !strncmp(base, match, matchlen);
+               }
+
+               dirlen = matchlen;
+               while (dirlen && match[dirlen - 1] != '/')
+                       dirlen--;
+
+               /*
+                * NOTE(review): dirlen is not used after its loop.
+                * base is shorter than the non-wildcard part: fail on
+                * a mismatch. The original author notes that base ends
+                * with '/', so a match is a whole-directory match.
+                */
+               if (strncmp(base, match, baselen))
+                       return 0;
+               *matched = baselen;
+       } else
+               *matched = 0;
+       /*
+        * We could also check entry against the non-wildcard part
+        * that is not in base and do a never_interesting
+        * optimization similar to match_entry. For now just be
+        * happy with the base comparison.
+        */
+       return entry_interesting;
+}
+
 /*
  * Is a tree entry interesting given the pathspec we have?
  *
@@ -602,7 +650,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
                const struct pathspec_item *item = ps->items+i;
                const char *match = item->match;
                const char *base_str = base->buf + base_offset;
-               int matchlen = item->len;
+               int matchlen = item->len, matched = 0;
 
                if (baselen >= matchlen) {
                        /* If it doesn't match, move along... */
@@ -647,9 +695,24 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
                if (item->nowildcard_len == item->len)
                        continue;
 
+               if (item->nowildcard_len &&
+                   !match_wildcard_base(item, base_str, baselen, &matched))
+                       return entry_not_interesting;
+
                /*
                 * Concatenate base and entry->path into one and do
                 * fnmatch() on it.
+                *
+                * While we could avoid concatenation in certain cases
+                * [1], which saves a memcpy and potentially a
+                * realloc, it turns out not worth it. Measurement on
+                * linux-2.6 does not show any clear improvements,
+                * partly because of the nowildcard_len optimization
+                * in git_fnmatch(). Avoid micro-optimizations here.
+                *
+                * [1] if match_wildcard_base() says the base
+                * directory is already matched, we only need to match
+                * the rest, which is shorter so _in theory_ faster.
                 */
 
                strbuf_add(base, entry->path, pathlen);