regex: use regexec_buf()
author Johannes Schindelin <johannes.schindelin@gmx.de>
Wed, 21 Sep 2016 18:24:14 +0000 (20:24 +0200)
committer Junio C Hamano <gitster@pobox.com>
Wed, 21 Sep 2016 20:56:15 +0000 (13:56 -0700)
The new regexec_buf() function operates on buffers with an explicitly
specified length, rather than NUL-terminated strings.

We need to use this function whenever the buffer we want to pass to
regexec(3) may have been mmap(2)ed (and is hence not NUL-terminated).

Note: the original motivation for this patch was to fix a bug where
`git diff -G <regex>` would crash. This patch converts more callers,
though, some of which allocated memory to construct NUL-terminated
strings, or worse, modified buffers to temporarily insert NULs while
calling regexec(3). By converting them to use regexec_buf(), the code
has become much cleaner.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff.c
diffcore-pickaxe.c
grep.c
t/t4062-diff-pickaxe.sh
xdiff-interface.c
diff --git a/diff.c b/diff.c
index 059123c5dcef4129763895b0f2ad5a54728b0c07..f77324e9e020bcc75d008f0d8e607646b2b185c8 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -941,7 +941,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
 {
        if (word_regex && *begin < buffer->size) {
                regmatch_t match[1];
-               if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
+               if (!regexec_buf(word_regex, buffer->ptr + *begin,
+                                buffer->size - *begin, 1, match, 0)) {
                        char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
                                        '\n', match[0].rm_eo - match[0].rm_so);
                        *end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
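diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 7715c13ec4780a755ec2a6552c0aec9994691087..8413d76582c2b10a710f57dc3bda212e8fc7b91f 100644 (file)
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c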
@@ -21,7 +21,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
 {
        struct diffgrep_cb *data = priv;
        regmatch_t regmatch;
-       int hold;
 
        if (line[0] != '+' && line[0] != '-')
                return;
@@ -31,11 +30,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
                 * caller early.
                 */
                return;
-       /* Yuck -- line ought to be "const char *"! */
-       hold = line[len];
-       line[len] = '\0';
-       data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0);
-       line[len] = hold;
+       data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
+                                &regmatch, 0);
 }
 
 static int diff_grep(mmfile_t *one, mmfile_t *two,
@@ -48,9 +44,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
        xdemitconf_t xecfg;
 
        if (!one)
-               return !regexec(regexp, two->ptr, 1, &regmatch, 0);
+               return !regexec_buf(regexp, two->ptr, two->size,
+                                   1, &regmatch, 0);
        if (!two)
-               return !regexec(regexp, one->ptr, 1, &regmatch, 0);
+               return !regexec_buf(regexp, one->ptr, one->size,
+                                   1, &regmatch, 0);
 
        /*
         * We have both sides; need to run textual diff and see if
@@ -81,8 +79,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
                regmatch_t regmatch;
                int flags = 0;
 
-               assert(data[sz] == '\0');
-               while (*data && !regexec(regexp, data, 1, &regmatch, flags)) {
+               while (*data &&
+                      !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
                        flags |= REG_NOTBOL;
                        data += regmatch.rm_eo;
                        if (*data && regmatch.rm_so == regmatch.rm_eo)
diff --git a/grep.c b/grep.c
index 528b652f713d2b6db5f48e3829448212cc3837bf..8ed56236f049a47490f2c14d999a13e21b98fd2c 100644 (file)
--- a/grep.c
+++ b/grep.c
@@ -848,17 +848,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol,
        }
 }
 
-static int regmatch(const regex_t *preg, char *line, char *eol,
-                   regmatch_t *match, int eflags)
-{
-#ifdef REG_STARTEND
-       match->rm_so = 0;
-       match->rm_eo = eol - line;
-       eflags |= REG_STARTEND;
-#endif
-       return regexec(preg, line, 1, match, eflags);
-}
-
 static int patmatch(struct grep_pat *p, char *line, char *eol,
                    regmatch_t *match, int eflags)
 {
@@ -869,7 +858,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
        else if (p->pcre_regexp)
                hit = !pcrematch(p, line, eol, match, eflags);
        else
-               hit = !regmatch(&p->regexp, line, eol, match, eflags);
+               hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
+                                  eflags);
 
        return hit;
 }
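diff --git a/t/t4062-diff-pickaxe.sh b/t/t4062-diff-pickaxe.sh
index 5929f2eabb966305a4b0bb6ec6b1d4d9a4e9efe2..f0bf50bda780f04f9f2ffc2c1f39e354f69ae193 100755 (executable)
--- a/t/t4062-diff-pickaxe.sh
+++ b/t/t4062-diff-pickaxe.sh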
@@ -14,7 +14,7 @@ test_expect_success setup '
        test_tick &&
        git commit -m "A 4k file"
 '
-test_expect_failure '-G matches' '
+test_expect_success '-G matches' '
        git diff --name-only -G "^0{4096}$" HEAD^ >out &&
        test 4096-zeroes.txt = "$(cat out)"
 '
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 54236f24b9786710f91650ac63f6004cdeb012e6..08a7313e6aff06d0c95e1a28b9fa96d48c8c19b1 100644 (file)
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -216,11 +216,10 @@ struct ff_regs {
 static long ff_regexp(const char *line, long len,
                char *buffer, long buffer_size, void *priv)
 {
-       char *line_buffer;
        struct ff_regs *regs = priv;
        regmatch_t pmatch[2];
        int i;
-       int result = -1;
+       int result;
 
        /* Exclude terminating newline (and cr) from matching */
        if (len > 0 && line[len-1] == '\n') {
@@ -230,18 +229,16 @@ static long ff_regexp(const char *line, long len,
                        len--;
        }
 
-       line_buffer = xstrndup(line, len); /* make NUL terminated */
-
        for (i = 0; i < regs->nr; i++) {
                struct ff_reg *reg = regs->array + i;
-               if (!regexec(&reg->re, line_buffer, 2, pmatch, 0)) {
+               if (!regexec_buf(&reg->re, line, len, 2, pmatch, 0)) {
                        if (reg->negate)
-                               goto fail;
+                               return -1;
                        break;
                }
        }
        if (regs->nr <= i)
-               goto fail;
+               return -1;
        i = pmatch[1].rm_so >= 0 ? 1 : 0;
        line += pmatch[i].rm_so;
        result = pmatch[i].rm_eo - pmatch[i].rm_so;
@@ -250,8 +247,6 @@ static long ff_regexp(const char *line, long len,
        while (result > 0 && (isspace(line[result - 1])))
                result--;
        memcpy(buffer, line, result);
- fail:
-       free(line_buffer);
        return result;
 }