line-range.c on commit log: fix -L bounds checking bug (63828b8)
   1#include "git-compat-util.h"
   2#include "line-range.h"
   3#include "xdiff-interface.h"
   4#include "strbuf.h"
   5#include "userdiff.h"
   6
   7/*
   8 * Parse one item in the -L option
   9 */
  10static const char *parse_loc(const char *spec, nth_line_fn_t nth_line,
  11                             void *data, long lines, long begin, long *ret)
  12{
  13        char *term;
  14        const char *line;
  15        long num;
  16        int reg_error;
  17        regex_t regexp;
  18        regmatch_t match[1];
  19
  20        /* Allow "-L <something>,+20" to mean starting at <something>
  21         * for 20 lines, or "-L <something>,-5" for 5 lines ending at
  22         * <something>.
  23         */
  24        if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
  25                num = strtol(spec + 1, &term, 10);
  26                if (term != spec + 1) {
  27                        if (!ret)
  28                                return term;
  29                        if (spec[0] == '-')
  30                                num = 0 - num;
  31                        if (0 < num)
  32                                *ret = begin + num - 2;
  33                        else if (!num)
  34                                *ret = begin;
  35                        else
  36                                *ret = begin + num;
  37                        return term;
  38                }
  39                return spec;
  40        }
  41        num = strtol(spec, &term, 10);
  42        if (term != spec) {
  43                if (ret)
  44                        *ret = num;
  45                return term;
  46        }
  47        if (spec[0] != '/')
  48                return spec;
  49
  50        /* it could be a regexp of form /.../ */
  51        for (term = (char *) spec + 1; *term && *term != '/'; term++) {
  52                if (*term == '\\')
  53                        term++;
  54        }
  55        if (*term != '/')
  56                return spec;
  57
  58        /* in the scan-only case we are not interested in the regex */
  59        if (!ret)
  60                return term+1;
  61
  62        /* try [spec+1 .. term-1] as regexp */
  63        *term = 0;
  64        begin--; /* input is in human terms */
  65        line = nth_line(data, begin);
  66
  67        if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
  68            !(reg_error = regexec(&regexp, line, 1, match, 0))) {
  69                const char *cp = line + match[0].rm_so;
  70                const char *nline;
  71
  72                while (begin++ < lines) {
  73                        nline = nth_line(data, begin);
  74                        if (line <= cp && cp < nline)
  75                                break;
  76                        line = nline;
  77                }
  78                *ret = begin;
  79                regfree(&regexp);
  80                *term++ = '/';
  81                return term;
  82        }
  83        else {
  84                char errbuf[1024];
  85                regerror(reg_error, &regexp, errbuf, 1024);
  86                die("-L parameter '%s': %s", spec + 1, errbuf);
  87        }
  88}
  89
  90static int match_funcname(xdemitconf_t *xecfg, const char *bol, const char *eol)
  91{
  92        if (xecfg) {
  93                char buf[1];
  94                return xecfg->find_func(bol, eol - bol, buf, 1,
  95                                        xecfg->find_func_priv) >= 0;
  96        }
  97
  98        if (bol == eol)
  99                return 0;
 100        if (isalpha(*bol) || *bol == '_' || *bol == '$')
 101                return 1;
 102        return 0;
 103}
 104
 105static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char *start,
 106                                                 regex_t *regexp)
 107{
 108        int reg_error;
 109        regmatch_t match[1];
 110        while (1) {
 111                const char *bol, *eol;
 112                reg_error = regexec(regexp, start, 1, match, 0);
 113                if (reg_error == REG_NOMATCH)
 114                        return NULL;
 115                else if (reg_error) {
 116                        char errbuf[1024];
 117                        regerror(reg_error, regexp, errbuf, 1024);
 118                        die("-L parameter: regexec() failed: %s", errbuf);
 119                }
 120                /* determine extent of line matched */
 121                bol = start+match[0].rm_so;
 122                eol = start+match[0].rm_eo;
 123                while (bol > start && *bol != '\n')
 124                        bol--;
 125                if (*bol == '\n')
 126                        bol++;
 127                while (*eol && *eol != '\n')
 128                        eol++;
 129                if (*eol == '\n')
 130                        eol++;
 131                /* is it a funcname line? */
 132                if (match_funcname(xecfg, (char*) bol, (char*) eol))
 133                        return bol;
 134                start = eol;
 135        }
 136}
 137
 138static const char *parse_range_funcname(const char *arg, nth_line_fn_t nth_line_cb,
 139                                        void *cb_data, long lines, long *begin, long *end,
 140                                        const char *path)
 141{
 142        char *pattern;
 143        const char *term;
 144        struct userdiff_driver *drv;
 145        xdemitconf_t *xecfg = NULL;
 146        const char *start;
 147        const char *p;
 148        int reg_error;
 149        regex_t regexp;
 150
 151        assert(*arg == ':');
 152        term = arg+1;
 153        while (*term && *term != ':') {
 154                if (*term == '\\' && *(term+1))
 155                        term++;
 156                term++;
 157        }
 158        if (term == arg+1)
 159                return NULL;
 160        if (!begin) /* skip_range_arg case */
 161                return term;
 162
 163        pattern = xstrndup(arg+1, term-(arg+1));
 164
 165        start = nth_line_cb(cb_data, 0);
 166
 167        drv = userdiff_find_by_path(path);
 168        if (drv && drv->funcname.pattern) {
 169                const struct userdiff_funcname *pe = &drv->funcname;
 170                xecfg = xcalloc(1, sizeof(*xecfg));
 171                xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
 172        }
 173
 174        reg_error = regcomp(&regexp, pattern, REG_NEWLINE);
 175        if (reg_error) {
 176                char errbuf[1024];
 177                regerror(reg_error, &regexp, errbuf, 1024);
 178                die("-L parameter '%s': %s", pattern, errbuf);
 179        }
 180
 181        p = find_funcname_matching_regexp(xecfg, (char*) start, &regexp);
 182        if (!p)
 183                die("-L parameter '%s': no match", pattern);
 184        *begin = 0;
 185        while (p > nth_line_cb(cb_data, *begin))
 186                (*begin)++;
 187
 188        if (*begin >= lines)
 189                die("-L parameter '%s' matches at EOF", pattern);
 190
 191        *end = *begin+1;
 192        while (*end < lines) {
 193                const char *bol = nth_line_cb(cb_data, *end);
 194                const char *eol = nth_line_cb(cb_data, *end+1);
 195                if (match_funcname(xecfg, bol, eol))
 196                        break;
 197                (*end)++;
 198        }
 199
 200        regfree(&regexp);
 201        free(xecfg);
 202        free(pattern);
 203
 204        /* compensate for 1-based numbering */
 205        (*begin)++;
 206
 207        return term;
 208}
 209
 210int parse_range_arg(const char *arg, nth_line_fn_t nth_line_cb,
 211                    void *cb_data, long lines, long *begin, long *end,
 212                    const char *path)
 213{
 214        *begin = *end = 0;
 215
 216        if (*arg == ':') {
 217                arg = parse_range_funcname(arg, nth_line_cb, cb_data, lines, begin, end, path);
 218                if (!arg || *arg)
 219                        return -1;
 220                return 0;
 221        }
 222
 223        arg = parse_loc(arg, nth_line_cb, cb_data, lines, 1, begin);
 224
 225        if (*arg == ',')
 226                arg = parse_loc(arg + 1, nth_line_cb, cb_data, lines, *begin + 1, end);
 227
 228        if (*arg)
 229                return -1;
 230
 231        if (*begin && *end && *end < *begin) {
 232                long tmp;
 233                tmp = *end; *end = *begin; *begin = tmp;
 234        }
 235
 236        return 0;
 237}
 238
 239const char *skip_range_arg(const char *arg)
 240{
 241        if (*arg == ':')
 242                return parse_range_funcname(arg, NULL, NULL, 0, NULL, NULL, NULL);
 243
 244        arg = parse_loc(arg, NULL, NULL, 0, -1, NULL);
 245
 246        if (*arg == ',')
 247                arg = parse_loc(arg+1, NULL, NULL, 0, 0, NULL);
 248
 249        return arg;
 250}