/* line-range.c on commit "The second batch" (e902e9b) */
   1#include "git-compat-util.h"
   2#include "line-range.h"
   3#include "xdiff-interface.h"
   4#include "strbuf.h"
   5#include "userdiff.h"
   6
   7/*
   8 * Parse one item in the -L option
   9 *
  10 * 'begin' is applicable only to relative range anchors. Absolute anchors
  11 * ignore this value.
  12 *
  13 * When parsing "-L A,B", parse_loc() is called once for A and once for B.
  14 *
  15 * When parsing A, 'begin' must be a negative number, the absolute value of
  16 * which is the line at which relative start-of-range anchors should be
  17 * based. Beginning of file is represented by -1.
  18 *
  19 * When parsing B, 'begin' must be the positive line number immediately
  20 * following the line computed for 'A'.
  21 */
  22static const char *parse_loc(const char *spec, nth_line_fn_t nth_line,
  23                             void *data, long lines, long begin, long *ret)
  24{
  25        char *term;
  26        const char *line;
  27        long num;
  28        int reg_error;
  29        regex_t regexp;
  30        regmatch_t match[1];
  31
  32        /* Allow "-L <something>,+20" to mean starting at <something>
  33         * for 20 lines, or "-L <something>,-5" for 5 lines ending at
  34         * <something>.
  35         */
  36        if (1 <= begin && (spec[0] == '+' || spec[0] == '-')) {
  37                num = strtol(spec + 1, &term, 10);
  38                if (term != spec + 1) {
  39                        if (!ret)
  40                                return term;
  41                        if (num == 0)
  42                                die("-L invalid empty range");
  43                        if (spec[0] == '-')
  44                                num = 0 - num;
  45                        if (0 < num)
  46                                *ret = begin + num - 2;
  47                        else if (!num)
  48                                *ret = begin;
  49                        else
  50                                *ret = begin + num > 0 ? begin + num : 1;
  51                        return term;
  52                }
  53                return spec;
  54        }
  55        num = strtol(spec, &term, 10);
  56        if (term != spec) {
  57                if (ret) {
  58                        if (num <= 0)
  59                                die("-L invalid line number: %ld", num);
  60                        *ret = num;
  61                }
  62                return term;
  63        }
  64
  65        if (begin < 0) {
  66                if (spec[0] != '^')
  67                        begin = -begin;
  68                else {
  69                        begin = 1;
  70                        spec++;
  71                }
  72        }
  73
  74        if (spec[0] != '/')
  75                return spec;
  76
  77        /* it could be a regexp of form /.../ */
  78        for (term = (char *) spec + 1; *term && *term != '/'; term++) {
  79                if (*term == '\\')
  80                        term++;
  81        }
  82        if (*term != '/')
  83                return spec;
  84
  85        /* in the scan-only case we are not interested in the regex */
  86        if (!ret)
  87                return term+1;
  88
  89        /* try [spec+1 .. term-1] as regexp */
  90        *term = 0;
  91        begin--; /* input is in human terms */
  92        line = nth_line(data, begin);
  93
  94        if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
  95            !(reg_error = regexec(&regexp, line, 1, match, 0))) {
  96                const char *cp = line + match[0].rm_so;
  97                const char *nline;
  98
  99                while (begin++ < lines) {
 100                        nline = nth_line(data, begin);
 101                        if (line <= cp && cp < nline)
 102                                break;
 103                        line = nline;
 104                }
 105                *ret = begin;
 106                regfree(&regexp);
 107                *term++ = '/';
 108                return term;
 109        }
 110        else {
 111                char errbuf[1024];
 112                regerror(reg_error, &regexp, errbuf, 1024);
 113                die("-L parameter '%s' starting at line %ld: %s",
 114                    spec + 1, begin + 1, errbuf);
 115        }
 116}
 117
 118static int match_funcname(xdemitconf_t *xecfg, const char *bol, const char *eol)
 119{
 120        if (xecfg) {
 121                char buf[1];
 122                return xecfg->find_func(bol, eol - bol, buf, 1,
 123                                        xecfg->find_func_priv) >= 0;
 124        }
 125
 126        if (bol == eol)
 127                return 0;
 128        if (isalpha(*bol) || *bol == '_' || *bol == '$')
 129                return 1;
 130        return 0;
 131}
 132
 133static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char *start,
 134                                                 regex_t *regexp)
 135{
 136        int reg_error;
 137        regmatch_t match[1];
 138        while (1) {
 139                const char *bol, *eol;
 140                reg_error = regexec(regexp, start, 1, match, 0);
 141                if (reg_error == REG_NOMATCH)
 142                        return NULL;
 143                else if (reg_error) {
 144                        char errbuf[1024];
 145                        regerror(reg_error, regexp, errbuf, 1024);
 146                        die("-L parameter: regexec() failed: %s", errbuf);
 147                }
 148                /* determine extent of line matched */
 149                bol = start+match[0].rm_so;
 150                eol = start+match[0].rm_eo;
 151                while (bol > start && *bol != '\n')
 152                        bol--;
 153                if (*bol == '\n')
 154                        bol++;
 155                while (*eol && *eol != '\n')
 156                        eol++;
 157                if (*eol == '\n')
 158                        eol++;
 159                /* is it a funcname line? */
 160                if (match_funcname(xecfg, (char*) bol, (char*) eol))
 161                        return bol;
 162                start = eol;
 163        }
 164}
 165
 166static const char *parse_range_funcname(
 167        const char *arg, nth_line_fn_t nth_line_cb,
 168        void *cb_data, long lines, long anchor, long *begin, long *end,
 169        const char *path, struct index_state *istate)
 170{
 171        char *pattern;
 172        const char *term;
 173        struct userdiff_driver *drv;
 174        xdemitconf_t *xecfg = NULL;
 175        const char *start;
 176        const char *p;
 177        int reg_error;
 178        regex_t regexp;
 179
 180        if (*arg == '^') {
 181                anchor = 1;
 182                arg++;
 183        }
 184
 185        assert(*arg == ':');
 186        term = arg+1;
 187        while (*term && *term != ':') {
 188                if (*term == '\\' && *(term+1))
 189                        term++;
 190                term++;
 191        }
 192        if (term == arg+1)
 193                return NULL;
 194        if (!begin) /* skip_range_arg case */
 195                return term;
 196
 197        pattern = xstrndup(arg+1, term-(arg+1));
 198
 199        anchor--; /* input is in human terms */
 200        start = nth_line_cb(cb_data, anchor);
 201
 202        drv = userdiff_find_by_path(istate, path);
 203        if (drv && drv->funcname.pattern) {
 204                const struct userdiff_funcname *pe = &drv->funcname;
 205                xecfg = xcalloc(1, sizeof(*xecfg));
 206                xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
 207        }
 208
 209        reg_error = regcomp(&regexp, pattern, REG_NEWLINE);
 210        if (reg_error) {
 211                char errbuf[1024];
 212                regerror(reg_error, &regexp, errbuf, 1024);
 213                die("-L parameter '%s': %s", pattern, errbuf);
 214        }
 215
 216        p = find_funcname_matching_regexp(xecfg, (char*) start, &regexp);
 217        if (!p)
 218                die("-L parameter '%s' starting at line %ld: no match",
 219                    pattern, anchor + 1);
 220        *begin = 0;
 221        while (p > nth_line_cb(cb_data, *begin))
 222                (*begin)++;
 223
 224        if (*begin >= lines)
 225                die("-L parameter '%s' matches at EOF", pattern);
 226
 227        *end = *begin+1;
 228        while (*end < lines) {
 229                const char *bol = nth_line_cb(cb_data, *end);
 230                const char *eol = nth_line_cb(cb_data, *end+1);
 231                if (match_funcname(xecfg, bol, eol))
 232                        break;
 233                (*end)++;
 234        }
 235
 236        regfree(&regexp);
 237        free(xecfg);
 238        free(pattern);
 239
 240        /* compensate for 1-based numbering */
 241        (*begin)++;
 242
 243        return term;
 244}
 245
 246int parse_range_arg(const char *arg, nth_line_fn_t nth_line_cb,
 247                    void *cb_data, long lines, long anchor,
 248                    long *begin, long *end,
 249                    const char *path, struct index_state *istate)
 250{
 251        *begin = *end = 0;
 252
 253        if (anchor < 1)
 254                anchor = 1;
 255        if (anchor > lines)
 256                anchor = lines + 1;
 257
 258        if (*arg == ':' || (*arg == '^' && *(arg + 1) == ':')) {
 259                arg = parse_range_funcname(arg, nth_line_cb, cb_data,
 260                                           lines, anchor, begin, end,
 261                                           path, istate);
 262                if (!arg || *arg)
 263                        return -1;
 264                return 0;
 265        }
 266
 267        arg = parse_loc(arg, nth_line_cb, cb_data, lines, -anchor, begin);
 268
 269        if (*arg == ',')
 270                arg = parse_loc(arg + 1, nth_line_cb, cb_data, lines, *begin + 1, end);
 271
 272        if (*arg)
 273                return -1;
 274
 275        if (*begin && *end && *end < *begin) {
 276                SWAP(*end, *begin);
 277        }
 278
 279        return 0;
 280}
 281
 282const char *skip_range_arg(const char *arg, struct index_state *istate)
 283{
 284        if (*arg == ':' || (*arg == '^' && *(arg + 1) == ':'))
 285                return parse_range_funcname(arg, NULL, NULL,
 286                                            0, 0, NULL, NULL,
 287                                            NULL, istate);
 288
 289        arg = parse_loc(arg, NULL, NULL, 0, -1, NULL);
 290
 291        if (*arg == ',')
 292                arg = parse_loc(arg+1, NULL, NULL, 0, 0, NULL);
 293
 294        return arg;
 295}