builtin-grep.con commit Merge branch 'jc/show-branch-dense' into next (302f57f)
   1/*
   2 * Builtin "git grep"
   3 *
   4 * Copyright (c) 2006 Junio C Hamano
   5 */
   6#include "cache.h"
   7#include "blob.h"
   8#include "tree.h"
   9#include "commit.h"
  10#include "tag.h"
  11#include "tree-walk.h"
  12#include "builtin.h"
  13#include <regex.h>
  14#include <fnmatch.h>
  15
  16/*
  17 * git grep pathspecs are somewhat different from diff-tree pathspecs;
  18 * pathname wildcards are allowed.
  19 */
  20static int pathspec_matches(const char **paths, const char *name)
  21{
  22        int namelen, i;
  23        if (!paths || !*paths)
  24                return 1;
  25        namelen = strlen(name);
  26        for (i = 0; paths[i]; i++) {
  27                const char *match = paths[i];
  28                int matchlen = strlen(match);
  29                const char *slash, *cp;
  30
  31                if ((matchlen <= namelen) &&
  32                    !strncmp(name, match, matchlen) &&
  33                    (match[matchlen-1] == '/' ||
  34                     name[matchlen] == '\0' || name[matchlen] == '/'))
  35                        return 1;
  36                if (!fnmatch(match, name, 0))
  37                        return 1;
  38                if (name[namelen-1] != '/')
  39                        continue;
  40
  41                /* We are being asked if the name directory is worth
  42                 * descending into.
  43                 *
  44                 * Find the longest leading directory name that does
  45                 * not have metacharacter in the pathspec; the name
  46                 * we are looking at must overlap with that directory.
  47                 */
  48                for (cp = match, slash = NULL; cp - match < matchlen; cp++) {
  49                        char ch = *cp;
  50                        if (ch == '/')
  51                                slash = cp;
  52                        if (ch == '*' || ch == '[')
  53                                break;
  54                }
  55                if (!slash)
  56                        slash = match; /* toplevel */
  57                else
  58                        slash++;
  59                if (namelen <= slash - match) {
  60                        /* Looking at "Documentation/" and
  61                         * the pattern says "Documentation/howto/", or
  62                         * "Documentation/diff*.txt".
  63                         */
  64                        if (!memcmp(match, name, namelen))
  65                                return 1;
  66                }
  67                else {
  68                        /* Looking at "Documentation/howto/" and
  69                         * the pattern says "Documentation/h*".
  70                         */
  71                        if (!memcmp(match, name, slash - match))
  72                                return 1;
  73                }
  74        }
  75        return 0;
  76}
  77
  78struct grep_opt {
  79        const char *pattern;
  80        regex_t regexp;
  81        unsigned linenum:1;
  82        unsigned invert:1;
  83        unsigned name_only:1;
  84        int regflags;
  85        unsigned pre_context;
  86        unsigned post_context;
  87};
  88
  89static char *end_of_line(char *cp, unsigned long *left)
  90{
  91        unsigned long l = *left;
  92        while (l && *cp != '\n') {
  93                l--;
  94                cp++;
  95        }
  96        *left = l;
  97        return cp;
  98}
  99
 100static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 101                      const char *name, unsigned lno, char sign)
 102{
 103        printf("%s%c", name, sign);
 104        if (opt->linenum)
 105                printf("%d%c", lno, sign);
 106        printf("%.*s\n", eol-bol, bol);
 107}
 108
 109static int grep_buffer(struct grep_opt *opt, const char *name,
 110                       char *buf, unsigned long size)
 111{
 112        char *bol = buf;
 113        unsigned long left = size;
 114        unsigned lno = 1;
 115        struct pre_context_line {
 116                char *bol;
 117                char *eol;
 118        } *prev = NULL, *pcl;
 119        unsigned last_hit = 0;
 120        unsigned last_shown = 0;
 121        const char *hunk_mark = "";
 122
 123        if (opt->pre_context)
 124                prev = xcalloc(opt->pre_context, sizeof(*prev));
 125        if (opt->pre_context || opt->post_context)
 126                hunk_mark = "--\n";
 127
 128        while (left) {
 129                regmatch_t pmatch[10];
 130                char *eol, ch;
 131                int hit;
 132
 133                eol = end_of_line(bol, &left);
 134                ch = *eol;
 135                *eol = 0;
 136
 137                hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch),
 138                               pmatch, 0);
 139                if (opt->invert)
 140                        hit = !hit;
 141                if (hit) {
 142                        if (opt->name_only) {
 143                                printf("%s\n", name);
 144                                return 1;
 145                        }
 146                        /* Hit at this line.  If we haven't shown the
 147                         * pre-context lines, we would need to show them.
 148                         */
 149                        if (opt->pre_context) {
 150                                unsigned from;
 151                                if (opt->pre_context < lno)
 152                                        from = lno - opt->pre_context;
 153                                else
 154                                        from = 1;
 155                                if (from <= last_shown)
 156                                        from = last_shown + 1;
 157                                if (last_shown && from != last_shown + 1)
 158                                        printf(hunk_mark);
 159                                while (from < lno) {
 160                                        pcl = &prev[lno-from-1];
 161                                        show_line(opt, pcl->bol, pcl->eol,
 162                                                  name, from, '-');
 163                                        from++;
 164                                }
 165                                last_shown = lno-1;
 166                        }
 167                        if (last_shown && lno != last_shown + 1)
 168                                printf(hunk_mark);
 169                        show_line(opt, bol, eol, name, lno, ':');
 170                        last_shown = last_hit = lno;
 171                }
 172                else if (last_hit &&
 173                         lno <= last_hit + opt->post_context) {
 174                        /* If the last hit is within the post context,
 175                         * we need to show this line.
 176                         */
 177                        if (last_shown && lno != last_shown + 1)
 178                                printf(hunk_mark);
 179                        show_line(opt, bol, eol, name, lno, '-');
 180                        last_shown = lno;
 181                }
 182                if (opt->pre_context) {
 183                        memmove(prev+1, prev,
 184                                (opt->pre_context-1) * sizeof(*prev));
 185                        prev->bol = bol;
 186                        prev->eol = eol;
 187                }
 188                *eol = ch;
 189                bol = eol + 1;
 190                left--;
 191                lno++;
 192        }
 193        return !!last_hit;
 194}
 195
 196static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
 197{
 198        unsigned long size;
 199        char *data;
 200        char type[20];
 201        int hit;
 202        data = read_sha1_file(sha1, type, &size);
 203        if (!data) {
 204                error("'%s': unable to read %s", name, sha1_to_hex(sha1));
 205                return 0;
 206        }
 207        hit = grep_buffer(opt, name, data, size);
 208        free(data);
 209        return hit;
 210}
 211
 212static int grep_file(struct grep_opt *opt, const char *filename)
 213{
 214        struct stat st;
 215        int i;
 216        char *data;
 217        if (lstat(filename, &st) < 0) {
 218        err_ret:
 219                if (errno != ENOENT)
 220                        error("'%s': %s", filename, strerror(errno));
 221                return 0;
 222        }
 223        if (!st.st_size)
 224                return 0; /* empty file -- no grep hit */
 225        if (!S_ISREG(st.st_mode))
 226                return 0;
 227        i = open(filename, O_RDONLY);
 228        if (i < 0)
 229                goto err_ret;
 230        data = xmalloc(st.st_size + 1);
 231        if (st.st_size != xread(i, data, st.st_size)) {
 232                error("'%s': short read %s", filename, strerror(errno));
 233                close(i);
 234                free(data);
 235                return 0;
 236        }
 237        close(i);
 238        i = grep_buffer(opt, filename, data, st.st_size);
 239        free(data);
 240        return i;
 241}
 242
 243static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
 244{
 245        int hit = 0;
 246        int nr;
 247        read_cache();
 248
 249        for (nr = 0; nr < active_nr; nr++) {
 250                struct cache_entry *ce = active_cache[nr];
 251                if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
 252                        continue;
 253                if (!pathspec_matches(paths, ce->name))
 254                        continue;
 255                if (cached)
 256                        hit |= grep_sha1(opt, ce->sha1, ce->name);
 257                else
 258                        hit |= grep_file(opt, ce->name);
 259        }
 260        return hit;
 261}
 262
 263static int grep_tree(struct grep_opt *opt, const char **paths,
 264                     struct tree_desc *tree,
 265                     const char *tree_name, const char *base)
 266{
 267        unsigned mode;
 268        int len;
 269        int hit = 0;
 270        const char *path;
 271        const unsigned char *sha1;
 272        char *down;
 273        char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100);
 274
 275        if (tree_name[0]) {
 276                int offset = sprintf(path_buf, "%s:", tree_name);
 277                down = path_buf + offset;
 278                strcat(down, base);
 279        }
 280        else {
 281                down = path_buf;
 282                strcpy(down, base);
 283        }
 284        len = strlen(path_buf);
 285
 286        while (tree->size) {
 287                int pathlen;
 288                sha1 = tree_entry_extract(tree, &path, &mode);
 289                pathlen = strlen(path);
 290                strcpy(path_buf + len, path);
 291
 292                if (S_ISDIR(mode))
 293                        /* Match "abc/" against pathspec to
 294                         * decide if we want to descend into "abc"
 295                         * directory.
 296                         */
 297                        strcpy(path_buf + len + pathlen, "/");
 298
 299                if (!pathspec_matches(paths, down))
 300                        ;
 301                else if (S_ISREG(mode))
 302                        hit |= grep_sha1(opt, sha1, path_buf);
 303                else if (S_ISDIR(mode)) {
 304                        char type[20];
 305                        struct tree_desc sub;
 306                        void *data;
 307                        data = read_sha1_file(sha1, type, &sub.size);
 308                        if (!data)
 309                                die("unable to read tree (%s)",
 310                                    sha1_to_hex(sha1));
 311                        sub.buf = data;
 312                        hit |= grep_tree(opt, paths, &sub, tree_name, down);
 313                        free(data);
 314                }
 315                update_tree_entry(tree);
 316        }
 317        return hit;
 318}
 319
 320static int grep_object(struct grep_opt *opt, const char **paths,
 321                       struct object *obj, const char *name)
 322{
 323        if (!strcmp(obj->type, blob_type))
 324                return grep_sha1(opt, obj->sha1, name);
 325        if (!strcmp(obj->type, commit_type) ||
 326            !strcmp(obj->type, tree_type)) {
 327                struct tree_desc tree;
 328                void *data;
 329                int hit;
 330                data = read_object_with_reference(obj->sha1, tree_type,
 331                                                  &tree.size, NULL);
 332                if (!data)
 333                        die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
 334                tree.buf = data;
 335                hit = grep_tree(opt, paths, &tree, name, "");
 336                free(data);
 337                return hit;
 338        }
 339        die("unable to grep from object of type %s", obj->type);
 340}
 341
 342static const char builtin_grep_usage[] =
 343"git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
 344
 345int cmd_grep(int argc, const char **argv, char **envp)
 346{
 347        int err;
 348        int hit = 0;
 349        int no_more_flags = 0;
 350        int seen_noncommit = 0;
 351        int cached = 0;
 352        struct grep_opt opt;
 353        struct object_list *list, **tail, *object_list = NULL;
 354        const char *prefix = setup_git_directory();
 355        const char **paths = NULL;
 356
 357        memset(&opt, 0, sizeof(opt));
 358        opt.regflags = REG_NEWLINE;
 359
 360        /*
 361         * No point using rev_info, really.
 362         */
 363        while (1 < argc) {
 364                const char *arg = argv[1];
 365                argc--; argv++;
 366                if (!strcmp("--cached", arg)) {
 367                        cached = 1;
 368                        continue;
 369                }
 370                if (!strcmp("-i", arg) ||
 371                    !strcmp("--ignore-case", arg)) {
 372                        opt.regflags |= REG_ICASE;
 373                        continue;
 374                }
 375                if (!strcmp("-v", arg) ||
 376                    !strcmp("--invert-match", arg)) {
 377                        opt.invert = 1;
 378                        continue;
 379                }
 380                if (!strcmp("-E", arg) ||
 381                    !strcmp("--extended-regexp", arg)) {
 382                        opt.regflags |= REG_EXTENDED;
 383                        continue;
 384                }
 385                if (!strcmp("-G", arg) ||
 386                    !strcmp("--basic-regexp", arg)) {
 387                        opt.regflags &= ~REG_EXTENDED;
 388                        continue;
 389                }
 390                if (!strcmp("-n", arg)) {
 391                        opt.linenum = 1;
 392                        continue;
 393                }
 394                if (!strcmp("-H", arg)) {
 395                        /* We always show the pathname, so this
 396                         * is a noop.
 397                         */
 398                        continue;
 399                }
 400                if (!strcmp("-l", arg) ||
 401                    !strcmp("--files-with-matches", arg)) {
 402                        opt.name_only = 1;
 403                        continue;
 404                }
 405                if (!strcmp("-A", arg) ||
 406                    !strcmp("-B", arg) ||
 407                    !strcmp("-C", arg)) {
 408                        unsigned num;
 409                        if (argc <= 1 ||
 410                            sscanf(*++argv, "%u", &num) != 1)
 411                                usage(builtin_grep_usage);
 412                        argc--;
 413                        switch (arg[1]) {
 414                        case 'A':
 415                                opt.post_context = num;
 416                                break;
 417                        case 'C':
 418                                opt.post_context = num;
 419                        case 'B':
 420                                opt.pre_context = num;
 421                                break;
 422                        }
 423                        continue;
 424                }
 425                if (!strcmp("-e", arg)) {
 426                        if (1 < argc) {
 427                                /* We probably would want to do
 428                                 * -e pat1 -e pat2 as well later...
 429                                 */
 430                                if (opt.pattern)
 431                                        die("more than one pattern?");
 432                                opt.pattern = *++argv;
 433                                argc--;
 434                                continue;
 435                        }
 436                        usage(builtin_grep_usage);
 437                }
 438                if (!strcmp("--", arg)) {
 439                        no_more_flags = 1;
 440                        continue;
 441                }
 442                /* Either unrecognized option or a single pattern */
 443                if (!no_more_flags && *arg == '-')
 444                        usage(builtin_grep_usage);
 445                if (!opt.pattern) {
 446                        opt.pattern = arg;
 447                        break;
 448                }
 449                else {
 450                        /* We are looking at the first path or rev;
 451                         * it is found at argv[0] after leaving the
 452                         * loop.
 453                         */
 454                        argc++; argv--;
 455                        break;
 456                }
 457        }
 458        if (!opt.pattern)
 459                die("no pattern given.");
 460        err = regcomp(&opt.regexp, opt.pattern, opt.regflags);
 461        if (err) {
 462                char errbuf[1024];
 463                regerror(err, &opt.regexp, errbuf, 1024);
 464                regfree(&opt.regexp);
 465                die("'%s': %s", opt.pattern, errbuf);
 466        }
 467        tail = &object_list;
 468        while (1 < argc) {
 469                struct object *object;
 470                struct object_list *elem;
 471                const char *arg = argv[1];
 472                unsigned char sha1[20];
 473                if (get_sha1(arg, sha1) < 0)
 474                        break;
 475                object = parse_object(sha1);
 476                if (!object)
 477                        die("bad object %s", arg);
 478                elem = object_list_insert(object, tail);
 479                elem->name = arg;
 480                tail = &elem->next;
 481                argc--; argv++;
 482        }
 483        if (1 < argc)
 484                paths = get_pathspec(prefix, argv + 1);
 485        else if (prefix) {
 486                paths = xcalloc(2, sizeof(const char *));
 487                paths[0] = prefix;
 488                paths[1] = NULL;
 489        }
 490
 491        if (!object_list)
 492                return !grep_cache(&opt, paths, cached);
 493        /*
 494         * Do not walk "grep -e foo master next pu -- Documentation/"
 495         * but do walk "grep -e foo master..next -- Documentation/".
 496         * Ranged request mixed with a blob or tree object, like
 497         * "grep -e foo v1.0.0:Documentation/ master..next"
 498         * so detect that and complain.
 499         */
 500        for (list = object_list; list; list = list->next) {
 501                struct object *real_obj;
 502                real_obj = deref_tag(list->item, NULL, 0);
 503                if (strcmp(real_obj->type, commit_type))
 504                        seen_noncommit = 1;
 505        }
 506        if (cached)
 507                die("both --cached and revisions given.");
 508
 509        for (list = object_list; list; list = list->next) {
 510                struct object *real_obj;
 511                real_obj = deref_tag(list->item, NULL, 0);
 512                if (grep_object(&opt, paths, real_obj, list->name))
 513                        hit = 1;
 514        }
 515        return !hit;
 516}