653b65ea1040a6d4e41f0c32df6c6766e04bc8d0
   1/*
   2 * Builtin "git grep"
   3 *
   4 * Copyright (c) 2006 Junio C Hamano
   5 */
   6#include "cache.h"
   7#include "blob.h"
   8#include "tree.h"
   9#include "commit.h"
  10#include "tag.h"
  11#include "diff.h"
  12#include "revision.h"
  13#include "builtin.h"
  14#include <regex.h>
  15#include <fnmatch.h>
  16
  17/*
  18 * git grep pathspecs are somewhat different from diff-tree pathspecs;
  19 * pathname wildcards are allowed.
  20 */
  21static int pathspec_matches(struct diff_options *opt, const char *name)
  22{
  23        int namelen, i;
  24        if (!opt->nr_paths)
  25                return 1;
  26        namelen = strlen(name);
  27        for (i = 0; i < opt->nr_paths; i++) {
  28                const char *match = opt->paths[i];
  29                int matchlen = opt->pathlens[i];
  30                const char *slash, *cp;
  31
  32                if ((matchlen <= namelen) &&
  33                    !strncmp(name, match, matchlen) &&
  34                    (match[matchlen-1] == '/' ||
  35                     name[matchlen] == '\0' || name[matchlen] == '/'))
  36                        return 1;
  37                if (!fnmatch(match, name, 0))
  38                        return 1;
  39                if (name[namelen-1] != '/')
  40                        continue;
  41
  42                /* We are being asked if the name directory is worth
  43                 * descending into.
  44                 *
  45                 * Find the longest leading directory name that does
  46                 * not have metacharacter in the pathspec; the name
  47                 * we are looking at must overlap with that directory.
  48                 */
  49                for (cp = match, slash = NULL; cp - match < matchlen; cp++) {
  50                        char ch = *cp;
  51                        if (ch == '/')
  52                                slash = cp;
  53                        if (ch == '*' || ch == '[')
  54                                break;
  55                }
  56                if (!slash)
  57                        slash = match; /* toplevel */
  58                else
  59                        slash++;
  60                if (namelen <= slash - match) {
  61                        /* Looking at "Documentation/" and
  62                         * the pattern says "Documentation/howto/", or
  63                         * "Documentation/diff*.txt".
  64                         */
  65                        if (!memcmp(match, name, namelen))
  66                                return 1;
  67                }
  68                else {
  69                        /* Looking at "Documentation/howto/" and
  70                         * the pattern says "Documentation/h*".
  71                         */
  72                        if (!memcmp(match, name, slash - match))
  73                                return 1;
  74                }
  75        }
  76        return 0;
  77}
  78
  79struct grep_opt {
  80        const char *pattern;
  81        regex_t regexp;
  82        unsigned linenum:1;
  83        unsigned invert:1;
  84        int regflags;
  85        unsigned pre_context;
  86        unsigned post_context;
  87};
  88
  89static char *end_of_line(char *cp, unsigned long *left)
  90{
  91        unsigned long l = *left;
  92        while (l && *cp != '\n') {
  93                l--;
  94                cp++;
  95        }
  96        *left = l;
  97        return cp;
  98}
  99
 100static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 101                      const char *name, unsigned lno, char sign)
 102{
 103        printf("%s%c", name, sign);
 104        if (opt->linenum)
 105                printf("%d%c", lno, sign);
 106        printf("%.*s\n", eol-bol, bol);
 107}
 108
 109static int grep_buffer(struct grep_opt *opt, const char *name,
 110                       char *buf, unsigned long size)
 111{
 112        char *bol = buf;
 113        unsigned long left = size;
 114        unsigned lno = 1;
 115        struct pre_context_line {
 116                char *bol;
 117                char *eol;
 118        } *prev = NULL, *pcl;
 119        unsigned last_hit = 0;
 120        unsigned last_shown = 0;
 121        const char *hunk_mark = "";
 122
 123        if (opt->pre_context)
 124                prev = xcalloc(opt->pre_context, sizeof(*prev));
 125        if (opt->pre_context || opt->post_context)
 126                hunk_mark = "--\n";
 127
 128        while (left) {
 129                regmatch_t pmatch[10];
 130                char *eol, ch;
 131                int hit;
 132
 133                eol = end_of_line(bol, &left);
 134                ch = *eol;
 135                *eol = 0;
 136
 137                hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch),
 138                               pmatch, 0);
 139                if (opt->invert)
 140                        hit = !hit;
 141                if (hit) {
 142                        /* Hit at this line.  If we haven't shown the
 143                         * pre-context lines, we would need to show them.
 144                         */
 145                        if (opt->pre_context) {
 146                                unsigned from;
 147                                if (opt->pre_context < lno)
 148                                        from = lno - opt->pre_context;
 149                                else
 150                                        from = 1;
 151                                if (from <= last_shown)
 152                                        from = last_shown + 1;
 153                                if (last_shown && from != last_shown + 1)
 154                                        printf(hunk_mark);
 155                                while (from < lno) {
 156                                        pcl = &prev[lno-from-1];
 157                                        show_line(opt, pcl->bol, pcl->eol,
 158                                                  name, from, '-');
 159                                        from++;
 160                                }
 161                                last_shown = lno-1;
 162                        }
 163                        if (last_shown && lno != last_shown + 1)
 164                                printf(hunk_mark);
 165                        show_line(opt, bol, eol, name, lno, ':');
 166                        last_shown = last_hit = lno;
 167                }
 168                else if (last_hit &&
 169                         lno <= last_hit + opt->post_context) {
 170                        /* If the last hit is within the post context,
 171                         * we need to show this line.
 172                         */
 173                        if (last_shown && lno != last_shown + 1)
 174                                printf(hunk_mark);
 175                        show_line(opt, bol, eol, name, lno, '-');
 176                        last_shown = lno;
 177                }
 178                if (opt->pre_context) {
 179                        memmove(prev+1, prev,
 180                                (opt->pre_context-1) * sizeof(*prev));
 181                        prev->bol = bol;
 182                        prev->eol = eol;
 183                }
 184                *eol = ch;
 185                bol = eol + 1;
 186                left--;
 187                lno++;
 188        }
 189        return !!last_hit;
 190}
 191
 192static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
 193{
 194        unsigned long size;
 195        char *data;
 196        char type[20];
 197        int hit;
 198        data = read_sha1_file(sha1, type, &size);
 199        if (!data) {
 200                error("'%s': unable to read %s", name, sha1_to_hex(sha1));
 201                return 0;
 202        }
 203        hit = grep_buffer(opt, name, data, size);
 204        free(data);
 205        return hit;
 206}
 207
 208static int grep_file(struct grep_opt *opt, const char *filename)
 209{
 210        struct stat st;
 211        int i;
 212        char *data;
 213        if (lstat(filename, &st) < 0) {
 214        err_ret:
 215                if (errno != ENOENT)
 216                        error("'%s': %s", filename, strerror(errno));
 217                return 0;
 218        }
 219        if (!st.st_size)
 220                return 0; /* empty file -- no grep hit */
 221        if (!S_ISREG(st.st_mode))
 222                return 0;
 223        i = open(filename, O_RDONLY);
 224        if (i < 0)
 225                goto err_ret;
 226        data = xmalloc(st.st_size + 1);
 227        if (st.st_size != xread(i, data, st.st_size)) {
 228                error("'%s': short read %s", filename, strerror(errno));
 229                close(i);
 230                free(data);
 231                return 0;
 232        }
 233        close(i);
 234        i = grep_buffer(opt, filename, data, st.st_size);
 235        free(data);
 236        return i;
 237}
 238
 239static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached)
 240{
 241        int hit = 0;
 242        int nr;
 243        read_cache();
 244
 245        for (nr = 0; nr < active_nr; nr++) {
 246                struct cache_entry *ce = active_cache[nr];
 247                if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
 248                        continue;
 249                if (!pathspec_matches(&revs->diffopt, ce->name))
 250                        continue;
 251                if (cached)
 252                        hit |= grep_sha1(opt, ce->sha1, ce->name);
 253                else
 254                        hit |= grep_file(opt, ce->name);
 255        }
 256        return hit;
 257}
 258
 259static int grep_tree(struct grep_opt *opt, struct rev_info *revs,
 260                     struct tree_desc *tree,
 261                     const char *tree_name, const char *base)
 262{
 263        unsigned mode;
 264        int len;
 265        int hit = 0;
 266        const char *path;
 267        const unsigned char *sha1;
 268        char *down;
 269        char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100);
 270
 271        if (tree_name[0]) {
 272                int offset = sprintf(path_buf, "%s:", tree_name);
 273                down = path_buf + offset;
 274                strcat(down, base);
 275        }
 276        else {
 277                down = path_buf;
 278                strcpy(down, base);
 279        }
 280        len = strlen(path_buf);
 281
 282        while (tree->size) {
 283                int pathlen;
 284                sha1 = tree_entry_extract(tree, &path, &mode);
 285                pathlen = strlen(path);
 286                strcpy(path_buf + len, path);
 287
 288                if (S_ISDIR(mode))
 289                        /* Match "abc/" against pathspec to
 290                         * decide if we want to descend into "abc"
 291                         * directory.
 292                         */
 293                        strcpy(path_buf + len + pathlen, "/");
 294
 295                if (!pathspec_matches(&revs->diffopt, down))
 296                        ;
 297                else if (S_ISREG(mode))
 298                        hit |= grep_sha1(opt, sha1, path_buf);
 299                else if (S_ISDIR(mode)) {
 300                        char type[20];
 301                        struct tree_desc sub;
 302                        void *data;
 303                        data = read_sha1_file(sha1, type, &sub.size);
 304                        if (!data)
 305                                die("unable to read tree (%s)",
 306                                    sha1_to_hex(sha1));
 307                        sub.buf = data;
 308                        hit |= grep_tree(opt, revs, &sub, tree_name, down);
 309                        free(data);
 310                }
 311                update_tree_entry(tree);
 312        }
 313        return hit;
 314}
 315
 316static int grep_object(struct grep_opt *opt, struct rev_info *revs,
 317                       struct object *obj, const char *name)
 318{
 319        if (!strcmp(obj->type, blob_type))
 320                return grep_sha1(opt, obj->sha1, name);
 321        if (!strcmp(obj->type, commit_type) ||
 322            !strcmp(obj->type, tree_type)) {
 323                struct tree_desc tree;
 324                void *data;
 325                int hit;
 326                data = read_object_with_reference(obj->sha1, tree_type,
 327                                                  &tree.size, NULL);
 328                if (!data)
 329                        die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
 330                tree.buf = data;
 331                hit = grep_tree(opt, revs, &tree, name, "");
 332                free(data);
 333                return hit;
 334        }
 335        die("unable to grep from object of type %s", obj->type);
 336}
 337
 338static const char builtin_grep_usage[] =
 339"git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
 340
 341int cmd_grep(int argc, const char **argv, char **envp)
 342{
 343        struct rev_info rev;
 344        const char **dst, **src;
 345        int err;
 346        int hit = 0;
 347        int no_more_arg = 0;
 348        int seen_range = 0;
 349        int seen_noncommit = 0;
 350        int cached = 0;
 351        struct grep_opt opt;
 352        struct object_list *list;
 353
 354        memset(&opt, 0, sizeof(opt));
 355        opt.regflags = REG_NEWLINE;
 356
 357        /*
 358         * Interpret and remove the grep options upfront.  Sigh...
 359         */
 360        for (dst = src = &argv[1]; src < argc + argv; ) {
 361                const char *arg = *src++;
 362                if (!no_more_arg) {
 363                        if (!strcmp("--", arg)) {
 364                                no_more_arg = 1;
 365                                *dst++ = arg;
 366                                continue;
 367                        }
 368                        if (!strcmp("--cached", arg)) {
 369                                cached = 1;
 370                                continue;
 371                        }
 372                        if (!strcmp("-i", arg) ||
 373                            !strcmp("--ignore-case", arg)) {
 374                                opt.regflags |= REG_ICASE;
 375                                continue;
 376                        }
 377                        if (!strcmp("-v", arg) ||
 378                            !strcmp("--invert-match", arg)) {
 379                                opt.invert = 1;
 380                                continue;
 381                        }
 382                        if (!strcmp("-E", arg) ||
 383                            !strcmp("--extended-regexp", arg)) {
 384                                opt.regflags |= REG_EXTENDED;
 385                                continue;
 386                        }
 387                        if (!strcmp("-G", arg) ||
 388                            !strcmp("--basic-regexp", arg)) {
 389                                opt.regflags &= ~REG_EXTENDED;
 390                                continue;
 391                        }
 392                        if (!strcmp("-e", arg)) {
 393                                if (src < argc + argv) {
 394                                        opt.pattern = *src++;
 395                                        continue;
 396                                }
 397                                usage(builtin_grep_usage);
 398                        }
 399                        if (!strcmp("-n", arg)) {
 400                                opt.linenum = 1;
 401                                continue;
 402                        }
 403                        if (!strcmp("-H", arg)) {
 404                                /* We always show the pathname, so this
 405                                 * is a noop.
 406                                 */
 407                                continue;
 408                        }
 409                        if (!strcmp("-A", arg) ||
 410                            !strcmp("-B", arg) ||
 411                            !strcmp("-C", arg)) {
 412                                unsigned num;
 413                                if ((argc + argv <= src) ||
 414                                    sscanf(*src++, "%u", &num) != 1)
 415                                        usage(builtin_grep_usage);
 416                                switch (arg[1]) {
 417                                case 'A':
 418                                        opt.post_context = num;
 419                                        break;
 420                                case 'C':
 421                                        opt.post_context = num;
 422                                case 'B':
 423                                        opt.pre_context = num;
 424                                        break;
 425                                }
 426                                continue;
 427                        }
 428                }
 429                *dst++ = arg;
 430        }
 431        if (!opt.pattern)
 432                die("no pattern given.");
 433
 434        err = regcomp(&opt.regexp, opt.pattern, opt.regflags);
 435        if (err) {
 436                char errbuf[1024];
 437                regerror(err, &opt.regexp, errbuf, 1024);
 438                regfree(&opt.regexp);
 439                die("'%s': %s", opt.pattern, errbuf);
 440        }
 441
 442        init_revisions(&rev);
 443        *dst = NULL;
 444        argc = setup_revisions(dst - argv, argv, &rev, NULL);
 445
 446        /*
 447         * Do not walk "grep -e foo master next pu -- Documentation/"
 448         * but do walk "grep -e foo master..next -- Documentation/".
 449         * Ranged request mixed with a blob or tree object, like
 450         * "grep -e foo v1.0.0:Documentation/ master..next"
 451         * so detect that and complain.
 452         */
 453        for (list = rev.pending_objects; list; list = list->next) {
 454                struct object *real_obj;
 455                if (list->item->flags & UNINTERESTING)
 456                        seen_range = 1;
 457                real_obj = deref_tag(list->item, NULL, 0);
 458                if (strcmp(real_obj->type, commit_type))
 459                        seen_noncommit = 1;
 460        }
 461        if (!rev.pending_objects)
 462                return !grep_cache(&opt, &rev, cached);
 463        if (cached)
 464                die("both --cached and revisions given.");
 465
 466        if (seen_range && seen_noncommit)
 467                die("both A..B and non commit are given.");
 468        if (seen_range) {
 469                struct commit *commit;
 470                prepare_revision_walk(&rev);
 471                while ((commit = get_revision(&rev)) != NULL) {
 472                        unsigned char *sha1 = commit->object.sha1;
 473                        const char *n = find_unique_abbrev(sha1, rev.abbrev);
 474                        char rev_name[41];
 475                        strcpy(rev_name, n);
 476                        if (grep_object(&opt, &rev, &commit->object, rev_name))
 477                                hit = 1;
 478                        commit->buffer = NULL;
 479                }
 480                return !hit;
 481        }
 482
 483        /* all of them are non-commit; do not walk, and
 484         * do not lose their names.
 485         */
 486        for (list = rev.pending_objects; list; list = list->next) {
 487                struct object *real_obj;
 488                real_obj = deref_tag(list->item, NULL, 0);
 489                if (grep_object(&opt, &rev, real_obj, list->name))
 490                        hit = 1;
 491        }
 492        return !hit;
 493}