builtin-grep.con commit Merge branch 'maint' (e866ffd)
   1/*
   2 * Builtin "git grep"
   3 *
   4 * Copyright (c) 2006 Junio C Hamano
   5 */
   6#include "cache.h"
   7#include "blob.h"
   8#include "tree.h"
   9#include "commit.h"
  10#include "tag.h"
  11#include "tree-walk.h"
  12#include "builtin.h"
  13#include <regex.h>
  14#include <fnmatch.h>
  15#include <sys/wait.h>
  16
  17/*
  18 * git grep pathspecs are somewhat different from diff-tree pathspecs;
  19 * pathname wildcards are allowed.
  20 */
  21static int pathspec_matches(const char **paths, const char *name)
  22{
  23        int namelen, i;
  24        if (!paths || !*paths)
  25                return 1;
  26        namelen = strlen(name);
  27        for (i = 0; paths[i]; i++) {
  28                const char *match = paths[i];
  29                int matchlen = strlen(match);
  30                const char *cp, *meta;
  31
  32                if (!matchlen ||
  33                    ((matchlen <= namelen) &&
  34                     !strncmp(name, match, matchlen) &&
  35                     (match[matchlen-1] == '/' ||
  36                      name[matchlen] == '\0' || name[matchlen] == '/')))
  37                        return 1;
  38                if (!fnmatch(match, name, 0))
  39                        return 1;
  40                if (name[namelen-1] != '/')
  41                        continue;
  42
  43                /* We are being asked if the directory ("name") is worth
  44                 * descending into.
  45                 *
  46                 * Find the longest leading directory name that does
  47                 * not have metacharacter in the pathspec; the name
  48                 * we are looking at must overlap with that directory.
  49                 */
  50                for (cp = match, meta = NULL; cp - match < matchlen; cp++) {
  51                        char ch = *cp;
  52                        if (ch == '*' || ch == '[' || ch == '?') {
  53                                meta = cp;
  54                                break;
  55                        }
  56                }
  57                if (!meta)
  58                        meta = cp; /* fully literal */
  59
  60                if (namelen <= meta - match) {
  61                        /* Looking at "Documentation/" and
  62                         * the pattern says "Documentation/howto/", or
  63                         * "Documentation/diff*.txt".  The name we
  64                         * have should match prefix.
  65                         */
  66                        if (!memcmp(match, name, namelen))
  67                                return 1;
  68                        continue;
  69                }
  70
  71                if (meta - match < namelen) {
  72                        /* Looking at "Documentation/howto/" and
  73                         * the pattern says "Documentation/h*";
  74                         * match up to "Do.../h"; this avoids descending
  75                         * into "Documentation/technical/".
  76                         */
  77                        if (!memcmp(match, name, meta - match))
  78                                return 1;
  79                        continue;
  80                }
  81        }
  82        return 0;
  83}
  84
  85enum grep_pat_token {
  86        GREP_PATTERN,
  87        GREP_AND,
  88        GREP_OPEN_PAREN,
  89        GREP_CLOSE_PAREN,
  90        GREP_NOT,
  91        GREP_OR,
  92};
  93
  94struct grep_pat {
  95        struct grep_pat *next;
  96        const char *origin;
  97        int no;
  98        enum grep_pat_token token;
  99        const char *pattern;
 100        regex_t regexp;
 101};
 102
 103enum grep_expr_node {
 104        GREP_NODE_ATOM,
 105        GREP_NODE_NOT,
 106        GREP_NODE_AND,
 107        GREP_NODE_OR,
 108};
 109
 110struct grep_expr {
 111        enum grep_expr_node node;
 112        union {
 113                struct grep_pat *atom;
 114                struct grep_expr *unary;
 115                struct {
 116                        struct grep_expr *left;
 117                        struct grep_expr *right;
 118                } binary;
 119        } u;
 120};
 121
 122struct grep_opt {
 123        struct grep_pat *pattern_list;
 124        struct grep_pat **pattern_tail;
 125        struct grep_expr *pattern_expression;
 126        int prefix_length;
 127        regex_t regexp;
 128        unsigned linenum:1;
 129        unsigned invert:1;
 130        unsigned name_only:1;
 131        unsigned unmatch_name_only:1;
 132        unsigned count:1;
 133        unsigned word_regexp:1;
 134        unsigned fixed:1;
 135#define GREP_BINARY_DEFAULT     0
 136#define GREP_BINARY_NOMATCH     1
 137#define GREP_BINARY_TEXT        2
 138        unsigned binary:2;
 139        unsigned extended:1;
 140        unsigned relative:1;
 141        int regflags;
 142        unsigned pre_context;
 143        unsigned post_context;
 144};
 145
 146static void add_pattern(struct grep_opt *opt, const char *pat,
 147                        const char *origin, int no, enum grep_pat_token t)
 148{
 149        struct grep_pat *p = xcalloc(1, sizeof(*p));
 150        p->pattern = pat;
 151        p->origin = origin;
 152        p->no = no;
 153        p->token = t;
 154        *opt->pattern_tail = p;
 155        opt->pattern_tail = &p->next;
 156        p->next = NULL;
 157}
 158
 159static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 160{
 161        int err = regcomp(&p->regexp, p->pattern, opt->regflags);
 162        if (err) {
 163                char errbuf[1024];
 164                char where[1024];
 165                if (p->no)
 166                        sprintf(where, "In '%s' at %d, ",
 167                                p->origin, p->no);
 168                else if (p->origin)
 169                        sprintf(where, "%s, ", p->origin);
 170                else
 171                        where[0] = 0;
 172                regerror(err, &p->regexp, errbuf, 1024);
 173                regfree(&p->regexp);
 174                die("%s'%s': %s", where, p->pattern, errbuf);
 175        }
 176}
 177
 178static struct grep_expr *compile_pattern_expr(struct grep_pat **);
 179static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
 180{
 181        struct grep_pat *p;
 182        struct grep_expr *x;
 183
 184        p = *list;
 185        switch (p->token) {
 186        case GREP_PATTERN: /* atom */
 187                x = xcalloc(1, sizeof (struct grep_expr));
 188                x->node = GREP_NODE_ATOM;
 189                x->u.atom = p;
 190                *list = p->next;
 191                return x;
 192        case GREP_OPEN_PAREN:
 193                *list = p->next;
 194                x = compile_pattern_expr(list);
 195                if (!x)
 196                        return NULL;
 197                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 198                        die("unmatched parenthesis");
 199                *list = (*list)->next;
 200                return x;
 201        default:
 202                return NULL;
 203        }
 204}
 205
 206static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 207{
 208        struct grep_pat *p;
 209        struct grep_expr *x;
 210
 211        p = *list;
 212        switch (p->token) {
 213        case GREP_NOT:
 214                if (!p->next)
 215                        die("--not not followed by pattern expression");
 216                *list = p->next;
 217                x = xcalloc(1, sizeof (struct grep_expr));
 218                x->node = GREP_NODE_NOT;
 219                x->u.unary = compile_pattern_not(list);
 220                if (!x->u.unary)
 221                        die("--not followed by non pattern expression");
 222                return x;
 223        default:
 224                return compile_pattern_atom(list);
 225        }
 226}
 227
 228static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 229{
 230        struct grep_pat *p;
 231        struct grep_expr *x, *y, *z;
 232
 233        x = compile_pattern_not(list);
 234        p = *list;
 235        if (p && p->token == GREP_AND) {
 236                if (!p->next)
 237                        die("--and not followed by pattern expression");
 238                *list = p->next;
 239                y = compile_pattern_and(list);
 240                if (!y)
 241                        die("--and not followed by pattern expression");
 242                z = xcalloc(1, sizeof (struct grep_expr));
 243                z->node = GREP_NODE_AND;
 244                z->u.binary.left = x;
 245                z->u.binary.right = y;
 246                return z;
 247        }
 248        return x;
 249}
 250
 251static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 252{
 253        struct grep_pat *p;
 254        struct grep_expr *x, *y, *z;
 255
 256        x = compile_pattern_and(list);
 257        p = *list;
 258        if (x && p && p->token != GREP_CLOSE_PAREN) {
 259                y = compile_pattern_or(list);
 260                if (!y)
 261                        die("not a pattern expression %s", p->pattern);
 262                z = xcalloc(1, sizeof (struct grep_expr));
 263                z->node = GREP_NODE_OR;
 264                z->u.binary.left = x;
 265                z->u.binary.right = y;
 266                return z;
 267        }
 268        return x;
 269}
 270
 271static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 272{
 273        return compile_pattern_or(list);
 274}
 275
 276static void compile_patterns(struct grep_opt *opt)
 277{
 278        struct grep_pat *p;
 279
 280        /* First compile regexps */
 281        for (p = opt->pattern_list; p; p = p->next) {
 282                if (p->token == GREP_PATTERN)
 283                        compile_regexp(p, opt);
 284                else
 285                        opt->extended = 1;
 286        }
 287
 288        if (!opt->extended)
 289                return;
 290
 291        /* Then bundle them up in an expression.
 292         * A classic recursive descent parser would do.
 293         */
 294        p = opt->pattern_list;
 295        opt->pattern_expression = compile_pattern_expr(&p);
 296#if DEBUG
 297        dump_pattern_exp(opt->pattern_expression, 0);
 298#endif
 299        if (p)
 300                die("incomplete pattern expression: %s", p->pattern);
 301}
 302
 303static char *end_of_line(char *cp, unsigned long *left)
 304{
 305        unsigned long l = *left;
 306        while (l && *cp != '\n') {
 307                l--;
 308                cp++;
 309        }
 310        *left = l;
 311        return cp;
 312}
 313
 314static int word_char(char ch)
 315{
 316        return isalnum(ch) || ch == '_';
 317}
 318
 319static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 320                      const char *name, unsigned lno, char sign)
 321{
 322        printf("%s%c", name, sign);
 323        if (opt->linenum)
 324                printf("%d%c", lno, sign);
 325        printf("%.*s\n", (int)(eol-bol), bol);
 326}
 327
 328/*
 329 * NEEDSWORK: share code with diff.c
 330 */
 331#define FIRST_FEW_BYTES 8000
 332static int buffer_is_binary(const char *ptr, unsigned long size)
 333{
 334        if (FIRST_FEW_BYTES < size)
 335                size = FIRST_FEW_BYTES;
 336        return !!memchr(ptr, 0, size);
 337}
 338
 339static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 340{
 341        char *hit = strstr(line, pattern);
 342        if (!hit) {
 343                match->rm_so = match->rm_eo = -1;
 344                return REG_NOMATCH;
 345        }
 346        else {
 347                match->rm_so = hit - line;
 348                match->rm_eo = match->rm_so + strlen(pattern);
 349                return 0;
 350        }
 351}
 352
 353static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol)
 354{
 355        int hit = 0;
 356        int at_true_bol = 1;
 357        regmatch_t pmatch[10];
 358
 359 again:
 360        if (!opt->fixed) {
 361                regex_t *exp = &p->regexp;
 362                hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
 363                               pmatch, 0);
 364        }
 365        else {
 366                hit = !fixmatch(p->pattern, bol, pmatch);
 367        }
 368
 369        if (hit && opt->word_regexp) {
 370                if ((pmatch[0].rm_so < 0) ||
 371                    (eol - bol) <= pmatch[0].rm_so ||
 372                    (pmatch[0].rm_eo < 0) ||
 373                    (eol - bol) < pmatch[0].rm_eo)
 374                        die("regexp returned nonsense");
 375
 376                /* Match beginning must be either beginning of the
 377                 * line, or at word boundary (i.e. the last char must
 378                 * not be a word char).  Similarly, match end must be
 379                 * either end of the line, or at word boundary
 380                 * (i.e. the next char must not be a word char).
 381                 */
 382                if ( ((pmatch[0].rm_so == 0 && at_true_bol) ||
 383                      !word_char(bol[pmatch[0].rm_so-1])) &&
 384                     ((pmatch[0].rm_eo == (eol-bol)) ||
 385                      !word_char(bol[pmatch[0].rm_eo])) )
 386                        ;
 387                else
 388                        hit = 0;
 389
 390                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
 391                        /* There could be more than one match on the
 392                         * line, and the first match might not be
 393                         * strict word match.  But later ones could be!
 394                         */
 395                        bol = pmatch[0].rm_so + bol + 1;
 396                        at_true_bol = 0;
 397                        goto again;
 398                }
 399        }
 400        return hit;
 401}
 402
 403static int match_expr_eval(struct grep_opt *opt,
 404                           struct grep_expr *x,
 405                           char *bol, char *eol)
 406{
 407        switch (x->node) {
 408        case GREP_NODE_ATOM:
 409                return match_one_pattern(opt, x->u.atom, bol, eol);
 410                break;
 411        case GREP_NODE_NOT:
 412                return !match_expr_eval(opt, x->u.unary, bol, eol);
 413        case GREP_NODE_AND:
 414                return (match_expr_eval(opt, x->u.binary.left, bol, eol) &&
 415                        match_expr_eval(opt, x->u.binary.right, bol, eol));
 416        case GREP_NODE_OR:
 417                return (match_expr_eval(opt, x->u.binary.left, bol, eol) ||
 418                        match_expr_eval(opt, x->u.binary.right, bol, eol));
 419        }
 420        die("Unexpected node type (internal error) %d\n", x->node);
 421}
 422
 423static int match_expr(struct grep_opt *opt, char *bol, char *eol)
 424{
 425        struct grep_expr *x = opt->pattern_expression;
 426        return match_expr_eval(opt, x, bol, eol);
 427}
 428
 429static int match_line(struct grep_opt *opt, char *bol, char *eol)
 430{
 431        struct grep_pat *p;
 432        if (opt->extended)
 433                return match_expr(opt, bol, eol);
 434        for (p = opt->pattern_list; p; p = p->next) {
 435                if (match_one_pattern(opt, p, bol, eol))
 436                        return 1;
 437        }
 438        return 0;
 439}
 440
 441static int grep_buffer(struct grep_opt *opt, const char *name,
 442                       char *buf, unsigned long size)
 443{
 444        char *bol = buf;
 445        unsigned long left = size;
 446        unsigned lno = 1;
 447        struct pre_context_line {
 448                char *bol;
 449                char *eol;
 450        } *prev = NULL, *pcl;
 451        unsigned last_hit = 0;
 452        unsigned last_shown = 0;
 453        int binary_match_only = 0;
 454        const char *hunk_mark = "";
 455        unsigned count = 0;
 456
 457        if (buffer_is_binary(buf, size)) {
 458                switch (opt->binary) {
 459                case GREP_BINARY_DEFAULT:
 460                        binary_match_only = 1;
 461                        break;
 462                case GREP_BINARY_NOMATCH:
 463                        return 0; /* Assume unmatch */
 464                        break;
 465                default:
 466                        break;
 467                }
 468        }
 469
 470        if (opt->pre_context)
 471                prev = xcalloc(opt->pre_context, sizeof(*prev));
 472        if (opt->pre_context || opt->post_context)
 473                hunk_mark = "--\n";
 474
 475        while (left) {
 476                char *eol, ch;
 477                int hit = 0;
 478
 479                eol = end_of_line(bol, &left);
 480                ch = *eol;
 481                *eol = 0;
 482
 483                hit = match_line(opt, bol, eol);
 484
 485                /* "grep -v -e foo -e bla" should list lines
 486                 * that do not have either, so inversion should
 487                 * be done outside.
 488                 */
 489                if (opt->invert)
 490                        hit = !hit;
 491                if (opt->unmatch_name_only) {
 492                        if (hit)
 493                                return 0;
 494                        goto next_line;
 495                }
 496                if (hit) {
 497                        count++;
 498                        if (binary_match_only) {
 499                                printf("Binary file %s matches\n", name);
 500                                return 1;
 501                        }
 502                        if (opt->name_only) {
 503                                printf("%s\n", name);
 504                                return 1;
 505                        }
 506                        /* Hit at this line.  If we haven't shown the
 507                         * pre-context lines, we would need to show them.
 508                         * When asked to do "count", this still show
 509                         * the context which is nonsense, but the user
 510                         * deserves to get that ;-).
 511                         */
 512                        if (opt->pre_context) {
 513                                unsigned from;
 514                                if (opt->pre_context < lno)
 515                                        from = lno - opt->pre_context;
 516                                else
 517                                        from = 1;
 518                                if (from <= last_shown)
 519                                        from = last_shown + 1;
 520                                if (last_shown && from != last_shown + 1)
 521                                        printf(hunk_mark);
 522                                while (from < lno) {
 523                                        pcl = &prev[lno-from-1];
 524                                        show_line(opt, pcl->bol, pcl->eol,
 525                                                  name, from, '-');
 526                                        from++;
 527                                }
 528                                last_shown = lno-1;
 529                        }
 530                        if (last_shown && lno != last_shown + 1)
 531                                printf(hunk_mark);
 532                        if (!opt->count)
 533                                show_line(opt, bol, eol, name, lno, ':');
 534                        last_shown = last_hit = lno;
 535                }
 536                else if (last_hit &&
 537                         lno <= last_hit + opt->post_context) {
 538                        /* If the last hit is within the post context,
 539                         * we need to show this line.
 540                         */
 541                        if (last_shown && lno != last_shown + 1)
 542                                printf(hunk_mark);
 543                        show_line(opt, bol, eol, name, lno, '-');
 544                        last_shown = lno;
 545                }
 546                if (opt->pre_context) {
 547                        memmove(prev+1, prev,
 548                                (opt->pre_context-1) * sizeof(*prev));
 549                        prev->bol = bol;
 550                        prev->eol = eol;
 551                }
 552
 553        next_line:
 554                *eol = ch;
 555                bol = eol + 1;
 556                if (!left)
 557                        break;
 558                left--;
 559                lno++;
 560        }
 561
 562        if (opt->unmatch_name_only) {
 563                /* We did not see any hit, so we want to show this */
 564                printf("%s\n", name);
 565                return 1;
 566        }
 567
 568        /* NEEDSWORK:
 569         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
 570         * which feels mostly useless but sometimes useful.  Maybe
 571         * make it another option?  For now suppress them.
 572         */
 573        if (opt->count && count)
 574                printf("%s:%u\n", name, count);
 575        return !!last_hit;
 576}
 577
 578static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name, int tree_name_len)
 579{
 580        unsigned long size;
 581        char *data;
 582        char type[20];
 583        char *to_free = NULL;
 584        int hit;
 585
 586        data = read_sha1_file(sha1, type, &size);
 587        if (!data) {
 588                error("'%s': unable to read %s", name, sha1_to_hex(sha1));
 589                return 0;
 590        }
 591        if (opt->relative && opt->prefix_length) {
 592                static char name_buf[PATH_MAX];
 593                char *cp;
 594                int name_len = strlen(name) - opt->prefix_length + 1;
 595
 596                if (!tree_name_len)
 597                        name += opt->prefix_length;
 598                else {
 599                        if (ARRAY_SIZE(name_buf) <= name_len)
 600                                cp = to_free = xmalloc(name_len);
 601                        else
 602                                cp = name_buf;
 603                        memcpy(cp, name, tree_name_len);
 604                        strcpy(cp + tree_name_len,
 605                               name + tree_name_len + opt->prefix_length);
 606                        name = cp;
 607                }
 608        }
 609        hit = grep_buffer(opt, name, data, size);
 610        free(data);
 611        free(to_free);
 612        return hit;
 613}
 614
 615static int grep_file(struct grep_opt *opt, const char *filename)
 616{
 617        struct stat st;
 618        int i;
 619        char *data;
 620        if (lstat(filename, &st) < 0) {
 621        err_ret:
 622                if (errno != ENOENT)
 623                        error("'%s': %s", filename, strerror(errno));
 624                return 0;
 625        }
 626        if (!st.st_size)
 627                return 0; /* empty file -- no grep hit */
 628        if (!S_ISREG(st.st_mode))
 629                return 0;
 630        i = open(filename, O_RDONLY);
 631        if (i < 0)
 632                goto err_ret;
 633        data = xmalloc(st.st_size + 1);
 634        if (st.st_size != xread(i, data, st.st_size)) {
 635                error("'%s': short read %s", filename, strerror(errno));
 636                close(i);
 637                free(data);
 638                return 0;
 639        }
 640        close(i);
 641        if (opt->relative && opt->prefix_length)
 642                filename += opt->prefix_length;
 643        i = grep_buffer(opt, filename, data, st.st_size);
 644        free(data);
 645        return i;
 646}
 647
 648static int exec_grep(int argc, const char **argv)
 649{
 650        pid_t pid;
 651        int status;
 652
 653        argv[argc] = NULL;
 654        pid = fork();
 655        if (pid < 0)
 656                return pid;
 657        if (!pid) {
 658                execvp("grep", (char **) argv);
 659                exit(255);
 660        }
 661        while (waitpid(pid, &status, 0) < 0) {
 662                if (errno == EINTR)
 663                        continue;
 664                return -1;
 665        }
 666        if (WIFEXITED(status)) {
 667                if (!WEXITSTATUS(status))
 668                        return 1;
 669                return 0;
 670        }
 671        return -1;
 672}
 673
 674#define MAXARGS 1000
 675#define ARGBUF 4096
 676#define push_arg(a) do { \
 677        if (nr < MAXARGS) argv[nr++] = (a); \
 678        else die("maximum number of args exceeded"); \
 679        } while (0)
 680
 681static int external_grep(struct grep_opt *opt, const char **paths, int cached)
 682{
 683        int i, nr, argc, hit, len, status;
 684        const char *argv[MAXARGS+1];
 685        char randarg[ARGBUF];
 686        char *argptr = randarg;
 687        struct grep_pat *p;
 688
 689        if (opt->extended || (opt->relative && opt->prefix_length))
 690                return -1;
 691        len = nr = 0;
 692        push_arg("grep");
 693        if (opt->fixed)
 694                push_arg("-F");
 695        if (opt->linenum)
 696                push_arg("-n");
 697        if (opt->regflags & REG_EXTENDED)
 698                push_arg("-E");
 699        if (opt->regflags & REG_ICASE)
 700                push_arg("-i");
 701        if (opt->word_regexp)
 702                push_arg("-w");
 703        if (opt->name_only)
 704                push_arg("-l");
 705        if (opt->unmatch_name_only)
 706                push_arg("-L");
 707        if (opt->count)
 708                push_arg("-c");
 709        if (opt->post_context || opt->pre_context) {
 710                if (opt->post_context != opt->pre_context) {
 711                        if (opt->pre_context) {
 712                                push_arg("-B");
 713                                len += snprintf(argptr, sizeof(randarg)-len,
 714                                                "%u", opt->pre_context);
 715                                if (sizeof(randarg) <= len)
 716                                        die("maximum length of args exceeded");
 717                                push_arg(argptr);
 718                                argptr += len;
 719                        }
 720                        if (opt->post_context) {
 721                                push_arg("-A");
 722                                len += snprintf(argptr, sizeof(randarg)-len,
 723                                                "%u", opt->post_context);
 724                                if (sizeof(randarg) <= len)
 725                                        die("maximum length of args exceeded");
 726                                push_arg(argptr);
 727                                argptr += len;
 728                        }
 729                }
 730                else {
 731                        push_arg("-C");
 732                        len += snprintf(argptr, sizeof(randarg)-len,
 733                                        "%u", opt->post_context);
 734                        if (sizeof(randarg) <= len)
 735                                die("maximum length of args exceeded");
 736                        push_arg(argptr);
 737                        argptr += len;
 738                }
 739        }
 740        for (p = opt->pattern_list; p; p = p->next) {
 741                push_arg("-e");
 742                push_arg(p->pattern);
 743        }
 744
 745        /*
 746         * To make sure we get the header printed out when we want it,
 747         * add /dev/null to the paths to grep.  This is unnecessary
 748         * (and wrong) with "-l" or "-L", which always print out the
 749         * name anyway.
 750         *
 751         * GNU grep has "-H", but this is portable.
 752         */
 753        if (!opt->name_only && !opt->unmatch_name_only)
 754                push_arg("/dev/null");
 755
 756        hit = 0;
 757        argc = nr;
 758        for (i = 0; i < active_nr; i++) {
 759                struct cache_entry *ce = active_cache[i];
 760                char *name;
 761                if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
 762                        continue;
 763                if (!pathspec_matches(paths, ce->name))
 764                        continue;
 765                name = ce->name;
 766                if (name[0] == '-') {
 767                        int len = ce_namelen(ce);
 768                        name = xmalloc(len + 3);
 769                        memcpy(name, "./", 2);
 770                        memcpy(name + 2, ce->name, len + 1);
 771                }
 772                argv[argc++] = name;
 773                if (argc < MAXARGS)
 774                        continue;
 775                status = exec_grep(argc, argv);
 776                if (0 < status)
 777                        hit = 1;
 778                argc = nr;
 779        }
 780        if (argc > nr) {
 781                status = exec_grep(argc, argv);
 782                if (0 < status)
 783                        hit = 1;
 784        }
 785        return hit;
 786}
 787
 788static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
 789{
 790        int hit = 0;
 791        int nr;
 792        read_cache();
 793
 794#ifdef __unix__
 795        /*
 796         * Use the external "grep" command for the case where
 797         * we grep through the checked-out files. It tends to
 798         * be a lot more optimized
 799         */
 800        if (!cached) {
 801                hit = external_grep(opt, paths, cached);
 802                if (hit >= 0)
 803                        return hit;
 804        }
 805#endif
 806
 807        for (nr = 0; nr < active_nr; nr++) {
 808                struct cache_entry *ce = active_cache[nr];
 809                if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
 810                        continue;
 811                if (!pathspec_matches(paths, ce->name))
 812                        continue;
 813                if (cached)
 814                        hit |= grep_sha1(opt, ce->sha1, ce->name, 0);
 815                else
 816                        hit |= grep_file(opt, ce->name);
 817        }
 818        return hit;
 819}
 820
 821static int grep_tree(struct grep_opt *opt, const char **paths,
 822                     struct tree_desc *tree,
 823                     const char *tree_name, const char *base)
 824{
 825        int len;
 826        int hit = 0;
 827        struct name_entry entry;
 828        char *down;
 829        int tn_len = strlen(tree_name);
 830        char *path_buf = xmalloc(PATH_MAX + tn_len + 100);
 831
 832        if (tn_len) {
 833                tn_len = sprintf(path_buf, "%s:", tree_name);
 834                down = path_buf + tn_len;
 835                strcat(down, base);
 836        }
 837        else {
 838                down = path_buf;
 839                strcpy(down, base);
 840        }
 841        len = strlen(path_buf);
 842
 843        while (tree_entry(tree, &entry)) {
 844                strcpy(path_buf + len, entry.path);
 845
 846                if (S_ISDIR(entry.mode))
 847                        /* Match "abc/" against pathspec to
 848                         * decide if we want to descend into "abc"
 849                         * directory.
 850                         */
 851                        strcpy(path_buf + len + entry.pathlen, "/");
 852
 853                if (!pathspec_matches(paths, down))
 854                        ;
 855                else if (S_ISREG(entry.mode))
 856                        hit |= grep_sha1(opt, entry.sha1, path_buf, tn_len);
 857                else if (S_ISDIR(entry.mode)) {
 858                        char type[20];
 859                        struct tree_desc sub;
 860                        void *data;
 861                        data = read_sha1_file(entry.sha1, type, &sub.size);
 862                        if (!data)
 863                                die("unable to read tree (%s)",
 864                                    sha1_to_hex(entry.sha1));
 865                        sub.buf = data;
 866                        hit |= grep_tree(opt, paths, &sub, tree_name, down);
 867                        free(data);
 868                }
 869        }
 870        return hit;
 871}
 872
 873static int grep_object(struct grep_opt *opt, const char **paths,
 874                       struct object *obj, const char *name)
 875{
 876        if (obj->type == OBJ_BLOB)
 877                return grep_sha1(opt, obj->sha1, name, 0);
 878        if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
 879                struct tree_desc tree;
 880                void *data;
 881                int hit;
 882                data = read_object_with_reference(obj->sha1, tree_type,
 883                                                  &tree.size, NULL);
 884                if (!data)
 885                        die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
 886                tree.buf = data;
 887                hit = grep_tree(opt, paths, &tree, name, "");
 888                free(data);
 889                return hit;
 890        }
 891        die("unable to grep from object of type %s", typename(obj->type));
 892}
 893
 894static const char builtin_grep_usage[] =
 895"git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
 896
 897static const char emsg_invalid_context_len[] =
 898"%s: invalid context length argument";
 899static const char emsg_missing_context_len[] =
 900"missing context length argument";
 901static const char emsg_missing_argument[] =
 902"option requires an argument -%s";
 903
 904int cmd_grep(int argc, const char **argv, const char *prefix)
 905{
 906        int hit = 0;
 907        int cached = 0;
 908        int seen_dashdash = 0;
 909        struct grep_opt opt;
 910        struct object_array list = { 0, 0, NULL };
 911        const char **paths = NULL;
 912        int i;
 913
 914        memset(&opt, 0, sizeof(opt));
 915        opt.prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
 916        opt.relative = 1;
 917        opt.pattern_tail = &opt.pattern_list;
 918        opt.regflags = REG_NEWLINE;
 919
 920        /*
 921         * If there is no -- then the paths must exist in the working
 922         * tree.  If there is no explicit pattern specified with -e or
 923         * -f, we take the first unrecognized non option to be the
 924         * pattern, but then what follows it must be zero or more
 925         * valid refs up to the -- (if exists), and then existing
 926         * paths.  If there is an explicit pattern, then the first
 927         * unrecognized non option is the beginning of the refs list
 928         * that continues up to the -- (if exists), and then paths.
 929         */
 930
 931        while (1 < argc) {
 932                const char *arg = argv[1];
 933                argc--; argv++;
 934                if (!strcmp("--cached", arg)) {
 935                        cached = 1;
 936                        continue;
 937                }
 938                if (!strcmp("-a", arg) ||
 939                    !strcmp("--text", arg)) {
 940                        opt.binary = GREP_BINARY_TEXT;
 941                        continue;
 942                }
 943                if (!strcmp("-i", arg) ||
 944                    !strcmp("--ignore-case", arg)) {
 945                        opt.regflags |= REG_ICASE;
 946                        continue;
 947                }
 948                if (!strcmp("-I", arg)) {
 949                        opt.binary = GREP_BINARY_NOMATCH;
 950                        continue;
 951                }
 952                if (!strcmp("-v", arg) ||
 953                    !strcmp("--invert-match", arg)) {
 954                        opt.invert = 1;
 955                        continue;
 956                }
 957                if (!strcmp("-E", arg) ||
 958                    !strcmp("--extended-regexp", arg)) {
 959                        opt.regflags |= REG_EXTENDED;
 960                        continue;
 961                }
 962                if (!strcmp("-F", arg) ||
 963                    !strcmp("--fixed-strings", arg)) {
 964                        opt.fixed = 1;
 965                        continue;
 966                }
 967                if (!strcmp("-G", arg) ||
 968                    !strcmp("--basic-regexp", arg)) {
 969                        opt.regflags &= ~REG_EXTENDED;
 970                        continue;
 971                }
 972                if (!strcmp("-n", arg)) {
 973                        opt.linenum = 1;
 974                        continue;
 975                }
 976                if (!strcmp("-H", arg)) {
 977                        /* We always show the pathname, so this
 978                         * is a noop.
 979                         */
 980                        continue;
 981                }
 982                if (!strcmp("-l", arg) ||
 983                    !strcmp("--files-with-matches", arg)) {
 984                        opt.name_only = 1;
 985                        continue;
 986                }
 987                if (!strcmp("-L", arg) ||
 988                    !strcmp("--files-without-match", arg)) {
 989                        opt.unmatch_name_only = 1;
 990                        continue;
 991                }
 992                if (!strcmp("-c", arg) ||
 993                    !strcmp("--count", arg)) {
 994                        opt.count = 1;
 995                        continue;
 996                }
 997                if (!strcmp("-w", arg) ||
 998                    !strcmp("--word-regexp", arg)) {
 999                        opt.word_regexp = 1;
1000                        continue;
1001                }
1002                if (!strncmp("-A", arg, 2) ||
1003                    !strncmp("-B", arg, 2) ||
1004                    !strncmp("-C", arg, 2) ||
1005                    (arg[0] == '-' && '1' <= arg[1] && arg[1] <= '9')) {
1006                        unsigned num;
1007                        const char *scan;
1008                        switch (arg[1]) {
1009                        case 'A': case 'B': case 'C':
1010                                if (!arg[2]) {
1011                                        if (argc <= 1)
1012                                                die(emsg_missing_context_len);
1013                                        scan = *++argv;
1014                                        argc--;
1015                                }
1016                                else
1017                                        scan = arg + 2;
1018                                break;
1019                        default:
1020                                scan = arg + 1;
1021                                break;
1022                        }
1023                        if (sscanf(scan, "%u", &num) != 1)
1024                                die(emsg_invalid_context_len, scan);
1025                        switch (arg[1]) {
1026                        case 'A':
1027                                opt.post_context = num;
1028                                break;
1029                        default:
1030                        case 'C':
1031                                opt.post_context = num;
1032                        case 'B':
1033                                opt.pre_context = num;
1034                                break;
1035                        }
1036                        continue;
1037                }
1038                if (!strcmp("-f", arg)) {
1039                        FILE *patterns;
1040                        int lno = 0;
1041                        char buf[1024];
1042                        if (argc <= 1)
1043                                die(emsg_missing_argument, arg);
1044                        patterns = fopen(argv[1], "r");
1045                        if (!patterns)
1046                                die("'%s': %s", argv[1], strerror(errno));
1047                        while (fgets(buf, sizeof(buf), patterns)) {
1048                                int len = strlen(buf);
1049                                if (buf[len-1] == '\n')
1050                                        buf[len-1] = 0;
1051                                /* ignore empty line like grep does */
1052                                if (!buf[0])
1053                                        continue;
1054                                add_pattern(&opt, strdup(buf), argv[1], ++lno,
1055                                            GREP_PATTERN);
1056                        }
1057                        fclose(patterns);
1058                        argv++;
1059                        argc--;
1060                        continue;
1061                }
1062                if (!strcmp("--not", arg)) {
1063                        add_pattern(&opt, arg, "command line", 0, GREP_NOT);
1064                        continue;
1065                }
1066                if (!strcmp("--and", arg)) {
1067                        add_pattern(&opt, arg, "command line", 0, GREP_AND);
1068                        continue;
1069                }
1070                if (!strcmp("--or", arg))
1071                        continue; /* no-op */
1072                if (!strcmp("(", arg)) {
1073                        add_pattern(&opt, arg, "command line", 0, GREP_OPEN_PAREN);
1074                        continue;
1075                }
1076                if (!strcmp(")", arg)) {
1077                        add_pattern(&opt, arg, "command line", 0, GREP_CLOSE_PAREN);
1078                        continue;
1079                }
1080                if (!strcmp("-e", arg)) {
1081                        if (1 < argc) {
1082                                add_pattern(&opt, argv[1], "-e option", 0,
1083                                            GREP_PATTERN);
1084                                argv++;
1085                                argc--;
1086                                continue;
1087                        }
1088                        die(emsg_missing_argument, arg);
1089                }
1090                if (!strcmp("--full-name", arg)) {
1091                        opt.relative = 0;
1092                        continue;
1093                }
1094                if (!strcmp("--", arg)) {
1095                        /* later processing wants to have this at argv[1] */
1096                        argv--;
1097                        argc++;
1098                        break;
1099                }
1100                if (*arg == '-')
1101                        usage(builtin_grep_usage);
1102
1103                /* First unrecognized non-option token */
1104                if (!opt.pattern_list) {
1105                        add_pattern(&opt, arg, "command line", 0,
1106                                    GREP_PATTERN);
1107                        break;
1108                }
1109                else {
1110                        /* We are looking at the first path or rev;
1111                         * it is found at argv[1] after leaving the
1112                         * loop.
1113                         */
1114                        argc++; argv--;
1115                        break;
1116                }
1117        }
1118
1119        if (!opt.pattern_list)
1120                die("no pattern given.");
1121        if ((opt.regflags != REG_NEWLINE) && opt.fixed)
1122                die("cannot mix --fixed-strings and regexp");
1123        if (!opt.fixed)
1124                compile_patterns(&opt);
1125
1126        /* Check revs and then paths */
1127        for (i = 1; i < argc; i++) {
1128                const char *arg = argv[i];
1129                unsigned char sha1[20];
1130                /* Is it a rev? */
1131                if (!get_sha1(arg, sha1)) {
1132                        struct object *object = parse_object(sha1);
1133                        if (!object)
1134                                die("bad object %s", arg);
1135                        add_object_array(object, arg, &list);
1136                        continue;
1137                }
1138                if (!strcmp(arg, "--")) {
1139                        i++;
1140                        seen_dashdash = 1;
1141                }
1142                break;
1143        }
1144
1145        /* The rest are paths */
1146        if (!seen_dashdash) {
1147                int j;
1148                for (j = i; j < argc; j++)
1149                        verify_filename(prefix, argv[j]);
1150        }
1151
1152        if (i < argc) {
1153                paths = get_pathspec(prefix, argv + i);
1154                if (opt.prefix_length && opt.relative) {
1155                        /* Make sure we do not get outside of paths */
1156                        for (i = 0; paths[i]; i++)
1157                                if (strncmp(prefix, paths[i], opt.prefix_length))
1158                                        die("git-grep: cannot generate relative filenames containing '..'");
1159                }
1160        }
1161        else if (prefix) {
1162                paths = xcalloc(2, sizeof(const char *));
1163                paths[0] = prefix;
1164                paths[1] = NULL;
1165        }
1166
1167        if (!list.nr)
1168                return !grep_cache(&opt, paths, cached);
1169
1170        if (cached)
1171                die("both --cached and trees are given.");
1172
1173        for (i = 0; i < list.nr; i++) {
1174                struct object *real_obj;
1175                real_obj = deref_tag(list.objects[i].item, NULL, 0);
1176                if (grep_object(&opt, paths, real_obj, list.objects[i].name))
1177                        hit = 1;
1178        }
1179        return !hit;
1180}