c3e6701aa0d29e13421fb47bdcb7bdfcad7d5438
   1/*
   2 * Builtin "git grep"
   3 *
   4 * Copyright (c) 2006 Junio C Hamano
   5 */
   6#include "cache.h"
   7#include "blob.h"
   8#include "tree.h"
   9#include "commit.h"
  10#include "tag.h"
  11#include "diff.h"
  12#include "revision.h"
  13#include "builtin.h"
  14#include <regex.h>
  15#include <fnmatch.h>
  16
  17/*
  18 * git grep pathspecs are somewhat different from diff-tree pathspecs;
  19 * pathname wildcards are allowed.
  20 */
  21static int pathspec_matches(struct diff_options *opt, const char *name)
  22{
  23        int namelen, i;
  24        if (!opt->nr_paths)
  25                return 1;
  26        namelen = strlen(name);
  27        for (i = 0; i < opt->nr_paths; i++) {
  28                const char *match = opt->paths[i];
  29                int matchlen = opt->pathlens[i];
  30                const char *slash, *cp;
  31
  32                if ((matchlen <= namelen) &&
  33                    !strncmp(name, match, matchlen) &&
  34                    (match[matchlen-1] == '/' ||
  35                     name[matchlen] == '\0' || name[matchlen] == '/'))
  36                        return 1;
  37                if (!fnmatch(match, name, 0))
  38                        return 1;
  39                if (name[namelen-1] != '/')
  40                        continue;
  41
  42                /* We are being asked if the name directory is worth
  43                 * descending into.
  44                 *
  45                 * Find the longest leading directory name that does
  46                 * not have metacharacter in the pathspec; the name
  47                 * we are looking at must overlap with that directory.
  48                 */
  49                for (cp = match, slash = NULL; cp - match < matchlen; cp++) {
  50                        char ch = *cp;
  51                        if (ch == '/')
  52                                slash = cp;
  53                        if (ch == '*' || ch == '[')
  54                                break;
  55                }
  56                if (!slash)
  57                        slash = match; /* toplevel */
  58                else
  59                        slash++;
  60                if (namelen <= slash - match) {
  61                        /* Looking at "Documentation/" and
  62                         * the pattern says "Documentation/howto/", or
  63                         * "Documentation/diff*.txt".
  64                         */
  65                        if (!memcmp(match, name, namelen))
  66                                return 1;
  67                }
  68                else {
  69                        /* Looking at "Documentation/howto/" and
  70                         * the pattern says "Documentation/h*".
  71                         */
  72                        if (!memcmp(match, name, slash - match))
  73                                return 1;
  74                }
  75        }
  76        return 0;
  77}
  78
/*
 * Settings for one "git grep" invocation, filled in by cmd_grep() from
 * the command line and consulted while scanning each buffer.
 */
struct grep_opt {
        const char *pattern;            /* pattern text given with -e */
        regex_t regexp;                 /* "pattern" compiled with regcomp() */
        unsigned linenum:1;             /* -n: prefix shown lines with line number */
        unsigned invert:1;              /* -v: select lines that do NOT match */
        unsigned name_only:1;           /* -l: print only the name of a file with a hit */
        int regflags;                   /* flags handed to regcomp() */
        unsigned pre_context;           /* -B/-C: lines to show before a hit */
        unsigned post_context;          /* -A/-C: lines to show after a hit */
};
  89
  90static char *end_of_line(char *cp, unsigned long *left)
  91{
  92        unsigned long l = *left;
  93        while (l && *cp != '\n') {
  94                l--;
  95                cp++;
  96        }
  97        *left = l;
  98        return cp;
  99}
 100
 101static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
 102                      const char *name, unsigned lno, char sign)
 103{
 104        printf("%s%c", name, sign);
 105        if (opt->linenum)
 106                printf("%d%c", lno, sign);
 107        printf("%.*s\n", eol-bol, bol);
 108}
 109
 110static int grep_buffer(struct grep_opt *opt, const char *name,
 111                       char *buf, unsigned long size)
 112{
 113        char *bol = buf;
 114        unsigned long left = size;
 115        unsigned lno = 1;
 116        struct pre_context_line {
 117                char *bol;
 118                char *eol;
 119        } *prev = NULL, *pcl;
 120        unsigned last_hit = 0;
 121        unsigned last_shown = 0;
 122        const char *hunk_mark = "";
 123
 124        if (opt->pre_context)
 125                prev = xcalloc(opt->pre_context, sizeof(*prev));
 126        if (opt->pre_context || opt->post_context)
 127                hunk_mark = "--\n";
 128
 129        while (left) {
 130                regmatch_t pmatch[10];
 131                char *eol, ch;
 132                int hit;
 133
 134                eol = end_of_line(bol, &left);
 135                ch = *eol;
 136                *eol = 0;
 137
 138                hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch),
 139                               pmatch, 0);
 140                if (opt->invert)
 141                        hit = !hit;
 142                if (hit) {
 143                        if (opt->name_only) {
 144                                printf("%s\n", name);
 145                                return 1;
 146                        }
 147                        /* Hit at this line.  If we haven't shown the
 148                         * pre-context lines, we would need to show them.
 149                         */
 150                        if (opt->pre_context) {
 151                                unsigned from;
 152                                if (opt->pre_context < lno)
 153                                        from = lno - opt->pre_context;
 154                                else
 155                                        from = 1;
 156                                if (from <= last_shown)
 157                                        from = last_shown + 1;
 158                                if (last_shown && from != last_shown + 1)
 159                                        printf(hunk_mark);
 160                                while (from < lno) {
 161                                        pcl = &prev[lno-from-1];
 162                                        show_line(opt, pcl->bol, pcl->eol,
 163                                                  name, from, '-');
 164                                        from++;
 165                                }
 166                                last_shown = lno-1;
 167                        }
 168                        if (last_shown && lno != last_shown + 1)
 169                                printf(hunk_mark);
 170                        show_line(opt, bol, eol, name, lno, ':');
 171                        last_shown = last_hit = lno;
 172                }
 173                else if (last_hit &&
 174                         lno <= last_hit + opt->post_context) {
 175                        /* If the last hit is within the post context,
 176                         * we need to show this line.
 177                         */
 178                        if (last_shown && lno != last_shown + 1)
 179                                printf(hunk_mark);
 180                        show_line(opt, bol, eol, name, lno, '-');
 181                        last_shown = lno;
 182                }
 183                if (opt->pre_context) {
 184                        memmove(prev+1, prev,
 185                                (opt->pre_context-1) * sizeof(*prev));
 186                        prev->bol = bol;
 187                        prev->eol = eol;
 188                }
 189                *eol = ch;
 190                bol = eol + 1;
 191                left--;
 192                lno++;
 193        }
 194        return !!last_hit;
 195}
 196
 197static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
 198{
 199        unsigned long size;
 200        char *data;
 201        char type[20];
 202        int hit;
 203        data = read_sha1_file(sha1, type, &size);
 204        if (!data) {
 205                error("'%s': unable to read %s", name, sha1_to_hex(sha1));
 206                return 0;
 207        }
 208        hit = grep_buffer(opt, name, data, size);
 209        free(data);
 210        return hit;
 211}
 212
 213static int grep_file(struct grep_opt *opt, const char *filename)
 214{
 215        struct stat st;
 216        int i;
 217        char *data;
 218        if (lstat(filename, &st) < 0) {
 219        err_ret:
 220                if (errno != ENOENT)
 221                        error("'%s': %s", filename, strerror(errno));
 222                return 0;
 223        }
 224        if (!st.st_size)
 225                return 0; /* empty file -- no grep hit */
 226        if (!S_ISREG(st.st_mode))
 227                return 0;
 228        i = open(filename, O_RDONLY);
 229        if (i < 0)
 230                goto err_ret;
 231        data = xmalloc(st.st_size + 1);
 232        if (st.st_size != xread(i, data, st.st_size)) {
 233                error("'%s': short read %s", filename, strerror(errno));
 234                close(i);
 235                free(data);
 236                return 0;
 237        }
 238        close(i);
 239        i = grep_buffer(opt, filename, data, st.st_size);
 240        free(data);
 241        return i;
 242}
 243
 244static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached)
 245{
 246        int hit = 0;
 247        int nr;
 248        read_cache();
 249
 250        for (nr = 0; nr < active_nr; nr++) {
 251                struct cache_entry *ce = active_cache[nr];
 252                if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
 253                        continue;
 254                if (!pathspec_matches(&revs->diffopt, ce->name))
 255                        continue;
 256                if (cached)
 257                        hit |= grep_sha1(opt, ce->sha1, ce->name);
 258                else
 259                        hit |= grep_file(opt, ce->name);
 260        }
 261        return hit;
 262}
 263
 264static int grep_tree(struct grep_opt *opt, struct rev_info *revs,
 265                     struct tree_desc *tree,
 266                     const char *tree_name, const char *base)
 267{
 268        unsigned mode;
 269        int len;
 270        int hit = 0;
 271        const char *path;
 272        const unsigned char *sha1;
 273        char *down;
 274        char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100);
 275
 276        if (tree_name[0]) {
 277                int offset = sprintf(path_buf, "%s:", tree_name);
 278                down = path_buf + offset;
 279                strcat(down, base);
 280        }
 281        else {
 282                down = path_buf;
 283                strcpy(down, base);
 284        }
 285        len = strlen(path_buf);
 286
 287        while (tree->size) {
 288                int pathlen;
 289                sha1 = tree_entry_extract(tree, &path, &mode);
 290                pathlen = strlen(path);
 291                strcpy(path_buf + len, path);
 292
 293                if (S_ISDIR(mode))
 294                        /* Match "abc/" against pathspec to
 295                         * decide if we want to descend into "abc"
 296                         * directory.
 297                         */
 298                        strcpy(path_buf + len + pathlen, "/");
 299
 300                if (!pathspec_matches(&revs->diffopt, down))
 301                        ;
 302                else if (S_ISREG(mode))
 303                        hit |= grep_sha1(opt, sha1, path_buf);
 304                else if (S_ISDIR(mode)) {
 305                        char type[20];
 306                        struct tree_desc sub;
 307                        void *data;
 308                        data = read_sha1_file(sha1, type, &sub.size);
 309                        if (!data)
 310                                die("unable to read tree (%s)",
 311                                    sha1_to_hex(sha1));
 312                        sub.buf = data;
 313                        hit |= grep_tree(opt, revs, &sub, tree_name, down);
 314                        free(data);
 315                }
 316                update_tree_entry(tree);
 317        }
 318        return hit;
 319}
 320
 321static int grep_object(struct grep_opt *opt, struct rev_info *revs,
 322                       struct object *obj, const char *name)
 323{
 324        if (!strcmp(obj->type, blob_type))
 325                return grep_sha1(opt, obj->sha1, name);
 326        if (!strcmp(obj->type, commit_type) ||
 327            !strcmp(obj->type, tree_type)) {
 328                struct tree_desc tree;
 329                void *data;
 330                int hit;
 331                data = read_object_with_reference(obj->sha1, tree_type,
 332                                                  &tree.size, NULL);
 333                if (!data)
 334                        die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
 335                tree.buf = data;
 336                hit = grep_tree(opt, revs, &tree, name, "");
 337                free(data);
 338                return hit;
 339        }
 340        die("unable to grep from object of type %s", obj->type);
 341}
 342
 343static const char builtin_grep_usage[] =
 344"git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
 345
 346int cmd_grep(int argc, const char **argv, char **envp)
 347{
 348        struct rev_info rev;
 349        const char **dst, **src;
 350        int err;
 351        int hit = 0;
 352        int no_more_arg = 0;
 353        int seen_range = 0;
 354        int seen_noncommit = 0;
 355        int cached = 0;
 356        struct grep_opt opt;
 357        struct object_list *list;
 358
 359        memset(&opt, 0, sizeof(opt));
 360        opt.regflags = REG_NEWLINE;
 361
 362        /*
 363         * Interpret and remove the grep options upfront.  Sigh...
 364         */
 365        for (dst = src = &argv[1]; src < argc + argv; ) {
 366                const char *arg = *src++;
 367                if (!no_more_arg) {
 368                        if (!strcmp("--", arg)) {
 369                                no_more_arg = 1;
 370                                *dst++ = arg;
 371                                continue;
 372                        }
 373                        if (!strcmp("--cached", arg)) {
 374                                cached = 1;
 375                                continue;
 376                        }
 377                        if (!strcmp("-i", arg) ||
 378                            !strcmp("--ignore-case", arg)) {
 379                                opt.regflags |= REG_ICASE;
 380                                continue;
 381                        }
 382                        if (!strcmp("-v", arg) ||
 383                            !strcmp("--invert-match", arg)) {
 384                                opt.invert = 1;
 385                                continue;
 386                        }
 387                        if (!strcmp("-E", arg) ||
 388                            !strcmp("--extended-regexp", arg)) {
 389                                opt.regflags |= REG_EXTENDED;
 390                                continue;
 391                        }
 392                        if (!strcmp("-G", arg) ||
 393                            !strcmp("--basic-regexp", arg)) {
 394                                opt.regflags &= ~REG_EXTENDED;
 395                                continue;
 396                        }
 397                        if (!strcmp("-e", arg)) {
 398                                if (src < argc + argv) {
 399                                        opt.pattern = *src++;
 400                                        continue;
 401                                }
 402                                usage(builtin_grep_usage);
 403                        }
 404                        if (!strcmp("-n", arg)) {
 405                                opt.linenum = 1;
 406                                continue;
 407                        }
 408                        if (!strcmp("-H", arg)) {
 409                                /* We always show the pathname, so this
 410                                 * is a noop.
 411                                 */
 412                                continue;
 413                        }
 414                        if (!strcmp("-l", arg) ||
 415                            !strcmp("--files-with-matches", arg)) {
 416                                opt.name_only = 1;
 417                                continue;
 418                        }
 419                        if (!strcmp("-A", arg) ||
 420                            !strcmp("-B", arg) ||
 421                            !strcmp("-C", arg)) {
 422                                unsigned num;
 423                                if ((argc + argv <= src) ||
 424                                    sscanf(*src++, "%u", &num) != 1)
 425                                        usage(builtin_grep_usage);
 426                                switch (arg[1]) {
 427                                case 'A':
 428                                        opt.post_context = num;
 429                                        break;
 430                                case 'C':
 431                                        opt.post_context = num;
 432                                case 'B':
 433                                        opt.pre_context = num;
 434                                        break;
 435                                }
 436                                continue;
 437                        }
 438                }
 439                *dst++ = arg;
 440        }
 441        if (!opt.pattern)
 442                die("no pattern given.");
 443
 444        err = regcomp(&opt.regexp, opt.pattern, opt.regflags);
 445        if (err) {
 446                char errbuf[1024];
 447                regerror(err, &opt.regexp, errbuf, 1024);
 448                regfree(&opt.regexp);
 449                die("'%s': %s", opt.pattern, errbuf);
 450        }
 451
 452        init_revisions(&rev);
 453        *dst = NULL;
 454        argc = setup_revisions(dst - argv, argv, &rev, NULL);
 455
 456        /*
 457         * Do not walk "grep -e foo master next pu -- Documentation/"
 458         * but do walk "grep -e foo master..next -- Documentation/".
 459         * Ranged request mixed with a blob or tree object, like
 460         * "grep -e foo v1.0.0:Documentation/ master..next"
 461         * so detect that and complain.
 462         */
 463        for (list = rev.pending_objects; list; list = list->next) {
 464                struct object *real_obj;
 465                if (list->item->flags & UNINTERESTING)
 466                        seen_range = 1;
 467                real_obj = deref_tag(list->item, NULL, 0);
 468                if (strcmp(real_obj->type, commit_type))
 469                        seen_noncommit = 1;
 470        }
 471        if (!rev.pending_objects)
 472                return !grep_cache(&opt, &rev, cached);
 473        if (cached)
 474                die("both --cached and revisions given.");
 475
 476        if (seen_range && seen_noncommit)
 477                die("both A..B and non commit are given.");
 478        if (seen_range) {
 479                struct commit *commit;
 480                prepare_revision_walk(&rev);
 481                while ((commit = get_revision(&rev)) != NULL) {
 482                        unsigned char *sha1 = commit->object.sha1;
 483                        const char *n = find_unique_abbrev(sha1, rev.abbrev);
 484                        char rev_name[41];
 485                        strcpy(rev_name, n);
 486                        if (grep_object(&opt, &rev, &commit->object, rev_name))
 487                                hit = 1;
 488                        commit->buffer = NULL;
 489                }
 490                return !hit;
 491        }
 492
 493        /* all of them are non-commit; do not walk, and
 494         * do not lose their names.
 495         */
 496        for (list = rev.pending_objects; list; list = list->next) {
 497                struct object *real_obj;
 498                real_obj = deref_tag(list->item, NULL, 0);
 499                if (grep_object(&opt, &rev, real_obj, list->name))
 500                        hit = 1;
 501        }
 502        return !hit;
 503}