builtin-grep.con commit Merge branch 'jc/grep' into next (afbe700)
   1/*
   2 * Builtin "git grep"
   3 *
   4 * Copyright (c) 2006 Junio C Hamano
   5 */
   6#include "cache.h"
   7#include "blob.h"
   8#include "tree.h"
   9#include "commit.h"
  10#include "tag.h"
  11#include "diff.h"
  12#include "revision.h"
  13#include "builtin.h"
  14#include <regex.h>
  15
  16static int pathspec_matches(struct diff_options *opt, const char *name)
  17{
  18        int i, j;
  19        int namelen;
  20        if (!opt->nr_paths)
  21                return 1;
  22        namelen = strlen(name);
  23        for (i = 0; i < opt->nr_paths; i++) {
  24                const char *match = opt->paths[i];
  25                int matchlen = opt->pathlens[i];
  26                if (matchlen <= namelen) {
  27                        if (!strncmp(name, match, matchlen))
  28                                return 1;
  29                        continue;
  30                }
  31                /* If name is "Documentation" and pathspec is
  32                 * "Documentation/", they should match.  Maybe
  33                 * we would want to strip it in get_pathspec()???
  34                 */
  35                if (strncmp(name, match, namelen))
  36                        continue;
  37                for (j = namelen; j < matchlen; j++)
  38                        if (match[j] != '/')
  39                                break;
  40                if (matchlen <= j)
  41                        return 1;
  42        }
  43        return 0;
  44}
  45
  46struct grep_opt {
  47        const char *pattern;
  48        regex_t regexp;
  49        unsigned linenum:1;
  50        unsigned invert:1;
  51        int regflags;
  52        unsigned pre_context;
  53        unsigned post_context;
  54};
  55
  56static char *end_of_line(char *cp, unsigned long *left)
  57{
  58        unsigned long l = *left;
  59        while (l && *cp != '\n') {
  60                l--;
  61                cp++;
  62        }
  63        *left = l;
  64        return cp;
  65}
  66
  67static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
  68                      const char *name, unsigned lno, char sign)
  69{
  70        printf("%s%c", name, sign);
  71        if (opt->linenum)
  72                printf("%d%c", lno, sign);
  73        printf("%.*s\n", eol-bol, bol);
  74}
  75
  76static int grep_buffer(struct grep_opt *opt, const char *name,
  77                       char *buf, unsigned long size)
  78{
  79        char *bol = buf;
  80        unsigned long left = size;
  81        unsigned lno = 1;
  82        struct pre_context_line {
  83                char *bol;
  84                char *eol;
  85        } *prev = NULL, *pcl;
  86        unsigned last_hit = 0;
  87        unsigned last_shown = 0;
  88        const char *hunk_mark = "";
  89
  90        if (opt->pre_context)
  91                prev = xcalloc(opt->pre_context, sizeof(*prev));
  92        if (opt->pre_context || opt->post_context)
  93                hunk_mark = "--\n";
  94
  95        while (left) {
  96                regmatch_t pmatch[10];
  97                char *eol, ch;
  98                int hit;
  99
 100                eol = end_of_line(bol, &left);
 101                ch = *eol;
 102                *eol = 0;
 103
 104                hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch),
 105                               pmatch, 0);
 106                if (opt->invert)
 107                        hit = !hit;
 108                if (hit) {
 109                        /* Hit at this line.  If we haven't shown the
 110                         * pre-context lines, we would need to show them.
 111                         */
 112                        if (opt->pre_context) {
 113                                unsigned from;
 114                                if (opt->pre_context < lno)
 115                                        from = lno - opt->pre_context;
 116                                else
 117                                        from = 1;
 118                                if (from <= last_shown)
 119                                        from = last_shown + 1;
 120                                if (last_shown && from != last_shown + 1)
 121                                        printf(hunk_mark);
 122                                while (from < lno) {
 123                                        pcl = &prev[lno-from-1];
 124                                        show_line(opt, pcl->bol, pcl->eol,
 125                                                  name, from, '-');
 126                                        from++;
 127                                }
 128                                last_shown = lno-1;
 129                        }
 130                        if (last_shown && lno != last_shown + 1)
 131                                printf(hunk_mark);
 132                        show_line(opt, bol, eol, name, lno, ':');
 133                        last_shown = last_hit = lno;
 134                }
 135                else if (last_hit &&
 136                         lno <= last_hit + opt->post_context) {
 137                        /* If the last hit is within the post context,
 138                         * we need to show this line.
 139                         */
 140                        if (last_shown && lno != last_shown + 1)
 141                                printf(hunk_mark);
 142                        show_line(opt, bol, eol, name, lno, '-');
 143                        last_shown = lno;
 144                }
 145                if (opt->pre_context) {
 146                        memmove(prev+1, prev,
 147                                (opt->pre_context-1) * sizeof(*prev));
 148                        prev->bol = bol;
 149                        prev->eol = eol;
 150                }
 151                *eol = ch;
 152                bol = eol + 1;
 153                left--;
 154                lno++;
 155        }
 156        return !!last_hit;
 157}
 158
 159static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
 160{
 161        unsigned long size;
 162        char *data;
 163        char type[20];
 164        int hit;
 165        data = read_sha1_file(sha1, type, &size);
 166        if (!data) {
 167                error("'%s': unable to read %s", name, sha1_to_hex(sha1));
 168                return 0;
 169        }
 170        hit = grep_buffer(opt, name, data, size);
 171        free(data);
 172        return hit;
 173}
 174
 175static int grep_file(struct grep_opt *opt, const char *filename)
 176{
 177        struct stat st;
 178        int i;
 179        char *data;
 180        if (lstat(filename, &st) < 0) {
 181        err_ret:
 182                if (errno != ENOENT)
 183                        error("'%s': %s", filename, strerror(errno));
 184                return 0;
 185        }
 186        if (!st.st_size)
 187                return 0; /* empty file -- no grep hit */
 188        if (!S_ISREG(st.st_mode))
 189                return 0;
 190        i = open(filename, O_RDONLY);
 191        if (i < 0)
 192                goto err_ret;
 193        data = xmalloc(st.st_size + 1);
 194        if (st.st_size != xread(i, data, st.st_size)) {
 195                error("'%s': short read %s", filename, strerror(errno));
 196                close(i);
 197                free(data);
 198                return 0;
 199        }
 200        close(i);
 201        i = grep_buffer(opt, filename, data, st.st_size);
 202        free(data);
 203        return i;
 204}
 205
 206static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached)
 207{
 208        int hit = 0;
 209        int nr;
 210        read_cache();
 211
 212        for (nr = 0; nr < active_nr; nr++) {
 213                struct cache_entry *ce = active_cache[nr];
 214                if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
 215                        continue;
 216                if (!pathspec_matches(&revs->diffopt, ce->name))
 217                        continue;
 218                if (cached)
 219                        hit |= grep_sha1(opt, ce->sha1, ce->name);
 220                else
 221                        hit |= grep_file(opt, ce->name);
 222        }
 223        return hit;
 224}
 225
 226static int grep_tree(struct grep_opt *opt, struct rev_info *revs,
 227                     struct tree_desc *tree,
 228                     const char *tree_name, const char *base)
 229{
 230        unsigned mode;
 231        int len;
 232        int hit = 0;
 233        const char *path;
 234        const unsigned char *sha1;
 235        char *down_base;
 236        char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100);
 237
 238        if (tree_name[0]) {
 239                int offset = sprintf(path_buf, "%s:", tree_name);
 240                down_base = path_buf + offset;
 241                strcat(down_base, base);
 242        }
 243        else {
 244                down_base = path_buf;
 245                strcpy(down_base, base);
 246        }
 247        len = strlen(path_buf);
 248
 249        while (tree->size) {
 250                int pathlen;
 251                sha1 = tree_entry_extract(tree, &path, &mode);
 252                pathlen = strlen(path);
 253                strcpy(path_buf + len, path);
 254
 255                if (!pathspec_matches(&revs->diffopt, down_base))
 256                        ;
 257                else if (S_ISREG(mode))
 258                        hit |= grep_sha1(opt, sha1, path_buf);
 259                else if (S_ISDIR(mode)) {
 260                        char type[20];
 261                        struct tree_desc sub;
 262                        void *data;
 263                        data = read_sha1_file(sha1, type, &sub.size);
 264                        if (!data)
 265                                die("unable to read tree (%s)",
 266                                    sha1_to_hex(sha1));
 267                        strcpy(path_buf + len + pathlen, "/");
 268                        sub.buf = data;
 269                        hit = grep_tree(opt, revs, &sub, tree_name, down_base);
 270                        free(data);
 271                }
 272                update_tree_entry(tree);
 273        }
 274        return hit;
 275}
 276
 277static int grep_object(struct grep_opt *opt, struct rev_info *revs,
 278                       struct object *obj, const char *name)
 279{
 280        if (!strcmp(obj->type, blob_type))
 281                return grep_sha1(opt, obj->sha1, name);
 282        if (!strcmp(obj->type, commit_type) ||
 283            !strcmp(obj->type, tree_type)) {
 284                struct tree_desc tree;
 285                void *data;
 286                int hit;
 287                data = read_object_with_reference(obj->sha1, tree_type,
 288                                                  &tree.size, NULL);
 289                if (!data)
 290                        die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
 291                tree.buf = data;
 292                hit = grep_tree(opt, revs, &tree, name, "");
 293                free(data);
 294                return hit;
 295        }
 296        die("unable to grep from object of type %s", obj->type);
 297}
 298
/*
 * Usage string handed to usage() by cmd_grep() when an option that
 * needs an argument (-e, -A, -B, -C) is missing or has a bad one.
 */
static const char builtin_grep_usage[] =
"git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
 301
 302int cmd_grep(int argc, const char **argv, char **envp)
 303{
 304        struct rev_info rev;
 305        const char **dst, **src;
 306        int err;
 307        int hit = 0;
 308        int no_more_arg = 0;
 309        int seen_range = 0;
 310        int seen_noncommit = 0;
 311        int cached = 0;
 312        struct grep_opt opt;
 313        struct object_list *list;
 314
 315        memset(&opt, 0, sizeof(opt));
 316        opt.regflags = REG_NEWLINE;
 317
 318        /*
 319         * Interpret and remove the grep options upfront.  Sigh...
 320         */
 321        for (dst = src = &argv[1]; src < argc + argv; ) {
 322                const char *arg = *src++;
 323                if (!no_more_arg) {
 324                        if (!strcmp("--", arg)) {
 325                                no_more_arg = 1;
 326                                *dst++ = arg;
 327                                continue;
 328                        }
 329                        if (!strcmp("--cached", arg)) {
 330                                cached = 1;
 331                                continue;
 332                        }
 333                        if (!strcmp("-i", arg) ||
 334                            !strcmp("--ignore-case", arg)) {
 335                                opt.regflags |= REG_ICASE;
 336                                continue;
 337                        }
 338                        if (!strcmp("-v", arg) ||
 339                            !strcmp("--invert-match", arg)) {
 340                                opt.invert = 1;
 341                                continue;
 342                        }
 343                        if (!strcmp("-E", arg) ||
 344                            !strcmp("--extended-regexp", arg)) {
 345                                opt.regflags |= REG_EXTENDED;
 346                                continue;
 347                        }
 348                        if (!strcmp("-G", arg) ||
 349                            !strcmp("--basic-regexp", arg)) {
 350                                opt.regflags &= ~REG_EXTENDED;
 351                                continue;
 352                        }
 353                        if (!strcmp("-e", arg)) {
 354                                if (src < argc + argv) {
 355                                        opt.pattern = *src++;
 356                                        continue;
 357                                }
 358                                usage(builtin_grep_usage);
 359                        }
 360                        if (!strcmp("-n", arg)) {
 361                                opt.linenum = 1;
 362                                continue;
 363                        }
 364                        if (!strcmp("-H", arg)) {
 365                                /* We always show the pathname, so this
 366                                 * is a noop.
 367                                 */
 368                                continue;
 369                        }
 370                        if (!strcmp("-A", arg) ||
 371                            !strcmp("-B", arg) ||
 372                            !strcmp("-C", arg)) {
 373                                unsigned num;
 374                                if ((argc + argv <= src) ||
 375                                    sscanf(*src++, "%u", &num) != 1)
 376                                        usage(builtin_grep_usage);
 377                                switch (arg[1]) {
 378                                case 'A':
 379                                        opt.post_context = num;
 380                                        break;
 381                                case 'C':
 382                                        opt.post_context = num;
 383                                case 'B':
 384                                        opt.pre_context = num;
 385                                        break;
 386                                }
 387                                continue;
 388                        }
 389                }
 390                *dst++ = arg;
 391        }
 392        if (!opt.pattern)
 393                die("no pattern given.");
 394
 395        err = regcomp(&opt.regexp, opt.pattern, opt.regflags);
 396        if (err) {
 397                char errbuf[1024];
 398                regerror(err, &opt.regexp, errbuf, 1024);
 399                regfree(&opt.regexp);
 400                die("'%s': %s", opt.pattern, errbuf);
 401        }
 402
 403        init_revisions(&rev);
 404        *dst = NULL;
 405        argc = setup_revisions(dst - argv, argv, &rev, NULL);
 406
 407        /*
 408         * Do not walk "grep -e foo master next pu -- Documentation/"
 409         * but do walk "grep -e foo master..next -- Documentation/".
 410         * Ranged request mixed with a blob or tree object, like
 411         * "grep -e foo v1.0.0:Documentation/ master..next"
 412         * so detect that and complain.
 413         */
 414        for (list = rev.pending_objects; list; list = list->next) {
 415                struct object *real_obj;
 416                if (list->item->flags & UNINTERESTING)
 417                        seen_range = 1;
 418                real_obj = deref_tag(list->item, NULL, 0);
 419                if (strcmp(real_obj->type, commit_type))
 420                        seen_noncommit = 1;
 421        }
 422        if (!rev.pending_objects)
 423                return !grep_cache(&opt, &rev, cached);
 424        if (cached)
 425                die("both --cached and revisions given.");
 426
 427        if (seen_range && seen_noncommit)
 428                die("both A..B and non commit are given.");
 429        if (seen_range) {
 430                struct commit *commit;
 431                prepare_revision_walk(&rev);
 432                while ((commit = get_revision(&rev)) != NULL) {
 433                        unsigned char *sha1 = commit->object.sha1;
 434                        const char *n = find_unique_abbrev(sha1, rev.abbrev);
 435                        char rev_name[41];
 436                        strcpy(rev_name, n);
 437                        if (grep_object(&opt, &rev, &commit->object, rev_name))
 438                                hit = 1;
 439                        commit->buffer = NULL;
 440                }
 441                return !hit;
 442        }
 443
 444        /* all of them are non-commit; do not walk, and
 445         * do not lose their names.
 446         */
 447        for (list = rev.pending_objects; list; list = list->next) {
 448                struct object *real_obj;
 449                real_obj = deref_tag(list->item, NULL, 0);
 450                if (grep_object(&opt, &rev, real_obj, list->name))
 451                        hit = 1;
 452        }
 453        return !hit;
 454}