#include "builtin.h"
#include <regex.h>
#include <fnmatch.h>
+#include <sys/wait.h>
/*
* git grep pathspecs are somewhat different from diff-tree pathspecs;
unsigned unmatch_name_only:1;
unsigned count:1;
unsigned word_regexp:1;
+ unsigned fixed:1;
#define GREP_BINARY_DEFAULT 0
#define GREP_BINARY_NOMATCH 1
#define GREP_BINARY_TEXT 2
return 0;
}
+/*
+ * Fixed-string stand-in for regexec(): look for the first occurrence of
+ * "pattern" inside "line" using strstr().
+ *
+ * On a hit, match->rm_so / match->rm_eo receive the byte offsets of the
+ * start and one-past-the-end of the occurrence and 0 is returned.
+ * Otherwise both offsets are set to -1 and REG_NOMATCH is returned.
+ */
+static int fixmatch(const char *pattern, char *line, regmatch_t *match)
+{
+	const char *found = strstr(line, pattern);
+
+	if (found) {
+		match->rm_so = found - line;
+		match->rm_eo = match->rm_so + strlen(pattern);
+		return 0;
+	}
+
+	match->rm_so = match->rm_eo = -1;
+	return REG_NOMATCH;
+}
+
static int grep_buffer(struct grep_opt *opt, const char *name,
char *buf, unsigned long size)
{
*eol = 0;
for (p = opt->pattern_list; p; p = p->next) {
- regex_t *exp = &p->regexp;
- hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
- pmatch, 0);
+ if (!opt->fixed) {
+ regex_t *exp = &p->regexp;
+ hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
+ pmatch, 0);
+ }
+ else {
+ hit = !fixmatch(p->pattern, bol, pmatch);
+ }
if (hit && opt->word_regexp) {
/* Match beginning must be either
die("regexp returned nonsense");
if (pmatch[0].rm_so != 0 &&
word_char(bol[pmatch[0].rm_so-1]))
- continue; /* not a word boundary */
- if ((eol-bol) < pmatch[0].rm_eo &&
+ hit = 0;
+ if (pmatch[0].rm_eo != (eol-bol) &&
word_char(bol[pmatch[0].rm_eo]))
- continue; /* not a word boundary */
+ hit = 0;
}
if (hit)
break;
return i;
}
+/*
+ * Fork, run "grep" (looked up via execvp) with the given argument vector
+ * and wait for it to finish.
+ *
+ * argv must have room for one extra slot: argv[argc] is overwritten with
+ * NULL to terminate the vector.
+ *
+ * Returns 1 when the child exited with status 0, 0 when it exited with
+ * any other status, the negative fork() result when fork fails, and -1
+ * when waitpid() fails (other than EINTR) or the child did not exit
+ * normally.
+ */
+static int exec_grep(int argc, const char **argv)
+{
+	int status;
+	pid_t child;
+
+	argv[argc] = NULL;
+
+	child = fork();
+	if (child < 0)
+		return child;
+	if (child == 0) {
+		execvp("grep", (char **) argv);
+		/* Only reached when the exec itself failed. */
+		exit(255);
+	}
+
+	/* Retry the wait when it is merely interrupted by a signal. */
+	for (;;) {
+		if (waitpid(child, &status, 0) >= 0)
+			break;
+		if (errno != EINTR)
+			return -1;
+	}
+
+	if (!WIFEXITED(status))
+		return -1;
+	return WEXITSTATUS(status) == 0 ? 1 : 0;
+}
+
+#define MAXARGS 1000
+#define ARGBUF 4096
+/*
+ * Append one argument to the argument vector being built.  Relies on the
+ * enclosing function's "argv" array and "nr" counter; dies instead of
+ * writing past MAXARGS entries.
+ */
+#define push_arg(a)	do { \
+		if (nr < MAXARGS) argv[nr++] = (a); \
+		else die("maximum number of args exceeded"); \
+	} while (0)
+
+/*
+ * Grep through the index entries matching "paths" by handing them to the
+ * external "grep" command in batches.
+ *
+ * The fixed part of the command line (options derived from "opt", every
+ * pattern as "-e <pattern>", then "--") is built once; entry names are
+ * appended after it and exec_grep() is run each time the vector fills
+ * up, plus once more for any remainder.
+ *
+ * Returns the sum of the exec_grep() results.  "cached" is not used here.
+ */
+static int external_grep(struct grep_opt *opt, const char **paths, int cached)
+{
+	int i, nr, argc, hit, len;
+	const char *argv[MAXARGS+1];
+	char randarg[ARGBUF];	/* backing store for the numeric arguments */
+	char *argptr = randarg;
+	struct grep_pat *p;
+
+	len = nr = 0;
+	push_arg("grep");
+	push_arg("-H");
+	if (opt->fixed)
+		push_arg("-F");
+	if (opt->linenum)
+		push_arg("-n");
+	if (opt->regflags & REG_EXTENDED)
+		push_arg("-E");
+	if (opt->word_regexp)
+		push_arg("-w");
+	if (opt->name_only)
+		push_arg("-l");
+	if (opt->unmatch_name_only)
+		push_arg("-L");
+	if (opt->count)
+		push_arg("-c");
+	if (opt->post_context || opt->pre_context) {
+		/*
+		 * snprintf() returns the length without the terminating
+		 * NUL, so account for it (+ 1); otherwise the next number
+		 * would be written over the previous one's terminator.
+		 */
+		if (opt->post_context != opt->pre_context) {
+			if (opt->pre_context) {
+				push_arg("-B");
+				len += snprintf(argptr, sizeof(randarg)-len,
+						"%u", opt->pre_context) + 1;
+				if (sizeof(randarg) <= len)
+					die("maximum length of args exceeded");
+				push_arg(argptr);
+				argptr = randarg + len;
+			}
+			if (opt->post_context) {
+				push_arg("-A");
+				len += snprintf(argptr, sizeof(randarg)-len,
+						"%u", opt->post_context) + 1;
+				if (sizeof(randarg) <= len)
+					die("maximum length of args exceeded");
+				push_arg(argptr);
+				argptr = randarg + len;
+			}
+		}
+		else {
+			push_arg("-C");
+			len += snprintf(argptr, sizeof(randarg)-len,
+					"%u", opt->post_context) + 1;
+			if (sizeof(randarg) <= len)
+				die("maximum length of args exceeded");
+			push_arg(argptr);
+			argptr = randarg + len;
+		}
+	}
+	for (p = opt->pattern_list; p; p = p->next) {
+		push_arg("-e");
+		push_arg(p->pattern);
+	}
+	push_arg("--");
+
+	/*
+	 * Keep room for at least one path in addition to the NULL that
+	 * exec_grep() stores at argv[argc]; otherwise the loop below would
+	 * run past the end of argv[].
+	 */
+	if (nr >= MAXARGS)
+		die("maximum number of args exceeded");
+
+	hit = 0;
+	argc = nr;
+	for (i = 0; i < active_nr; i++) {
+		struct cache_entry *ce = active_cache[i];
+		if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
+			continue;
+		if (!pathspec_matches(paths, ce->name))
+			continue;
+		argv[argc++] = ce->name;
+		if (argc < MAXARGS)
+			continue;
+		/* Vector is full: run this batch and start over after "--". */
+		hit += exec_grep(argc, argv);
+		argc = nr;
+	}
+	if (argc > nr)
+		hit += exec_grep(argc, argv);
+	return hit;
+}
+
static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
{
int hit = 0;
int nr;
read_cache();
+#ifdef __unix__
+ /*
+ * Use the external "grep" command for the case where
+ * we grep through the checked-out files. It tends to
+ * be a lot more optimized
+ */
+ if (!cached) {
+ hit = external_grep(opt, paths, cached);
+ if (hit >= 0)
+ return hit;
+ }
+#endif
+
for (nr = 0; nr < active_nr; nr++) {
struct cache_entry *ce = active_cache[nr];
if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
int cmd_grep(int argc, const char **argv, char **envp)
{
int hit = 0;
- int no_more_flags = 0;
int cached = 0;
+ int seen_dashdash = 0;
struct grep_opt opt;
struct object_list *list, **tail, *object_list = NULL;
const char *prefix = setup_git_directory();
const char **paths = NULL;
+ int i;
memset(&opt, 0, sizeof(opt));
opt.pattern_tail = &opt.pattern_list;
opt.regflags = REG_NEWLINE;
/*
- * No point using rev_info, really.
+ * If there is no -- then the paths must exist in the working
+ * tree. If there is no explicit pattern specified with -e or
+ * -f, we take the first unrecognized non option to be the
+ * pattern, but then what follows it must be zero or more
+ * valid refs up to the -- (if exists), and then existing
+ * paths. If there is an explicit pattern, then the first
+ * unrecognized non option is the beginning of the refs list
+ * that continues up to the -- (if exists), and then paths.
*/
+
+ tail = &object_list;
while (1 < argc) {
const char *arg = argv[1];
argc--; argv++;
opt.regflags |= REG_EXTENDED;
continue;
}
+ if (!strcmp("-F", arg) ||
+ !strcmp("--fixed-strings", arg)) {
+ opt.fixed = 1;
+ continue;
+ }
if (!strcmp("-G", arg) ||
!strcmp("--basic-regexp", arg)) {
opt.regflags &= ~REG_EXTENDED;
usage(builtin_grep_usage);
patterns = fopen(argv[1], "r");
if (!patterns)
- die("'%s': %s", strerror(errno));
+ die("'%s': %s", argv[1], strerror(errno));
while (fgets(buf, sizeof(buf), patterns)) {
int len = strlen(buf);
if (buf[len-1] == '\n')
}
usage(builtin_grep_usage);
}
- if (!strcmp("--", arg)) {
- no_more_flags = 1;
- continue;
- }
- /* Either unrecognized option or a single pattern */
- if (!no_more_flags && *arg == '-')
+ if (!strcmp("--", arg))
+ break;
+ if (*arg == '-')
usage(builtin_grep_usage);
+
+ /* First unrecognized non-option token */
if (!opt.pattern_list) {
add_pattern(&opt, arg, "command line", 0);
break;
}
else {
/* We are looking at the first path or rev;
- * it is found at argv[0] after leaving the
+ * it is found at argv[1] after leaving the
* loop.
*/
argc++; argv--;
break;
}
}
+
if (!opt.pattern_list)
die("no pattern given.");
- compile_patterns(&opt);
- tail = &object_list;
- while (1 < argc) {
- struct object *object;
- struct object_list *elem;
- const char *arg = argv[1];
+ if ((opt.regflags != REG_NEWLINE) && opt.fixed)
+ die("cannot mix --fixed-strings and regexp");
+ if (!opt.fixed)
+ compile_patterns(&opt);
+
+ /* Check revs and then paths */
+ for (i = 1; i < argc; i++) {
+ const char *arg = argv[i];
unsigned char sha1[20];
- if (get_sha1(arg, sha1) < 0)
- break;
- object = parse_object(sha1);
- if (!object)
- die("bad object %s", arg);
- elem = object_list_insert(object, tail);
- elem->name = arg;
- tail = &elem->next;
- argc--; argv++;
+ /* Is it a rev? */
+ if (!get_sha1(arg, sha1)) {
+ struct object *object = parse_object(sha1);
+ struct object_list *elem;
+ if (!object)
+ die("bad object %s", arg);
+ elem = object_list_insert(object, tail);
+ elem->name = arg;
+ tail = &elem->next;
+ continue;
+ }
+ if (!strcmp(arg, "--")) {
+ i++;
+ seen_dashdash = 1;
+ }
+ break;
+ }
+
+ /* The rest are paths */
+ if (!seen_dashdash) {
+ int j;
+ for (j = i; j < argc; j++)
+ verify_filename(prefix, argv[j]);
}
- if (1 < argc)
- paths = get_pathspec(prefix, argv + 1);
+
+ if (i < argc)
+ paths = get_pathspec(prefix, argv + i);
else if (prefix) {
paths = xcalloc(2, sizeof(const char *));
paths[0] = prefix;