1/*
2 * Copyright (C) 2005 Junio C Hamano
3 * Copyright (C) 2010 Google Inc.
4 */
5#include "cache.h"
6#include "diff.h"
7#include "diffcore.h"
8#include "xdiff-interface.h"
9#include "kwset.h"
10
11typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
12 struct diff_options *o,
13 regex_t *regexp, kwset_t kws);
14
/*
 * Private state passed to diffgrep_consume() while the textual diff of
 * a filepair is being generated.
 */
struct diffgrep_cb {
	regex_t *regexp;	/* compiled pattern to look for */
	int hit;		/* non-zero once an added/removed line matched */
};
19
20static void diffgrep_consume(void *priv, char *line, unsigned long len)
21{
22 struct diffgrep_cb *data = priv;
23 regmatch_t regmatch;
24
25 if (line[0] != '+' && line[0] != '-')
26 return;
27 if (data->hit)
28 /*
29 * NEEDSWORK: we should have a way to terminate the
30 * caller early.
31 */
32 return;
33 data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
34 ®match, 0);
35}
36
37static int diff_grep(mmfile_t *one, mmfile_t *two,
38 struct diff_options *o,
39 regex_t *regexp, kwset_t kws)
40{
41 regmatch_t regmatch;
42 struct diffgrep_cb ecbdata;
43 xpparam_t xpp;
44 xdemitconf_t xecfg;
45
46 if (!one)
47 return !regexec_buf(regexp, two->ptr, two->size,
48 1, ®match, 0);
49 if (!two)
50 return !regexec_buf(regexp, one->ptr, one->size,
51 1, ®match, 0);
52
53 /*
54 * We have both sides; need to run textual diff and see if
55 * the pattern appears on added/deleted lines.
56 */
57 memset(&xpp, 0, sizeof(xpp));
58 memset(&xecfg, 0, sizeof(xecfg));
59 ecbdata.regexp = regexp;
60 ecbdata.hit = 0;
61 xecfg.ctxlen = o->context;
62 xecfg.interhunkctxlen = o->interhunkcontext;
63 if (xdi_diff_outf(one, two, diffgrep_consume, &ecbdata, &xpp, &xecfg))
64 return 0;
65 return ecbdata.hit;
66}
67
68static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
69{
70 unsigned int cnt;
71 unsigned long sz;
72 const char *data;
73
74 sz = mf->size;
75 data = mf->ptr;
76 cnt = 0;
77
78 if (regexp) {
79 regmatch_t regmatch;
80 int flags = 0;
81
82 while (sz && *data &&
83 !regexec_buf(regexp, data, sz, 1, ®match, flags)) {
84 flags |= REG_NOTBOL;
85 data += regmatch.rm_eo;
86 sz -= regmatch.rm_eo;
87 if (sz && *data && regmatch.rm_so == regmatch.rm_eo) {
88 data++;
89 sz--;
90 }
91 cnt++;
92 }
93
94 } else { /* Classic exact string match */
95 while (sz) {
96 struct kwsmatch kwsm;
97 size_t offset = kwsexec(kws, data, sz, &kwsm);
98 if (offset == -1)
99 break;
100 sz -= offset + kwsm.size[0];
101 data += offset + kwsm.size[0];
102 cnt++;
103 }
104 }
105 return cnt;
106}
107
108static int has_changes(mmfile_t *one, mmfile_t *two,
109 struct diff_options *o,
110 regex_t *regexp, kwset_t kws)
111{
112 unsigned int one_contains = one ? contains(one, regexp, kws) : 0;
113 unsigned int two_contains = two ? contains(two, regexp, kws) : 0;
114 return one_contains != two_contains;
115}
116
117static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
118 regex_t *regexp, kwset_t kws, pickaxe_fn fn)
119{
120 struct userdiff_driver *textconv_one = NULL;
121 struct userdiff_driver *textconv_two = NULL;
122 mmfile_t mf1, mf2;
123 int ret;
124
125 if (!o->pickaxe[0])
126 return 0;
127
128 /* ignore unmerged */
129 if (!DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two))
130 return 0;
131
132 if (DIFF_OPT_TST(o, ALLOW_TEXTCONV)) {
133 textconv_one = get_textconv(p->one);
134 textconv_two = get_textconv(p->two);
135 }
136
137 /*
138 * If we have an unmodified pair, we know that the count will be the
139 * same and don't even have to load the blobs. Unless textconv is in
140 * play, _and_ we are using two different textconv filters (e.g.,
141 * because a pair is an exact rename with different textconv attributes
142 * for each side, which might generate different content).
143 */
144 if (textconv_one == textconv_two && diff_unmodified_pair(p))
145 return 0;
146
147 mf1.size = fill_textconv(textconv_one, p->one, &mf1.ptr);
148 mf2.size = fill_textconv(textconv_two, p->two, &mf2.ptr);
149
150 ret = fn(DIFF_FILE_VALID(p->one) ? &mf1 : NULL,
151 DIFF_FILE_VALID(p->two) ? &mf2 : NULL,
152 o, regexp, kws);
153
154 if (textconv_one)
155 free(mf1.ptr);
156 if (textconv_two)
157 free(mf2.ptr);
158 diff_free_filespec_data(p->one);
159 diff_free_filespec_data(p->two);
160
161 return ret;
162}
163
164static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
165 regex_t *regexp, kwset_t kws, pickaxe_fn fn)
166{
167 int i;
168 struct diff_queue_struct outq;
169
170 DIFF_QUEUE_CLEAR(&outq);
171
172 if (o->pickaxe_opts & DIFF_PICKAXE_ALL) {
173 /* Showing the whole changeset if needle exists */
174 for (i = 0; i < q->nr; i++) {
175 struct diff_filepair *p = q->queue[i];
176 if (pickaxe_match(p, o, regexp, kws, fn))
177 return; /* do not munge the queue */
178 }
179
180 /*
181 * Otherwise we will clear the whole queue by copying
182 * the empty outq at the end of this function, but
183 * first clear the current entries in the queue.
184 */
185 for (i = 0; i < q->nr; i++)
186 diff_free_filepair(q->queue[i]);
187 } else {
188 /* Showing only the filepairs that has the needle */
189 for (i = 0; i < q->nr; i++) {
190 struct diff_filepair *p = q->queue[i];
191 if (pickaxe_match(p, o, regexp, kws, fn))
192 diff_q(&outq, p);
193 else
194 diff_free_filepair(p);
195 }
196 }
197
198 free(q->queue);
199 *q = outq;
200}
201
202void diffcore_pickaxe(struct diff_options *o)
203{
204 const char *needle = o->pickaxe;
205 int opts = o->pickaxe_opts;
206 regex_t regex, *regexp = NULL;
207 kwset_t kws = NULL;
208
209 if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
210 int err;
211 int cflags = REG_EXTENDED | REG_NEWLINE;
212 if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
213 cflags |= REG_ICASE;
214 err = regcomp(®ex, needle, cflags);
215 if (err) {
216 /* The POSIX.2 people are surely sick */
217 char errbuf[1024];
218 regerror(err, ®ex, errbuf, 1024);
219 regfree(®ex);
220 die("invalid regex: %s", errbuf);
221 }
222 regexp = ®ex;
223 } else {
224 kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
225 ? tolower_trans_tbl : NULL);
226 kwsincr(kws, needle, strlen(needle));
227 kwsprep(kws);
228 }
229
230 /* Might want to warn when both S and G are on; I don't care... */
231 pickaxe(&diff_queued_diff, o, regexp, kws,
232 (opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes);
233
234 if (regexp)
235 regfree(regexp);
236 else
237 kwsfree(kws);
238 return;
239}