struct filter_params {
const char *src;
unsigned long size;
+ int fd;
const char *cmd;
const char *path;
};
-static int filter_buffer(int in, int out, void *data)
+static int filter_buffer_or_fd(int in, int out, void *data)
{
/*
* Spawn cmd and feed the buffer contents through its stdin.
sigchain_push(SIGPIPE, SIG_IGN);
- write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
+ if (params->src) {
+ write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
+ } else {
+ write_err = copy_fd(params->fd, child_process.in);
+ }
+
if (close(child_process.in))
write_err = 1;
if (write_err)
return (write_err || status);
}
-static int apply_filter(const char *path, const char *src, size_t len,
+static int apply_filter(const char *path, const char *src, size_t len, int fd,
struct strbuf *dst, const char *cmd)
{
/*
return 1;
memset(&async, 0, sizeof(async));
- async.proc = filter_buffer;
+ async.proc = filter_buffer_or_fd;
async.data = ¶ms;
async.out = -1;
params.src = src;
params.size = len;
+ params.fd = fd;
params.cmd = cmd;
params.path = path;
}
}
+int would_convert_to_git_filter_fd(const char *path)
+{
+ struct conv_attrs ca;
+
+ convert_attrs(&ca, path);
+ if (!ca.drv)
+ return 0;
+
+ /*
+ * Apply a filter to an fd only if the filter is required to succeed.
+ * We must die if the filter fails, because the original data before
+ * filtering is not available.
+ */
+ if (!ca.drv->required)
+ return 0;
+
+ return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
+}
+
int convert_to_git(const char *path, const char *src, size_t len,
struct strbuf *dst, enum safe_crlf checksafe)
{
required = ca.drv->required;
}
- ret |= apply_filter(path, src, len, dst, filter);
+ ret |= apply_filter(path, src, len, -1, dst, filter);
if (!ret && required)
die("%s: clean filter '%s' failed", path, ca.drv->name);
return ret | ident_to_git(path, src, len, dst, ca.ident);
}
+void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
+ enum safe_crlf checksafe)
+{
+ struct conv_attrs ca;
+ convert_attrs(&ca, path);
+
+ assert(ca.drv);
+ assert(ca.drv->clean);
+
+ if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean))
+ die("%s: clean filter '%s' failed", path, ca.drv->name);
+
+ ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
+ crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
+ ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
+}
+
static int convert_to_working_tree_internal(const char *path, const char *src,
size_t len, struct strbuf *dst,
int normalizing)
}
}
- ret_filter = apply_filter(path, src, len, dst, filter);
+ ret_filter = apply_filter(path, src, len, -1, dst, filter);
if (!ret_filter && required)
die("%s: smudge filter %s failed", path, ca.drv->name);
return ret;
}
+static int index_stream_convert_blob(unsigned char *sha1, int fd,
+ const char *path, unsigned flags)
+{
+ int ret;
+ const int write_object = flags & HASH_WRITE_OBJECT;
+ struct strbuf sbuf = STRBUF_INIT;
+
+ assert(path);
+ assert(would_convert_to_git_filter_fd(path));
+
+ convert_to_git_filter_fd(path, fd, &sbuf,
+ write_object ? safe_crlf : SAFE_CRLF_FALSE);
+
+ if (write_object)
+ ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
+ sha1);
+ else
+ ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
+ sha1);
+ strbuf_release(&sbuf);
+ return ret;
+}
+
static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
const char *path, unsigned flags)
{
int ret;
size_t size = xsize_t(st->st_size);
- if (!S_ISREG(st->st_mode))
+ if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
+ ret = index_stream_convert_blob(sha1, fd, path, flags);
+ else if (!S_ISREG(st->st_mode))
ret = index_pipe(sha1, fd, type, path, flags);
else if (size <= big_file_threshold || type != OBJ_BLOB ||
(path && would_convert_to_git(path)))
:
'
-test_expect_success 'required filter success' '
- git config filter.required.smudge cat &&
- git config filter.required.clean cat &&
+test_expect_success 'required filter should filter data' '
+ git config filter.required.smudge ./rot13.sh &&
+ git config filter.required.clean ./rot13.sh &&
git config filter.required.required true &&
echo "*.r filter=required" >.gitattributes &&
- echo test >test.r &&
+ cat test.o >test.r &&
git add test.r &&
+
rm -f test.r &&
- git checkout -- test.r
+ git checkout -- test.r &&
+ cmp test.o test.r &&
+
+ ./rot13.sh <test.o >expected &&
+ git cat-file blob :test.r >actual &&
+ cmp expected actual
'
test_expect_success 'required filter smudge failure' '
test_must_fail git add test.fc
'
+test_expect_success 'filtering large input to small output should use little memory' '
+ git config filter.devnull.clean "cat >/dev/null" &&
+ git config filter.devnull.required true &&
+ for i in $(test_seq 1 30); do printf "%1048576d" 1; done >30MB &&
+ echo "30MB filter=devnull" >.gitattributes &&
+ GIT_MMAP_LIMIT=1m GIT_ALLOC_LIMIT=1m git add 30MB
+'
+
test_expect_success EXPENSIVE 'filter large file' '
git config filter.largefile.smudge cat &&
git config filter.largefile.clean cat &&