diff --stat: mark any file larger than core.bigfilethreshold binary
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Sat, 16 Aug 2014 03:08:05 +0000 (10:08 +0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 18 Aug 2014 17:16:45 +0000 (10:16 -0700)
Files that are too large may cause memory allocation to fail. If that
happens here, it could affect quite a few commands that involve
diff. Moreover, very large files are inefficient to compare anyway (and
are most likely non-text), so mark them as binary and skip looking at
their content.

Noticed-by: Dale R. Worley <worley@alum.mit.edu>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/config.txt
Documentation/gitattributes.txt
diff.c
diffcore.h
t/t1050-large.sh
index c55c22ab7be94e798164a48622aca21dd4daa44f..3b5b24aeb7f16c03e06bd6f38fd89e9080011ce7 100644 (file)
@@ -499,7 +499,8 @@ core.bigFileThreshold::
        Files larger than this size are stored deflated, without
        attempting delta compression.  Storing large files without
        delta compression avoids excessive memory usage, at the
        Files larger than this size are stored deflated, without
        attempting delta compression.  Storing large files without
        delta compression avoids excessive memory usage, at the
-       slight expense of increased disk usage.
+       slight expense of increased disk usage. Additionally, files
+       larger than this size are always treated as binary by diff.
 +
 Default is 512 MiB on all platforms.  This should be reasonable
 for most projects as source code and other text files can still
 +
 Default is 512 MiB on all platforms.  This should be reasonable
 for most projects as source code and other text files can still
index 643c1ba9290ff10813eab57b95f7c26d8c43c7e2..9b45bda7485c7e4742b13cdf19272968592b640a 100644 (file)
@@ -440,8 +440,8 @@ Unspecified::
 
        A path to which the `diff` attribute is unspecified
        first gets its contents inspected, and if it looks like
 
        A path to which the `diff` attribute is unspecified
        first gets its contents inspected, and if it looks like
-       text, it is treated as text.  Otherwise it would
-       generate `Binary files differ`.
+       text and is smaller than core.bigFileThreshold, it is treated
+       as text. Otherwise it would generate `Binary files differ`.
 
 String::
 
 
 String::
 
diff --git a/diff.c b/diff.c
index f4b7421fa6fbfdccedadce60178d56734f42d65d..d381a6f446f36c441396077075d33d0daefd4b71 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
                        one->is_binary = one->driver->binary;
                else {
                        if (!one->data && DIFF_FILE_VALID(one))
                        one->is_binary = one->driver->binary;
                else {
                        if (!one->data && DIFF_FILE_VALID(one))
-                               diff_populate_filespec(one, 0);
-                       if (one->data)
+                               diff_populate_filespec(one, CHECK_BINARY);
+                       if (one->is_binary == -1 && one->data)
                                one->is_binary = buffer_is_binary(one->data,
                                                one->size);
                        if (one->is_binary == -1)
                                one->is_binary = buffer_is_binary(one->data,
                                                one->size);
                        if (one->is_binary == -1)
@@ -2725,6 +2725,11 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
                }
                if (size_only)
                        return 0;
                }
                if (size_only)
                        return 0;
+               if ((flags & CHECK_BINARY) &&
+                   s->size > big_file_threshold && s->is_binary == -1) {
+                       s->is_binary = 1;
+                       return 0;
+               }
                fd = open(s->path, O_RDONLY);
                if (fd < 0)
                        goto err_empty;
                fd = open(s->path, O_RDONLY);
                if (fd < 0)
                        goto err_empty;
@@ -2746,16 +2751,21 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
        }
        else {
                enum object_type type;
        }
        else {
                enum object_type type;
-               if (size_only) {
+               if (size_only || (flags & CHECK_BINARY)) {
                        type = sha1_object_info(s->sha1, &s->size);
                        if (type < 0)
                                die("unable to read %s", sha1_to_hex(s->sha1));
                        type = sha1_object_info(s->sha1, &s->size);
                        if (type < 0)
                                die("unable to read %s", sha1_to_hex(s->sha1));
-               } else {
-                       s->data = read_sha1_file(s->sha1, &type, &s->size);
-                       if (!s->data)
-                               die("unable to read %s", sha1_to_hex(s->sha1));
-                       s->should_free = 1;
+                       if (size_only)
+                               return 0;
+                       if (s->size > big_file_threshold && s->is_binary == -1) {
+                               s->is_binary = 1;
+                               return 0;
+                       }
                }
                }
+               s->data = read_sha1_file(s->sha1, &type, &s->size);
+               if (!s->data)
+                       die("unable to read %s", sha1_to_hex(s->sha1));
+               s->should_free = 1;
        }
        return 0;
 }
        }
        return 0;
 }
index c80df18f3cafb692483eaac8c72088c25c5a568e..33ea2de348803b29a08a6713ae4cab1345f874d9 100644 (file)
@@ -56,6 +56,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *,
                          int, unsigned short);
 
 #define CHECK_SIZE_ONLY 1
                          int, unsigned short);
 
 #define CHECK_SIZE_ONLY 1
+#define CHECK_BINARY    2
 extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
 extern void diff_free_filespec_data(struct diff_filespec *);
 extern void diff_free_filespec_blob(struct diff_filespec *);
 extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
 extern void diff_free_filespec_data(struct diff_filespec *);
 extern void diff_free_filespec_blob(struct diff_filespec *);
index 5642f84b8355debe955fe055c85ea6d3ca13c398..00d2f33df0d9ff6110a649d06c954bde6aaf83ed 100755 (executable)
@@ -112,6 +112,10 @@ test_expect_success 'diff --raw' '
        git diff --raw HEAD^
 '
 
        git diff --raw HEAD^
 '
 
+test_expect_success 'diff --stat' '
+       git diff --stat HEAD^ HEAD
+'
+
 test_expect_success 'hash-object' '
        git hash-object large1
 '
 test_expect_success 'hash-object' '
        git hash-object large1
 '