Merge branch 'jk/big-and-future-archive-tar'
author Junio C Hamano <gitster@pobox.com>
Wed, 13 Jul 2016 18:24:18 +0000 (11:24 -0700)
committer Junio C Hamano <gitster@pobox.com>
Wed, 13 Jul 2016 18:24:18 +0000 (11:24 -0700)
"git archive" learned to handle files that are larger than 8GB and
commits further in the future than can be expressed by the traditional
US-TAR format.

* jk/big-and-future-archive-tar:
archive-tar: drop return value
archive-tar: write extended headers for far-future mtime
archive-tar: write extended headers for file sizes >= 8GB
t5000: test tar files that overflow ustar headers
t9300: factor out portable "head -c" replacement

archive-tar.c
t/t5000-tar-tree.sh
t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a [new file with mode: 0644]
t/t5000/huge-and-future.tar [new file with mode: 0644]
t/t9300-fast-import.sh
t/test-lib-functions.sh
index cb99df28142f164da62f5a728eb9019e57804157..7ea4e90814d26b99c9e2de598daa874ec4b216fd 100644 (file)
@@ -18,6 +18,16 @@ static int tar_umask = 002;
 static int write_tar_filter_archive(const struct archiver *ar,
                                    struct archiver_args *args);
 
+/*
+ * This is the max value that a ustar size header can specify, as it is fixed
+ * at 11 octal digits. POSIX specifies that we switch to extended headers at
+ * this size.
+ *
+ * Likewise for the mtime (which happens to use a buffer of the same size).
+ */
+#define USTAR_MAX_SIZE 077777777777UL
+#define USTAR_MAX_MTIME 077777777777UL
+
 /* writes out the whole block, but only if it is full */
 static void write_if_needed(void)
 {
@@ -137,6 +147,20 @@ static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword,
        strbuf_addch(sb, '\n');
 }
 
+/*
+ * Like strbuf_append_ext_header, but formats a numeric value in decimal first.
+ */
+static void strbuf_append_ext_header_uint(struct strbuf *sb,
+                                         const char *keyword,
+                                         uintmax_t value)
+{
+       char buf[40]; /* big enough for 2^128 in decimal, plus NUL */
+       int len;
+
+       len = xsnprintf(buf, sizeof(buf), "%"PRIuMAX, value);
+       strbuf_append_ext_header(sb, keyword, buf, len);
+}
+
 static unsigned int ustar_header_chksum(const struct ustar_header *header)
 {
        const unsigned char *p = (const unsigned char *)header;
@@ -208,7 +232,7 @@ static int write_tar_entry(struct archiver_args *args,
        struct ustar_header header;
        struct strbuf ext_header = STRBUF_INIT;
        unsigned int old_mode = mode;
-       unsigned long size;
+       unsigned long size, size_in_header;
        void *buffer;
        int err = 0;
 
@@ -267,7 +291,13 @@ static int write_tar_entry(struct archiver_args *args,
                        memcpy(header.linkname, buffer, size);
        }
 
-       prepare_header(args, &header, mode, size);
+       size_in_header = size;
+       if (S_ISREG(mode) && size > USTAR_MAX_SIZE) {
+               size_in_header = 0;
+               strbuf_append_ext_header_uint(&ext_header, "size", size);
+       }
+
+       prepare_header(args, &header, mode, size_in_header);
 
        if (ext_header.len > 0) {
                err = write_extended_header(args, sha1, ext_header.buf,
@@ -289,15 +319,25 @@ static int write_tar_entry(struct archiver_args *args,
        return err;
 }
 
-static int write_global_extended_header(struct archiver_args *args)
+static void write_global_extended_header(struct archiver_args *args)
 {
        const unsigned char *sha1 = args->commit_sha1;
        struct strbuf ext_header = STRBUF_INIT;
        struct ustar_header header;
        unsigned int mode;
-       int err = 0;
 
-       strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40);
+       if (sha1)
+               strbuf_append_ext_header(&ext_header, "comment",
+                                        sha1_to_hex(sha1), 40);
+       if (args->time > USTAR_MAX_MTIME) {
+               strbuf_append_ext_header_uint(&ext_header, "mtime",
+                                             args->time);
+               args->time = USTAR_MAX_MTIME;
+       }
+
+       if (!ext_header.len)
+               return;
+
        memset(&header, 0, sizeof(header));
        *header.typeflag = TYPEFLAG_GLOBAL_HEADER;
        mode = 0100666;
@@ -306,7 +346,6 @@ static int write_global_extended_header(struct archiver_args *args)
        write_blocked(&header, sizeof(header));
        write_blocked(ext_header.buf, ext_header.len);
        strbuf_release(&ext_header);
-       return err;
 }
 
 static struct archiver **tar_filters;
@@ -382,10 +421,8 @@ static int write_tar_archive(const struct archiver *ar,
 {
        int err = 0;
 
-       if (args->commit_sha1)
-               err = write_global_extended_header(args);
-       if (!err)
-               err = write_archive_entries(args, write_tar_entry);
+       write_global_extended_header(args);
+       err = write_archive_entries(args, write_tar_entry);
        if (!err)
                write_trailer();
        return err;
index 4b68bbafbe9016d66ce4f96f2058ac67640ed147..96d208da25ef0b1be793b9d6c0e0e5fb72c14489 100755 (executable)
@@ -319,4 +319,78 @@ test_expect_success 'catch non-matching pathspec' '
        test_must_fail git archive -v HEAD -- "*.abc" >/dev/null
 '
 
+# Pull the size and date of each entry in a tarfile using the system tar.
+#
+# We'll pull out only the year from the date; that avoids any question of
+# timezones impacting the result (as long as we keep our test times away from a
+# year boundary; our reference times are all in August).
+#
+# The output of tar_info is expected to be "<size> <year>", both in decimal. It
+# ignores the return value of tar. We have to do this, because some of our test
+# input is only partial (the real data is 64GB in some cases).
+tar_info () {
+       "$TAR" tvf "$1" |
+       awk '{
+               split($4, date, "-")
+               print $3 " " date[1]
+       }'
+}
+
+# See if our system tar can handle a tar file with huge sizes and dates far in
+# the future, and that we can actually parse its output.
+#
+# The reference file was generated by GNU tar, and the magic time and size are
+# both octal 01000000000001, which overflows normal ustar fields.
+test_lazy_prereq TAR_HUGE '
+       echo "68719476737 4147" >expect &&
+       tar_info "$TEST_DIRECTORY"/t5000/huge-and-future.tar >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'set up repository with huge blob' '
+       obj_d=19 &&
+       obj_f=f9c8273ec45a8938e6999cb59b3ff66739902a &&
+       obj=${obj_d}${obj_f} &&
+       mkdir -p .git/objects/$obj_d &&
+       cp "$TEST_DIRECTORY"/t5000/$obj .git/objects/$obj_d/$obj_f &&
+       rm -f .git/index &&
+       git update-index --add --cacheinfo 100644,$obj,huge &&
+       git commit -m huge
+'
+
+# We expect git to die with SIGPIPE here (otherwise we
+# would generate the whole 64GB).
+test_expect_success 'generate tar with huge size' '
+       {
+               git archive HEAD
+               echo $? >exit-code
+       } | test_copy_bytes 4096 >huge.tar &&
+       echo 141 >expect &&
+       test_cmp expect exit-code
+'
+
+test_expect_success TAR_HUGE 'system tar can read our huge size' '
+       echo 68719476737 >expect &&
+       tar_info huge.tar | cut -d" " -f1 >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'set up repository with far-future commit' '
+       rm -f .git/index &&
+       echo content >file &&
+       git add file &&
+       GIT_COMMITTER_DATE="@68719476737 +0000" \
+               git commit -m "tempori parendum"
+'
+
+test_expect_success 'generate tar with future mtime' '
+       git archive HEAD >future.tar
+'
+
+test_expect_success TAR_HUGE 'system tar can read our future mtime' '
+       echo 4147 >expect &&
+       tar_info future.tar | cut -d" " -f2 >actual &&
+       test_cmp expect actual
+'
+
 test_done
diff --git a/t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a b/t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a
new file mode 100644 (file)
index 0000000..5cbe9ec
Binary files /dev/null and b/t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a differ
diff --git a/t/t5000/huge-and-future.tar b/t/t5000/huge-and-future.tar
new file mode 100644 (file)
index 0000000..63155e1
Binary files /dev/null and b/t/t5000/huge-and-future.tar differ
index 74d740de41bbd489dd0ce9fb811f9ea1c08b248c..2e0ba3ebd8ab3273144743dc38675e2b5d429ea1 100755 (executable)
@@ -7,23 +7,6 @@ test_description='test git fast-import utility'
 . ./test-lib.sh
 . "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash
 
-# Print $1 bytes from stdin to stdout.
-#
-# This could be written as "head -c $1", but IRIX "head" does not
-# support the -c option.
-head_c () {
-       perl -e '
-               my $len = $ARGV[1];
-               while ($len > 0) {
-                       my $s;
-                       my $nread = sysread(STDIN, $s, $len);
-                       die "cannot read: $!" unless defined($nread);
-                       print $s;
-                       $len -= $nread;
-               }
-       ' - "$1"
-}
-
 verify_packs () {
        for p in .git/objects/pack/*.pack
        do
@@ -2481,7 +2464,7 @@ test_expect_success PIPE 'R: copy using cat-file' '
 
                read blob_id type size <&3 &&
                echo "$blob_id $type $size" >response &&
-               head_c $size >blob <&3 &&
+               test_copy_bytes $size >blob <&3 &&
                read newline <&3 &&
 
                cat <<-EOF &&
@@ -2524,7 +2507,7 @@ test_expect_success PIPE 'R: print blob mid-commit' '
                EOF
 
                read blob_id type size <&3 &&
-               head_c $size >actual <&3 &&
+               test_copy_bytes $size >actual <&3 &&
                read newline <&3 &&
 
                echo
@@ -2559,7 +2542,7 @@ test_expect_success PIPE 'R: print staged blob within commit' '
                echo "cat-blob $to_get" &&
 
                read blob_id type size <&3 &&
-               head_c $size >actual <&3 &&
+               test_copy_bytes $size >actual <&3 &&
                read newline <&3 &&
 
                echo deleteall
index 48884d520813998a735d89d406b45c63b0bf748d..90856d67e52dcdaabd13e78abd29c7c3d3949643 100644 (file)
@@ -961,3 +961,17 @@ test_env () {
                done
        )
 }
+
+# Read up to "$1" bytes (or to EOF) from stdin and write them to stdout.
+test_copy_bytes () {
+       perl -e '
+               my $len = $ARGV[1];
+               while ($len > 0) {
+                       my $s;
+                       my $nread = sysread(STDIN, $s, $len);
+                       die "cannot read: $!" unless defined($nread);
+                       print $s;
+                       $len -= $nread;
+               }
+       ' - "$1"
+}