import-tars: read overlong names from pax extended header
author Pedro Alvarez Piedehierro <palvarez89@gmail.com>
Wed, 23 May 2018 22:54:17 +0000 (23:54 +0100)
committer Junio C Hamano <gitster@pobox.com>
Wed, 23 May 2018 23:35:51 +0000 (08:35 +0900)
Importing gcc tarballs[1] with import-tars script (in contrib) fails
when hitting a pax extended header.

Make sure we always read the extended attributes from the pax entries,
and store the 'path' value if found to be used in the next ustar entry.

The code to parse pax extended headers was written consulting the
Pax Interchange Format documentation [2].

[1] http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.xz
[2] https://www.freebsd.org/cgi/man.cgi?manpath=FreeBSD+8-current&query=tar&sektion=5

Signed-off-by: Pedro Alvarez <palvarez89@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
contrib/fast-import/import-tars.perl
index d60b4315ed60ad10e849408c6986d1ea5b47b32b..e800d9f5c9cf25e1aae3a0b87ad35f46b2a973c3 100755 (executable)
@@ -63,6 +63,8 @@
        my $have_top_dir = 1;
        my ($top_dir, %files);
 
+       my $next_path = '';
+
        while (read(I, $_, 512) == 512) {
                my ($name, $mode, $uid, $gid, $size, $mtime,
                        $chksum, $typeflag, $linkname, $magic,
                        $prefix) = unpack 'Z100 Z8 Z8 Z8 Z12 Z12
                        Z8 Z1 Z100 Z6
                        Z2 Z32 Z32 Z8 Z8 Z*', $_;
+
+               unless ($next_path eq '') {
+                       # Recover name from previous extended header
+                       $name = $next_path;
+                       $next_path = '';
+               }
+
                last unless length($name);
                if ($name eq '././@LongLink') {
                        # GNU tar extension
                        Z8 Z1 Z100 Z6
                        Z2 Z32 Z32 Z8 Z8 Z*', $_;
                }
-               next if $name =~ m{/\z};
                $mode = oct $mode;
                $size = oct $size;
                $mtime = oct $mtime;
                next if $typeflag == 5; # directory
 
-               if ($typeflag != 1) { # handle hard links later
+               if ($typeflag eq 'x') { # extended header
+                       # If extended header, check for path
+                       my $pax_header = '';
+                       while ($size > 0 && read(I, $_, 512) == 512) {
+                               $pax_header = $pax_header . substr($_, 0, $size);
+                               $size -= 512;
+                       }
+
+                       my @lines = split /\n/, $pax_header;
+                       foreach my $line (@lines) {
+                               my ($len, $entry) = split / /, $line;
+                               my ($key, $value) = split /=/, $entry;
+                               if ($key eq 'path') {
+                                       $next_path = $value;
+                               }
+                       }
+                       next;
+               } elsif ($name =~ m{/\z}) { # directory
+                       next;
+               } elsif ($typeflag != 1) { # handle hard links later
                        print FI "blob\n", "mark :$next_mark\n";
                        if ($typeflag == 2) { # symbolic link
                                print FI "data ", length($linkname), "\n",