contrib / fast-import / import-tars.perlon commit t/perf: add infrastructure for measuring sizes (22bec79)
   1#!/usr/bin/perl
   2
   3## tar archive frontend for git-fast-import
   4##
   5## For example:
   6##
   7##  mkdir project; cd project; git init
   8##  perl import-tars.perl *.tar.bz2
   9##  git whatchanged import-tars
  10##
  11## Use --metainfo to specify the extension for a meta data file, where
  12## import-tars can read the commit message and optionally author and
  13## committer information.
  14##
  15##  echo 'This is the commit message' > myfile.tar.bz2.msg
  16##  perl import-tars.perl --metainfo=msg myfile.tar.bz2
  17
  18use strict;
  19use Getopt::Long;
  20
  21my $metaext = '';
  22
  23die "usage: import-tars [--metainfo=extension] *.tar.{gz,bz2,lzma,xz,Z}\n"
  24        unless GetOptions('metainfo=s' => \$metaext) && @ARGV;
  25
  26my $branch_name = 'import-tars';
  27my $branch_ref = "refs/heads/$branch_name";
  28my $author_name = $ENV{'GIT_AUTHOR_NAME'} || 'T Ar Creator';
  29my $author_email = $ENV{'GIT_AUTHOR_EMAIL'} || 'tar@example.com';
  30my $committer_name = $ENV{'GIT_COMMITTER_NAME'} || `git config --get user.name`;
  31my $committer_email = $ENV{'GIT_COMMITTER_EMAIL'} || `git config --get user.email`;
  32
  33chomp($committer_name, $committer_email);
  34
  35open(FI, '|-', 'git', 'fast-import', '--quiet')
  36        or die "Unable to start git fast-import: $!\n";
  37foreach my $tar_file (@ARGV)
  38{
  39        my $commit_time = time;
  40        $tar_file =~ m,([^/]+)$,;
  41        my $tar_name = $1;
  42
  43        if ($tar_name =~ s/\.(tar\.gz|tgz)$//) {
  44                open(I, '-|', 'gunzip', '-c', $tar_file)
  45                        or die "Unable to gunzip -c $tar_file: $!\n";
  46        } elsif ($tar_name =~ s/\.(tar\.bz2|tbz2)$//) {
  47                open(I, '-|', 'bunzip2', '-c', $tar_file)
  48                        or die "Unable to bunzip2 -c $tar_file: $!\n";
  49        } elsif ($tar_name =~ s/\.tar\.Z$//) {
  50                open(I, '-|', 'uncompress', '-c', $tar_file)
  51                        or die "Unable to uncompress -c $tar_file: $!\n";
  52        } elsif ($tar_name =~ s/\.(tar\.(lzma|xz)|(tlz|txz))$//) {
  53                open(I, '-|', 'xz', '-dc', $tar_file)
  54                        or die "Unable to xz -dc $tar_file: $!\n";
  55        } elsif ($tar_name =~ s/\.tar$//) {
  56                open(I, $tar_file) or die "Unable to open $tar_file: $!\n";
  57        } else {
  58                die "Unrecognized compression format: $tar_file\n";
  59        }
  60
  61        my $author_time = 0;
  62        my $next_mark = 1;
  63        my $have_top_dir = 1;
  64        my ($top_dir, %files);
  65
  66        my $next_path = '';
  67
  68        while (read(I, $_, 512) == 512) {
  69                my ($name, $mode, $uid, $gid, $size, $mtime,
  70                        $chksum, $typeflag, $linkname, $magic,
  71                        $version, $uname, $gname, $devmajor, $devminor,
  72                        $prefix) = unpack 'Z100 Z8 Z8 Z8 Z12 Z12
  73                        Z8 Z1 Z100 Z6
  74                        Z2 Z32 Z32 Z8 Z8 Z*', $_;
  75
  76                unless ($next_path eq '') {
  77                        # Recover name from previous extended header
  78                        $name = $next_path;
  79                        $next_path = '';
  80                }
  81
  82                last unless length($name);
  83                if ($name eq '././@LongLink') {
  84                        # GNU tar extension
  85                        if (read(I, $_, 512) != 512) {
  86                                die ('Short archive');
  87                        }
  88                        $name = unpack 'Z257', $_;
  89                        next unless $name;
  90
  91                        my $dummy;
  92                        if (read(I, $_, 512) != 512) {
  93                                die ('Short archive');
  94                        }
  95                        ($dummy, $mode, $uid, $gid, $size, $mtime,
  96                        $chksum, $typeflag, $linkname, $magic,
  97                        $version, $uname, $gname, $devmajor, $devminor,
  98                        $prefix) = unpack 'Z100 Z8 Z8 Z8 Z12 Z12
  99                        Z8 Z1 Z100 Z6
 100                        Z2 Z32 Z32 Z8 Z8 Z*', $_;
 101                }
 102                $mode = oct $mode;
 103                $size = oct $size;
 104                $mtime = oct $mtime;
 105                next if $typeflag == 5; # directory
 106
 107                if ($typeflag eq 'x') { # extended header
 108                        # If extended header, check for path
 109                        my $pax_header = '';
 110                        while ($size > 0 && read(I, $_, 512) == 512) {
 111                                $pax_header = $pax_header . substr($_, 0, $size);
 112                                $size -= 512;
 113                        }
 114
 115                        my @lines = split /\n/, $pax_header;
 116                        foreach my $line (@lines) {
 117                                my ($len, $entry) = split / /, $line;
 118                                my ($key, $value) = split /=/, $entry;
 119                                if ($key eq 'path') {
 120                                        $next_path = $value;
 121                                }
 122                        }
 123                        next;
 124                } elsif ($name =~ m{/\z}) { # directory
 125                        next;
 126                } elsif ($typeflag != 1) { # handle hard links later
 127                        print FI "blob\n", "mark :$next_mark\n";
 128                        if ($typeflag == 2) { # symbolic link
 129                                print FI "data ", length($linkname), "\n",
 130                                        $linkname;
 131                                $mode = 0120000;
 132                        } else {
 133                                print FI "data $size\n";
 134                                while ($size > 0 && read(I, $_, 512) == 512) {
 135                                        print FI substr($_, 0, $size);
 136                                        $size -= 512;
 137                                }
 138                        }
 139                        print FI "\n";
 140                }
 141
 142                my $path;
 143                if ($prefix) {
 144                        $path = "$prefix/$name";
 145                } else {
 146                        $path = "$name";
 147                }
 148
 149                if ($typeflag == 1) { # hard link
 150                        $linkname = "$prefix/$linkname" if $prefix;
 151                        $files{$path} = [ $files{$linkname}->[0], $mode ];
 152                } else {
 153                        $files{$path} = [$next_mark++, $mode];
 154                }
 155
 156                $author_time = $mtime if $mtime > $author_time;
 157                $path =~ m,^([^/]+)/,;
 158                $top_dir = $1 unless $top_dir;
 159                $have_top_dir = 0 if $top_dir ne $1;
 160        }
 161
 162        my $commit_msg = "Imported from $tar_file.";
 163        my $this_committer_name = $committer_name;
 164        my $this_committer_email = $committer_email;
 165        my $this_author_name = $author_name;
 166        my $this_author_email = $author_email;
 167        if ($metaext ne '') {
 168                # Optionally read a commit message from <filename.tar>.msg
 169                # Add a line on the form "Committer: name <e-mail>" to override
 170                # the committer and "Author: name <e-mail>" to override the
 171                # author for this tar ball.
 172                if (open MSG, '<', "${tar_file}.${metaext}") {
 173                        my $header_done = 0;
 174                        $commit_msg = '';
 175                        while (<MSG>) {
 176                                if (!$header_done && /^Committer:\s+([^<>]*)\s+<(.*)>\s*$/i) {
 177                                        $this_committer_name = $1;
 178                                        $this_committer_email = $2;
 179                                } elsif (!$header_done && /^Author:\s+([^<>]*)\s+<(.*)>\s*$/i) {
 180                                        $this_author_name = $1;
 181                                        $this_author_email = $2;
 182                                } elsif (!$header_done && /^$/) { # empty line ends header.
 183                                        $header_done = 1;
 184                                } else {
 185                                        $commit_msg .= $_;
 186                                        $header_done = 1;
 187                                }
 188                        }
 189                        close MSG;
 190                }
 191        }
 192
 193        print FI <<EOF;
 194commit $branch_ref
 195author $this_author_name <$this_author_email> $author_time +0000
 196committer $this_committer_name <$this_committer_email> $commit_time +0000
 197data <<END_OF_COMMIT_MESSAGE
 198$commit_msg
 199END_OF_COMMIT_MESSAGE
 200
 201deleteall
 202EOF
 203
 204        foreach my $path (keys %files)
 205        {
 206                my ($mark, $mode) = @{$files{$path}};
 207                $path =~ s,^([^/]+)/,, if $have_top_dir;
 208                $mode = $mode & 0111 ? 0755 : 0644 unless $mode == 0120000;
 209                printf FI "M %o :%i %s\n", $mode, $mark, $path;
 210        }
 211        print FI "\n";
 212
 213        print FI <<EOF;
 214tag $tar_name
 215from $branch_ref
 216tagger $author_name <$author_email> $author_time +0000
 217data <<END_OF_TAG_MESSAGE
 218Package $tar_name
 219END_OF_TAG_MESSAGE
 220
 221EOF
 222
 223        close I;
 224}
 225close FI;