contrib / fast-import / import-tars.perl on commit fetch: Speed up fetch of large numbers of refs (b1a01e1)
   1#!/usr/bin/perl
   2
   3## tar archive frontend for git-fast-import
   4##
   5## For example:
   6##
   7##  mkdir project; cd project; git init
   8##  perl import-tars.perl *.tar.bz2
   9##  git whatchanged import-tars
  10##
  11## Use --metainfo to specify the extension for a meta data file, where
  12## import-tars can read the commit message and optionally author and
  13## committer information.
  14##
  15##  echo 'This is the commit message' > myfile.tar.bz2.msg
  16##  perl import-tars.perl --metainfo=msg myfile.tar.bz2
  17
  18use strict;
  19use Getopt::Long;
  20
  21my $metaext = '';
  22
  23die "usage: import-tars [--metainfo=extension] *.tar.{gz,bz2,Z}\n"
  24        unless GetOptions('metainfo=s' => \$metaext) && @ARGV;
  25
  26my $branch_name = 'import-tars';
  27my $branch_ref = "refs/heads/$branch_name";
  28my $author_name = $ENV{'GIT_AUTHOR_NAME'} || 'T Ar Creator';
  29my $author_email = $ENV{'GIT_AUTHOR_EMAIL'} || 'tar@example.com';
  30my $committer_name = $ENV{'GIT_COMMITTER_NAME'} || `git config --get user.name`;
  31my $committer_email = $ENV{'GIT_COMMITTER_EMAIL'} || `git config --get user.email`;
  32
  33chomp($committer_name, $committer_email);
  34
  35open(FI, '|-', 'git', 'fast-import', '--quiet')
  36        or die "Unable to start git fast-import: $!\n";
  37foreach my $tar_file (@ARGV)
  38{
  39        my $commit_time = time;
  40        $tar_file =~ m,([^/]+)$,;
  41        my $tar_name = $1;
  42
  43        if ($tar_name =~ s/\.(tar\.gz|tgz)$//) {
  44                open(I, '-|', 'gunzip', '-c', $tar_file)
  45                        or die "Unable to gunzip -c $tar_file: $!\n";
  46        } elsif ($tar_name =~ s/\.(tar\.bz2|tbz2)$//) {
  47                open(I, '-|', 'bunzip2', '-c', $tar_file)
  48                        or die "Unable to bunzip2 -c $tar_file: $!\n";
  49        } elsif ($tar_name =~ s/\.tar\.Z$//) {
  50                open(I, '-|', 'uncompress', '-c', $tar_file)
  51                        or die "Unable to uncompress -c $tar_file: $!\n";
  52        } elsif ($tar_name =~ s/\.tar$//) {
  53                open(I, $tar_file) or die "Unable to open $tar_file: $!\n";
  54        } else {
  55                die "Unrecognized compression format: $tar_file\n";
  56        }
  57
  58        my $author_time = 0;
  59        my $next_mark = 1;
  60        my $have_top_dir = 1;
  61        my ($top_dir, %files);
  62
  63        while (read(I, $_, 512) == 512) {
  64                my ($name, $mode, $uid, $gid, $size, $mtime,
  65                        $chksum, $typeflag, $linkname, $magic,
  66                        $version, $uname, $gname, $devmajor, $devminor,
  67                        $prefix) = unpack 'Z100 Z8 Z8 Z8 Z12 Z12
  68                        Z8 Z1 Z100 Z6
  69                        Z2 Z32 Z32 Z8 Z8 Z*', $_;
  70                last unless length($name);
  71                if ($name eq '././@LongLink') {
  72                        # GNU tar extension
  73                        if (read(I, $_, 512) != 512) {
  74                                die ('Short archive');
  75                        }
  76                        $name = unpack 'Z257', $_;
  77                        next unless $name;
  78
  79                        my $dummy;
  80                        if (read(I, $_, 512) != 512) {
  81                                die ('Short archive');
  82                        }
  83                        ($dummy, $mode, $uid, $gid, $size, $mtime,
  84                        $chksum, $typeflag, $linkname, $magic,
  85                        $version, $uname, $gname, $devmajor, $devminor,
  86                        $prefix) = unpack 'Z100 Z8 Z8 Z8 Z12 Z12
  87                        Z8 Z1 Z100 Z6
  88                        Z2 Z32 Z32 Z8 Z8 Z*', $_;
  89                }
  90                next if $name =~ m{/\z};
  91                $mode = oct $mode;
  92                $size = oct $size;
  93                $mtime = oct $mtime;
  94                next if $typeflag == 5; # directory
  95
  96                print FI "blob\n", "mark :$next_mark\n";
  97                if ($typeflag == 2) { # symbolic link
  98                        print FI "data ", length($linkname), "\n", $linkname;
  99                        $mode = 0120000;
 100                } else {
 101                        print FI "data $size\n";
 102                        while ($size > 0 && read(I, $_, 512) == 512) {
 103                                print FI substr($_, 0, $size);
 104                                $size -= 512;
 105                        }
 106                }
 107                print FI "\n";
 108
 109                my $path;
 110                if ($prefix) {
 111                        $path = "$prefix/$name";
 112                } else {
 113                        $path = "$name";
 114                }
 115                $files{$path} = [$next_mark++, $mode];
 116
 117                $author_time = $mtime if $mtime > $author_time;
 118                $path =~ m,^([^/]+)/,;
 119                $top_dir = $1 unless $top_dir;
 120                $have_top_dir = 0 if $top_dir ne $1;
 121        }
 122
 123        my $commit_msg = "Imported from $tar_file.";
 124        my $this_committer_name = $committer_name;
 125        my $this_committer_email = $committer_email;
 126        my $this_author_name = $author_name;
 127        my $this_author_email = $author_email;
 128        if ($metaext ne '') {
 129                # Optionally read a commit message from <filename.tar>.msg
 130                # Add a line on the form "Committer: name <e-mail>" to override
 131                # the committer and "Author: name <e-mail>" to override the
 132                # author for this tar ball.
 133                if (open MSG, '<', "${tar_file}.${metaext}") {
 134                        my $header_done = 0;
 135                        $commit_msg = '';
 136                        while (<MSG>) {
 137                                if (!$header_done && /^Committer:\s+([^<>]*)\s+<(.*)>\s*$/i) {
 138                                        $this_committer_name = $1;
 139                                        $this_committer_email = $2;
 140                                } elsif (!$header_done && /^Author:\s+([^<>]*)\s+<(.*)>\s*$/i) {
 141                                        $this_author_name = $1;
 142                                        $this_author_email = $2;
 143                                } elsif (!$header_done && /^$/ { # empty line ends header.
 144                                        $header_done = 1;
 145                                } else {
 146                                        $commit_msg .= $_;
 147                                        $header_done = 1;
 148                                }
 149                        }
 150                        close MSG;
 151                }
 152        }
 153
 154        print FI <<EOF;
 155commit $branch_ref
 156author $this_author_name <$this_author_email> $author_time +0000
 157committer $this_committer_name <$this_committer_email> $commit_time +0000
 158data <<END_OF_COMMIT_MESSAGE
 159$commit_msg
 160END_OF_COMMIT_MESSAGE
 161
 162deleteall
 163EOF
 164
 165        foreach my $path (keys %files)
 166        {
 167                my ($mark, $mode) = @{$files{$path}};
 168                $path =~ s,^([^/]+)/,, if $have_top_dir;
 169                $mode = $mode & 0111 ? 0755 : 0644 unless $mode == 0120000;
 170                printf FI "M %o :%i %s\n", $mode, $mark, $path;
 171        }
 172        print FI "\n";
 173
 174        print FI <<EOF;
 175tag $tar_name
 176from $branch_ref
 177tagger $author_name <$author_email> $author_time +0000
 178data <<END_OF_TAG_MESSAGE
 179Package $tar_name
 180END_OF_TAG_MESSAGE
 181
 182EOF
 183
 184        close I;
 185}
 186close FI;