git-cvsimport.perlon commit t0021: avoid getting filter killed with SIGPIPE (4290f69)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime dup2 ENOENT);
  28use IPC::Open2;
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email);
  35
  36sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48
  49sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53
  54        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # However, we also read from CVSROOT/users format
  63                # to ease migration.
  64                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  65                        my $mapped;
  66                        ($user, $mapped) = ($1, $3);
  67                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  68                                $conv_author_name{$user} = $1;
  69                                $conv_author_email{$user} = $2;
  70                        }
  71                        elsif ($mapped =~ /^<?(.*)>?$/) {
  72                                $conv_author_name{$user} = $user;
  73                                $conv_author_email{$user} = $1;
  74                        }
  75                }
  76                # NEEDSWORK: Maybe warn on unrecognized lines?
  77        }
  78        close ($f);
  79}
  80
  81sub write_author_info($) {
  82        my ($file) = @_;
  83        open my $f, '>', $file or
  84          die("Failed to open $file for writing: $!");
  85
  86        foreach (keys %conv_author_name) {
  87                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  88        }
  89        close ($f);
  90}
  91
  92# convert getopts specs for use by git config
  93sub read_repo_config {
  94    # Split the string between characters, unless there is a ':'
  95    # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
  96        my @opts = split(/ *(?!:)/, shift);
  97        foreach my $o (@opts) {
  98                my $key = $o;
  99                $key =~ s/://g;
 100                my $arg = 'git config';
 101                $arg .= ' --bool' if ($o !~ /:$/);
 102
 103        chomp(my $tmp = `$arg --get cvsimport.$key`);
 104                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 105            no strict 'refs';
 106            my $opt_name = "opt_" . $key;
 107            if (!$$opt_name) {
 108                $$opt_name = $tmp;
 109            }
 110                }
 111        }
 112}
 113
 114my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 115read_repo_config($opts);
 116Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 117
 118# turn the Getopt::Std specification in a Getopt::Long one,
 119# with support for multiple -M options
 120GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 121    or usage();
 122usage if $opt_h;
 123
 124if (@ARGV == 0) {
 125                chomp(my $module = `git config --get cvsimport.module`);
 126                push(@ARGV, $module) if $? == 0;
 127}
 128@ARGV <= 1 or usage("You can't specify more than one CVS module");
 129
 130if ($opt_d) {
 131        $ENV{"CVSROOT"} = $opt_d;
 132} elsif (-f 'CVS/Root') {
 133        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 134        $opt_d = <$f>;
 135        chomp $opt_d;
 136        close $f;
 137        $ENV{"CVSROOT"} = $opt_d;
 138} elsif ($ENV{"CVSROOT"}) {
 139        $opt_d = $ENV{"CVSROOT"};
 140} else {
 141        usage("CVSROOT needs to be set");
 142}
 143$opt_s ||= "-";
 144$opt_a ||= 0;
 145
 146my $git_tree = $opt_C;
 147$git_tree ||= ".";
 148
 149my $remote;
 150if (defined $opt_r) {
 151        $remote = 'refs/remotes/' . $opt_r;
 152        $opt_o ||= "master";
 153} else {
 154        $opt_o ||= "origin";
 155        $remote = 'refs/heads';
 156}
 157
 158my $cvs_tree;
 159if ($#ARGV == 0) {
 160        $cvs_tree = $ARGV[0];
 161} elsif (-f 'CVS/Repository') {
 162        open my $f, '<', 'CVS/Repository' or
 163            die 'Failed to open CVS/Repository';
 164        $cvs_tree = <$f>;
 165        chomp $cvs_tree;
 166        close $f;
 167} else {
 168        usage("CVS module has to be specified");
 169}
 170
 171our @mergerx = ();
 172if ($opt_m) {
 173        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 174}
 175if (@opt_M) {
 176        push (@mergerx, map { qr/$_/ } @opt_M);
 177}
 178
 179# Remember UTC of our starting time
 180# we'll want to avoid importing commits
 181# that are too recent
 182our $starttime = time();
 183
 184select(STDERR); $|=1; select(STDOUT);
 185
 186
 187package CVSconn;
 188# Basic CVS dialog.
 189# We're only interested in connecting and downloading, so ...
 190
 191use File::Spec;
 192use File::Temp qw(tempfile);
 193use POSIX qw(strftime dup2);
 194
 195sub new {
 196        my ($what,$repo,$subdir) = @_;
 197        $what=ref($what) if ref($what);
 198
 199        my $self = {};
 200        $self->{'buffer'} = "";
 201        bless($self,$what);
 202
 203        $repo =~ s#/+$##;
 204        $self->{'fullrep'} = $repo;
 205        $self->conn();
 206
 207        $self->{'subdir'} = $subdir;
 208        $self->{'lines'} = undef;
 209
 210        return $self;
 211}
 212
 213sub conn {
 214        my $self = shift;
 215        my $repo = $self->{'fullrep'};
 216        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 217                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 218
 219                my ($proxyhost,$proxyport);
 220                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 221                        $proxyhost = $1;
 222                        # Default proxyport, if not specified, is 8080.
 223                        $proxyport = 8080;
 224                        if ($ENV{"CVS_PROXY_PORT"}) {
 225                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 226                        }
 227                        if ($param =~ m/proxyport=([^;]+)/) {
 228                                $proxyport = $1;
 229                        }
 230                }
 231                $repo ||= '/';
 232
 233                # if username is not explicit in CVSROOT, then use current user, as cvs would
 234                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 235                my $rr2 = "-";
 236                unless ($port) {
 237                        $rr2 = ":pserver:$user\@$serv:$repo";
 238                        $port=2401;
 239                }
 240                my $rr = ":pserver:$user\@$serv:$port$repo";
 241
 242                if ($pass) {
 243                        $pass = $self->_scramble($pass);
 244                } else {
 245                        open(H,$ENV{'HOME'}."/.cvspass") and do {
 246                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 247                                while (<H>) {
 248                                        chomp;
 249                                        s/^\/\d+\s+//;
 250                                        my ($w,$p) = split(/\s/,$_,2);
 251                                        if ($w eq $rr or $w eq $rr2) {
 252                                                $pass = $p;
 253                                                last;
 254                                        }
 255                                }
 256                        };
 257                        $pass = "A" unless $pass;
 258                }
 259
 260                my ($s, $rep);
 261                if ($proxyhost) {
 262
 263                        # Use a HTTP Proxy. Only works for HTTP proxies that
 264                        # don't require user authentication
 265                        #
 266                        # See: http://www.ietf.org/rfc/rfc2817.txt
 267
 268                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 269                        die "Socket to $proxyhost: $!\n" unless defined $s;
 270                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 271                                or die "Write to $proxyhost: $!\n";
 272                        $s->flush();
 273
 274                        $rep = <$s>;
 275
 276                        # The answer should look like 'HTTP/1.x 2yy ....'
 277                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 278                                die "Proxy connect: $rep\n";
 279                        }
 280                        # Skip up to the empty line of the proxy server output
 281                        # including the response headers.
 282                        while ($rep = <$s>) {
 283                                last if (!defined $rep ||
 284                                         $rep eq "\n" ||
 285                                         $rep eq "\r\n");
 286                        }
 287                } else {
 288                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 289                        die "Socket to $serv: $!\n" unless defined $s;
 290                }
 291
 292                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 293                        or die "Write to $serv: $!\n";
 294                $s->flush();
 295
 296                $rep = <$s>;
 297
 298                if ($rep ne "I LOVE YOU\n") {
 299                        $rep="<unknown>" unless $rep;
 300                        die "AuthReply: $rep\n";
 301                }
 302                $self->{'socketo'} = $s;
 303                $self->{'socketi'} = $s;
 304        } else { # local or ext: Fork off our own cvs server.
 305                my $pr = IO::Pipe->new();
 306                my $pw = IO::Pipe->new();
 307                my $pid = fork();
 308                die "Fork: $!\n" unless defined $pid;
 309                my $cvs = 'cvs';
 310                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 311                my $rsh = 'rsh';
 312                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 313
 314                my @cvs = ($cvs, 'server');
 315                my ($local, $user, $host);
 316                $local = $repo =~ s/:local://;
 317                if (!$local) {
 318                    $repo =~ s/:ext://;
 319                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 320                    ($user, $host) = ($1, $2);
 321                }
 322                if (!$local) {
 323                    if ($user) {
 324                        unshift @cvs, $rsh, '-l', $user, $host;
 325                    } else {
 326                        unshift @cvs, $rsh, $host;
 327                    }
 328                }
 329
 330                unless ($pid) {
 331                        $pr->writer();
 332                        $pw->reader();
 333                        dup2($pw->fileno(),0);
 334                        dup2($pr->fileno(),1);
 335                        $pr->close();
 336                        $pw->close();
 337                        exec(@cvs);
 338                }
 339                $pw->writer();
 340                $pr->reader();
 341                $self->{'socketo'} = $pw;
 342                $self->{'socketi'} = $pr;
 343        }
 344        $self->{'socketo'}->write("Root $repo\n");
 345
 346        # Trial and error says that this probably is the minimum set
 347        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 348
 349        $self->{'socketo'}->write("valid-requests\n");
 350        $self->{'socketo'}->flush();
 351
 352        chomp(my $rep=$self->readline());
 353        if ($rep !~ s/^Valid-requests\s*//) {
 354                $rep="<unknown>" unless $rep;
 355                die "Expected Valid-requests from server, but got: $rep\n";
 356        }
 357        chomp(my $res=$self->readline());
 358        die "validReply: $res\n" if $res ne "ok";
 359
 360        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 361        $self->{'repo'} = $repo;
 362}
 363
 364sub readline {
 365        my ($self) = @_;
 366        return $self->{'socketi'}->getline();
 367}
 368
 369sub _file {
 370        # Request a file with a given revision.
 371        # Trial and error says this is a good way to do it. :-/
 372        my ($self,$fn,$rev) = @_;
 373        $self->{'socketo'}->write("Argument -N\n") or return undef;
 374        $self->{'socketo'}->write("Argument -P\n") or return undef;
 375        # -kk: Linus' version doesn't use it - defaults to off
 376        if ($opt_k) {
 377            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 378        }
 379        $self->{'socketo'}->write("Argument -r\n") or return undef;
 380        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 381        $self->{'socketo'}->write("Argument --\n") or return undef;
 382        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 383        $self->{'socketo'}->write("Directory .\n") or return undef;
 384        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 385        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 386        $self->{'socketo'}->write("co\n") or return undef;
 387        $self->{'socketo'}->flush() or return undef;
 388        $self->{'lines'} = 0;
 389        return 1;
 390}
 391sub _line {
 392        # Read a line from the server.
 393        # ... except that 'line' may be an entire file. ;-)
 394        my ($self, $fh) = @_;
 395        die "Not in lines" unless defined $self->{'lines'};
 396
 397        my $line;
 398        my $res=0;
 399        while (defined($line = $self->readline())) {
 400                # M U gnupg-cvs-rep/AUTHORS
 401                # Updated gnupg-cvs-rep/
 402                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 403                # /AUTHORS/1.1///T1.1
 404                # u=rw,g=rw,o=rw
 405                # 0
 406                # ok
 407
 408                if ($line =~ s/^(?:Created|Updated) //) {
 409                        $line = $self->readline(); # path
 410                        $line = $self->readline(); # Entries line
 411                        my $mode = $self->readline(); chomp $mode;
 412                        $self->{'mode'} = $mode;
 413                        defined (my $cnt = $self->readline())
 414                                or die "EOF from server after 'Changed'\n";
 415                        chomp $cnt;
 416                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 417                        $line="";
 418                        $res = $self->_fetchfile($fh, $cnt);
 419                } elsif ($line =~ s/^ //) {
 420                        print $fh $line;
 421                        $res += length($line);
 422                } elsif ($line =~ /^M\b/) {
 423                        # output, do nothing
 424                } elsif ($line =~ /^Mbinary\b/) {
 425                        my $cnt;
 426                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 427                        chomp $cnt;
 428                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 429                        $line="";
 430                        $res += $self->_fetchfile($fh, $cnt);
 431                } else {
 432                        chomp $line;
 433                        if ($line eq "ok") {
 434                                # print STDERR "S: ok (".length($res).")\n";
 435                                return $res;
 436                        } elsif ($line =~ s/^E //) {
 437                                # print STDERR "S: $line\n";
 438                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 439                                $line = $self->readline(); # filename
 440                                $line = $self->readline(); # OK
 441                                chomp $line;
 442                                die "Unknown: $line" if $line ne "ok";
 443                                return -1;
 444                        } else {
 445                                die "Unknown: $line\n";
 446                        }
 447                }
 448        }
 449        return undef;
 450}
 451sub file {
 452        my ($self,$fn,$rev) = @_;
 453        my $res;
 454
 455        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 456                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 457
 458        $self->_file($fn,$rev) and $res = $self->_line($fh);
 459
 460        if (!defined $res) {
 461            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 462            truncate $fh, 0;
 463            $self->conn();
 464            $self->_file($fn,$rev) or die "No file command send";
 465            $res = $self->_line($fh);
 466            die "Retry failed" unless defined $res;
 467        }
 468        close ($fh);
 469
 470        return ($name, $res);
 471}
 472sub _fetchfile {
 473        my ($self, $fh, $cnt) = @_;
 474        my $res = 0;
 475        my $bufsize = 1024 * 1024;
 476        while ($cnt) {
 477            if ($bufsize > $cnt) {
 478                $bufsize = $cnt;
 479            }
 480            my $buf;
 481            my $num = $self->{'socketi'}->read($buf,$bufsize);
 482            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 483            print $fh $buf;
 484            $res += $num;
 485            $cnt -= $num;
 486        }
 487        return $res;
 488}
 489
 490sub _scramble {
 491        my ($self, $pass) = @_;
 492        my $scrambled = "A";
 493
 494        return $scrambled unless $pass;
 495
 496        my $pass_len = length($pass);
 497        my @pass_arr = split("", $pass);
 498        my $i;
 499
 500        # from cvs/src/scramble.c
 501        my @shifts = (
 502                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 503                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 504                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 505                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 506                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 507                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 508                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 509                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 510                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 511                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 512                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 513                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 514                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 515                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 516                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 517                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 518        );
 519
 520        for ($i = 0; $i < $pass_len; $i++) {
 521                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 522        }
 523
 524        return $scrambled;
 525}
 526
 527package main;
 528
 529my $cvs = CVSconn->new($opt_d, $cvs_tree);
 530
 531
 532sub pdate($) {
 533        my ($d) = @_;
 534        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 535                or die "Unparseable date: $d\n";
 536        my $y=$1; $y-=1900 if $y>1900;
 537        return timegm($6||0,$5,$4,$3,$2-1,$y);
 538}
 539
 540sub pmode($) {
 541        my ($mode) = @_;
 542        my $m = 0;
 543        my $mm = 0;
 544        my $um = 0;
 545        for my $x(split(//,$mode)) {
 546                if ($x eq ",") {
 547                        $m |= $mm&$um;
 548                        $mm = 0;
 549                        $um = 0;
 550                } elsif ($x eq "u") { $um |= 0700;
 551                } elsif ($x eq "g") { $um |= 0070;
 552                } elsif ($x eq "o") { $um |= 0007;
 553                } elsif ($x eq "r") { $mm |= 0444;
 554                } elsif ($x eq "w") { $mm |= 0222;
 555                } elsif ($x eq "x") { $mm |= 0111;
 556                } elsif ($x eq "=") { # do nothing
 557                } else { die "Unknown mode: $mode\n";
 558                }
 559        }
 560        $m |= $mm&$um;
 561        return $m;
 562}
 563
 564sub getwd() {
 565        my $pwd = `pwd`;
 566        chomp $pwd;
 567        return $pwd;
 568}
 569
 570sub is_sha1 {
 571        my $s = shift;
 572        return $s =~ /^[a-f0-9]{40}$/;
 573}
 574
 575sub get_headref ($) {
 576        my $name = shift;
 577        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 578        return undef unless $? == 0;
 579        chomp $r;
 580        return $r;
 581}
 582
 583my $user_filename_prepend = '';
 584sub munge_user_filename {
 585        my $name = shift;
 586        return File::Spec->file_name_is_absolute($name) ?
 587                $name :
 588                $user_filename_prepend . $name;
 589}
 590
 591-d $git_tree
 592        or mkdir($git_tree,0777)
 593        or die "Could not create $git_tree: $!";
 594if ($git_tree ne '.') {
 595        $user_filename_prepend = getwd() . '/';
 596        chdir($git_tree);
 597}
 598
 599my $last_branch = "";
 600my $orig_branch = "";
 601my %branch_date;
 602my $tip_at_start = undef;
 603
 604my $git_dir = $ENV{"GIT_DIR"} || ".git";
 605$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 606$ENV{"GIT_DIR"} = $git_dir;
 607my $orig_git_index;
 608$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 609
 610my %index; # holds filenames of one index per branch
 611
 612unless (-d $git_dir) {
 613        system(qw(git init));
 614        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 615        system(qw(git read-tree --empty));
 616        die "Cannot init an empty tree: $?\n" if $?;
 617
 618        $last_branch = $opt_o;
 619        $orig_branch = "";
 620} else {
 621        open(F, "-|", qw(git symbolic-ref HEAD)) or
 622                die "Cannot run git symbolic-ref: $!\n";
 623        chomp ($last_branch = <F>);
 624        $last_branch = basename($last_branch);
 625        close(F);
 626        unless ($last_branch) {
 627                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 628                $last_branch = "master";
 629        }
 630        $orig_branch = $last_branch;
 631        $tip_at_start = `git rev-parse --verify HEAD`;
 632
 633        # Get the last import timestamps
 634        my $fmt = '($ref, $author) = (%(refname), %(author));';
 635        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 636        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 637        while (defined(my $entry = <H>)) {
 638                my ($ref, $author);
 639                eval($entry) || die "cannot eval refs list: $@";
 640                my ($head) = ($ref =~ m|^$remote/(.*)|);
 641                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 642                $branch_date{$head} = $1;
 643        }
 644        close(H);
 645        if (!exists $branch_date{$opt_o}) {
 646                die "Branch '$opt_o' does not exist.\n".
 647                       "Either use the correct '-o branch' option,\n".
 648                       "or import to a new repository.\n";
 649        }
 650}
 651
 652-d $git_dir
 653        or die "Could not create git subdir ($git_dir).\n";
 654
 655# now we read (and possibly save) author-info as well
 656-f "$git_dir/cvs-authors" and
 657  read_author_info("$git_dir/cvs-authors");
 658if ($opt_A) {
 659        read_author_info(munge_user_filename($opt_A));
 660        write_author_info("$git_dir/cvs-authors");
 661}
 662
 663# open .git/cvs-revisions, if requested
 664open my $revision_map, '>>', "$git_dir/cvs-revisions"
 665    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 666        if defined $opt_R;
 667
 668
 669#
 670# run cvsps into a file unless we are getting
 671# it passed as a file via $opt_P
 672#
 673my $cvspsfile;
 674unless ($opt_P) {
 675        print "Running cvsps...\n" if $opt_v;
 676        my $pid = open(CVSPS,"-|");
 677        my $cvspsfh;
 678        die "Cannot fork: $!\n" unless defined $pid;
 679        unless ($pid) {
 680                my @opt;
 681                @opt = split(/,/,$opt_p) if defined $opt_p;
 682                unshift @opt, '-z', $opt_z if defined $opt_z;
 683                unshift @opt, '-q'         unless defined $opt_v;
 684                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 685                        push @opt, '--cvs-direct';
 686                }
 687                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 688                die "Could not start cvsps: $!\n";
 689        }
 690        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 691                                          DIR => File::Spec->tmpdir());
 692        while (<CVSPS>) {
 693            print $cvspsfh $_;
 694        }
 695        close CVSPS;
 696        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 697        close $cvspsfh;
 698} else {
 699        $cvspsfile = munge_user_filename($opt_P);
 700}
 701
 702open(CVS, "<$cvspsfile") or die $!;
 703
 704## cvsps output:
 705#---------------------
 706#PatchSet 314
 707#Date: 1999/09/18 13:03:59
 708#Author: wkoch
 709#Branch: STABLE-BRANCH-1-0
 710#Ancestor branch: HEAD
 711#Tag: (none)
 712#Log:
 713#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 714#Members:
 715#       README:1.57->1.57.2.1
 716#       VERSION:1.96->1.96.2.1
 717#
 718#---------------------
 719
 720my $state = 0;
 721
 722sub update_index (\@\@) {
 723        my $old = shift;
 724        my $new = shift;
 725        open(my $fh, '|-', qw(git update-index -z --index-info))
 726                or die "unable to open git update-index: $!";
 727        print $fh
 728                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 729                        @$old),
 730                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 731                        @$new)
 732                or die "unable to write to git update-index: $!";
 733        close $fh
 734                or die "unable to write to git update-index: $!";
 735        $? and die "git update-index reported error: $?";
 736}
 737
 738sub write_tree () {
 739        open(my $fh, '-|', qw(git write-tree))
 740                or die "unable to open git write-tree: $!";
 741        chomp(my $tree = <$fh>);
 742        is_sha1($tree)
 743                or die "Cannot get tree id ($tree): $!";
 744        close($fh)
 745                or die "Error running git write-tree: $?\n";
 746        print "Tree ID $tree\n" if $opt_v;
 747        return $tree;
 748}
 749
 750my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 751my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 752
 753# commits that cvsps cannot place anywhere...
 754$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 755
 756sub commit {
 757        if ($branch eq $opt_o && !$index{branch} &&
 758                !get_headref("$remote/$branch")) {
 759            # looks like an initial commit
 760            # use the index primed by git init
 761            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 762            $index{$branch} = "$git_dir/index";
 763        } else {
 764            # use an index per branch to speed up
 765            # imports of projects with many branches
 766            unless ($index{$branch}) {
 767                $index{$branch} = tmpnam();
 768                $ENV{GIT_INDEX_FILE} = $index{$branch};
 769                if ($ancestor) {
 770                    system("git", "read-tree", "$remote/$ancestor");
 771                } else {
 772                    system("git", "read-tree", "$remote/$branch");
 773                }
 774                die "read-tree failed: $?\n" if $?;
 775            }
 776        }
 777        $ENV{GIT_INDEX_FILE} = $index{$branch};
 778
 779        update_index(@old, @new);
 780        @old = @new = ();
 781        my $tree = write_tree();
 782        my $parent = get_headref("$remote/$last_branch");
 783        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 784
 785        my @commit_args;
 786        push @commit_args, ("-p", $parent) if $parent;
 787
 788        # loose detection of merges
 789        # based on the commit msg
 790        foreach my $rx (@mergerx) {
 791                next unless $logmsg =~ $rx && $1;
 792                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 793                if (my $sha1 = get_headref("$remote/$mparent")) {
 794                        push @commit_args, '-p', "$remote/$mparent";
 795                        print "Merge parent branch: $mparent\n" if $opt_v;
 796                }
 797        }
 798
 799        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 800        $ENV{GIT_AUTHOR_NAME} = $author_name;
 801        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 802        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 803        $ENV{GIT_COMMITTER_NAME} = $author_name;
 804        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 805        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 806        my $pid = open2(my $commit_read, my $commit_write,
 807                'git', 'commit-tree', $tree, @commit_args);
 808
 809        # compatibility with git2cvs
 810        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 811        $logmsg =~ s/[\s\n]+\z//;
 812
 813        if (@skipped) {
 814            $logmsg .= "\n\n\nSKIPPED:\n\t";
 815            $logmsg .= join("\n\t", @skipped) . "\n";
 816            @skipped = ();
 817        }
 818
 819        print($commit_write "$logmsg\n") && close($commit_write)
 820                or die "Error writing to git commit-tree: $!\n";
 821
 822        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 823        chomp(my $cid = <$commit_read>);
 824        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 825        print "Commit ID $cid\n" if $opt_v;
 826        close($commit_read);
 827
 828        waitpid($pid,0);
 829        die "Error running git commit-tree: $?\n" if $?;
 830
 831        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 832                or die "Cannot write branch $branch for update: $!\n";
 833
 834        if ($revision_map) {
 835                print $revision_map "@$_ $cid\n" for @commit_revisions;
 836        }
 837        @commit_revisions = ();
 838
 839        if ($tag) {
 840                my ($xtag) = $tag;
 841                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 842                $xtag =~ tr/_/\./ if ( $opt_u );
 843                $xtag =~ s/[\/]/$opt_s/g;
 844                $xtag =~ s/\[//g;
 845
 846                system('git' , 'tag', '-f', $xtag, $cid) == 0
 847                        or die "Cannot create tag $xtag: $!\n";
 848
 849                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 850        }
 851};
 852
 853my $commitcount = 1;
 854while (<CVS>) {
 855        chomp;
 856        if ($state == 0 and /^-+$/) {
 857                $state = 1;
 858        } elsif ($state == 0) {
 859                $state = 1;
 860                redo;
 861        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 862                $patchset = 0+$_;
 863                $state=2;
 864        } elsif ($state == 2 and s/^Date:\s+//) {
 865                $date = pdate($_);
 866                unless ($date) {
 867                        print STDERR "Could not parse date: $_\n";
 868                        $state=0;
 869                        next;
 870                }
 871                $state=3;
 872        } elsif ($state == 3 and s/^Author:\s+//) {
 873                s/\s+$//;
 874                if (/^(.*?)\s+<(.*)>/) {
 875                    ($author_name, $author_email) = ($1, $2);
 876                } elsif ($conv_author_name{$_}) {
 877                        $author_name = $conv_author_name{$_};
 878                        $author_email = $conv_author_email{$_};
 879                } else {
 880                    $author_name = $author_email = $_;
 881                }
 882                $state = 4;
 883        } elsif ($state == 4 and s/^Branch:\s+//) {
 884                s/\s+$//;
 885                tr/_/\./ if ( $opt_u );
 886                s/[\/]/$opt_s/g;
 887                $branch = $_;
 888                $state = 5;
 889        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 890                s/\s+$//;
 891                $ancestor = $_;
 892                $ancestor = $opt_o if $ancestor eq "HEAD";
 893                $state = 6;
 894        } elsif ($state == 5) {
 895                $ancestor = undef;
 896                $state = 6;
 897                redo;
 898        } elsif ($state == 6 and s/^Tag:\s+//) {
 899                s/\s+$//;
 900                if ($_ eq "(none)") {
 901                        $tag = undef;
 902                } else {
 903                        $tag = $_;
 904                }
 905                $state = 7;
 906        } elsif ($state == 7 and /^Log:/) {
 907                $logmsg = "";
 908                $state = 8;
 909        } elsif ($state == 8 and /^Members:/) {
 910                $branch = $opt_o if $branch eq "HEAD";
 911                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 912                        # skip
 913                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 914                        $state = 11;
 915                        next;
 916                }
 917                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 918                        # skip if the commit is too recent
 919                        # given that the cvsps default fuzz is 300s, we give ourselves another
 920                        # 300s just in case -- this also prevents skipping commits
 921                        # due to server clock drift
 922                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 923                        $state = 11;
 924                        next;
 925                }
 926                if (exists $ignorebranch{$branch}) {
 927                        print STDERR "Skipping $branch\n";
 928                        $state = 11;
 929                        next;
 930                }
 931                if ($ancestor) {
 932                        if ($ancestor eq $branch) {
 933                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 934                                $ancestor = $opt_o;
 935                        }
 936                        if (defined get_headref("$remote/$branch")) {
 937                                print STDERR "Branch $branch already exists!\n";
 938                                $state=11;
 939                                next;
 940                        }
 941                        my $id = get_headref("$remote/$ancestor");
 942                        if (!$id) {
 943                                print STDERR "Branch $ancestor does not exist!\n";
 944                                $ignorebranch{$branch} = 1;
 945                                $state=11;
 946                                next;
 947                        }
 948
 949                        system(qw(git update-ref -m cvsimport),
 950                                "$remote/$branch", $id);
 951                        if($? != 0) {
 952                                print STDERR "Could not create branch $branch\n";
 953                                $ignorebranch{$branch} = 1;
 954                                $state=11;
 955                                next;
 956                        }
 957                }
 958                $last_branch = $branch if $branch ne $last_branch;
 959                $state = 9;
 960        } elsif ($state == 8) {
 961                $logmsg .= "$_\n";
 962        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
 963#       VERSION:1.96->1.96.2.1
 964                my $init = ($2 eq "INITIAL");
 965                my $fn = $1;
 966                my $rev = $3;
 967                $fn =~ s#^/+##;
 968                if ($opt_S && $fn =~ m/$opt_S/) {
 969                    print "SKIPPING $fn v $rev\n";
 970                    push(@skipped, $fn);
 971                    next;
 972                }
 973                push @commit_revisions, [$fn, $rev];
 974                print "Fetching $fn   v $rev\n" if $opt_v;
 975                my ($tmpname, $size) = $cvs->file($fn,$rev);
 976                if ($size == -1) {
 977                        push(@old,$fn);
 978                        print "Drop $fn\n" if $opt_v;
 979                } else {
 980                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
 981                        my $pid = open(my $F, '-|');
 982                        die $! unless defined $pid;
 983                        if (!$pid) {
 984                            exec("git", "hash-object", "-w", $tmpname)
 985                                or die "Cannot create object: $!\n";
 986                        }
 987                        my $sha = <$F>;
 988                        chomp $sha;
 989                        close $F;
 990                        my $mode = pmode($cvs->{'mode'});
 991                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
 992                }
 993                unlink($tmpname);
 994        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
 995                my $fn = $1;
 996                my $rev = $2;
 997                $fn =~ s#^/+##;
 998                push @commit_revisions, [$fn, $rev];
 999                push(@old,$fn);
1000                print "Delete $fn\n" if $opt_v;
1001        } elsif ($state == 9 and /^\s*$/) {
1002                $state = 10;
1003        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1004                $commitcount++;
1005                if ($opt_L && $commitcount > $opt_L) {
1006                        last;
1007                }
1008                commit();
1009                if (($commitcount & 1023) == 0) {
1010                        system(qw(git repack -a -d));
1011                }
1012                $state = 1;
1013        } elsif ($state == 11 and /^-+$/) {
1014                $state = 1;
1015        } elsif (/^-+$/) { # end of unknown-line processing
1016                $state = 1;
1017        } elsif ($state != 11) { # ignore stuff when skipping
1018                print STDERR "* UNKNOWN LINE * $_\n";
1019        }
1020}
1021commit() if $branch and $state != 11;
1022
1023unless ($opt_P) {
1024        unlink($cvspsfile);
1025}
1026
1027# The heuristic of repacking every 1024 commits can leave a
1028# lot of unpacked data.  If there is more than 1MB worth of
1029# not-packed objects, repack once more.
1030my $line = `git count-objects`;
1031if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1032  my ($n_objects, $kb) = ($1, $2);
1033  1024 < $kb
1034    and system(qw(git repack -a -d));
1035}
1036
1037foreach my $git_index (values %index) {
1038    if ($git_index ne "$git_dir/index") {
1039        unlink($git_index);
1040    }
1041}
1042
1043if (defined $orig_git_index) {
1044        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1045} else {
1046        delete $ENV{GIT_INDEX_FILE};
1047}
1048
1049# Now switch back to the branch we were in before all of this happened
1050if ($orig_branch) {
1051        print "DONE.\n" if $opt_v;
1052        if ($opt_i) {
1053                exit 0;
1054        }
1055        my $tip_at_end = `git rev-parse --verify HEAD`;
1056        if ($tip_at_start ne $tip_at_end) {
1057                for ($tip_at_start, $tip_at_end) { chomp; }
1058                print "Fetched into the current branch.\n" if $opt_v;
1059                system(qw(git read-tree -u -m),
1060                       $tip_at_start, $tip_at_end);
1061                die "Fast-forward update failed: $?\n" if $?;
1062        }
1063        else {
1064                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1065                die "Could not merge $opt_o into the current branch.\n" if $?;
1066        }
1067} else {
1068        $orig_branch = "master";
1069        print "DONE; creating $orig_branch branch\n" if $opt_v;
1070        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1071                unless defined get_headref('refs/heads/master');
1072        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1073                if ($opt_r && $opt_o ne 'HEAD');
1074        system('git', 'update-ref', 'HEAD', "$orig_branch");
1075        unless ($opt_i) {
1076                system(qw(git checkout -f));
1077                die "checkout failed: $?\n" if $?;
1078        }
1079}