git-cvsimport.perlon commit apply: don't segfault on binary files with missing data (24305cd)
   1#!/usr/bin/perl -w
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use strict;
  17use warnings;
  18use Getopt::Long;
  19use File::Spec;
  20use File::Temp qw(tempfile tmpnam);
  21use File::Path qw(mkpath);
  22use File::Basename qw(basename dirname);
  23use Time::Local;
  24use IO::Socket;
  25use IO::Pipe;
  26use POSIX qw(strftime dup2 ENOENT);
  27use IPC::Open2;
  28
  29$SIG{'PIPE'}="IGNORE";
  30$ENV{'TZ'}="UTC";
  31
  32our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  33my (%conv_author_name, %conv_author_email);
  34
  35sub usage(;$) {
  36        my $msg = shift;
  37        print(STDERR "Error: $msg\n") if $msg;
  38        print STDERR <<END;
  39Usage: git cvsimport     # fetch/update GIT from CVS
  40       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  41       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  42       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  43       [-r remote] [-R] [CVS_module]
  44END
  45        exit(1);
  46}
  47
  48sub read_author_info($) {
  49        my ($file) = @_;
  50        my $user;
  51        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  52
  53        while (<$f>) {
  54                # Expected format is this:
  55                #   exon=Andreas Ericsson <ae@op5.se>
  56                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  57                        $user = $1;
  58                        $conv_author_name{$user} = $2;
  59                        $conv_author_email{$user} = $3;
  60                }
  61                # However, we also read from CVSROOT/users format
  62                # to ease migration.
  63                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  64                        my $mapped;
  65                        ($user, $mapped) = ($1, $3);
  66                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  67                                $conv_author_name{$user} = $1;
  68                                $conv_author_email{$user} = $2;
  69                        }
  70                        elsif ($mapped =~ /^<?(.*)>?$/) {
  71                                $conv_author_name{$user} = $user;
  72                                $conv_author_email{$user} = $1;
  73                        }
  74                }
  75                # NEEDSWORK: Maybe warn on unrecognized lines?
  76        }
  77        close ($f);
  78}
  79
  80sub write_author_info($) {
  81        my ($file) = @_;
  82        open my $f, '>', $file or
  83          die("Failed to open $file for writing: $!");
  84
  85        foreach (keys %conv_author_name) {
  86                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  87        }
  88        close ($f);
  89}
  90
  91# convert getopts specs for use by git config
  92sub read_repo_config {
  93    # Split the string between characters, unless there is a ':'
  94    # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
  95        my @opts = split(/ *(?!:)/, shift);
  96        foreach my $o (@opts) {
  97                my $key = $o;
  98                $key =~ s/://g;
  99                my $arg = 'git config';
 100                $arg .= ' --bool' if ($o !~ /:$/);
 101
 102        chomp(my $tmp = `$arg --get cvsimport.$key`);
 103                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 104            no strict 'refs';
 105            my $opt_name = "opt_" . $key;
 106            if (!$$opt_name) {
 107                $$opt_name = $tmp;
 108            }
 109                }
 110        }
 111}
 112
 113my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 114read_repo_config($opts);
 115Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 116
 117# turn the Getopt::Std specification in a Getopt::Long one,
 118# with support for multiple -M options
 119GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 120    or usage();
 121usage if $opt_h;
 122
 123if (@ARGV == 0) {
 124                chomp(my $module = `git config --get cvsimport.module`);
 125                push(@ARGV, $module) if $? == 0;
 126}
 127@ARGV <= 1 or usage("You can't specify more than one CVS module");
 128
 129if ($opt_d) {
 130        $ENV{"CVSROOT"} = $opt_d;
 131} elsif (-f 'CVS/Root') {
 132        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 133        $opt_d = <$f>;
 134        chomp $opt_d;
 135        close $f;
 136        $ENV{"CVSROOT"} = $opt_d;
 137} elsif ($ENV{"CVSROOT"}) {
 138        $opt_d = $ENV{"CVSROOT"};
 139} else {
 140        usage("CVSROOT needs to be set");
 141}
 142$opt_s ||= "-";
 143$opt_a ||= 0;
 144
 145my $git_tree = $opt_C;
 146$git_tree ||= ".";
 147
 148my $remote;
 149if (defined $opt_r) {
 150        $remote = 'refs/remotes/' . $opt_r;
 151        $opt_o ||= "master";
 152} else {
 153        $opt_o ||= "origin";
 154        $remote = 'refs/heads';
 155}
 156
 157my $cvs_tree;
 158if ($#ARGV == 0) {
 159        $cvs_tree = $ARGV[0];
 160} elsif (-f 'CVS/Repository') {
 161        open my $f, '<', 'CVS/Repository' or
 162            die 'Failed to open CVS/Repository';
 163        $cvs_tree = <$f>;
 164        chomp $cvs_tree;
 165        close $f;
 166} else {
 167        usage("CVS module has to be specified");
 168}
 169
 170our @mergerx = ();
 171if ($opt_m) {
 172        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 173}
 174if (@opt_M) {
 175        push (@mergerx, map { qr/$_/ } @opt_M);
 176}
 177
 178# Remember UTC of our starting time
 179# we'll want to avoid importing commits
 180# that are too recent
 181our $starttime = time();
 182
 183select(STDERR); $|=1; select(STDOUT);
 184
 185
 186package CVSconn;
 187# Basic CVS dialog.
 188# We're only interested in connecting and downloading, so ...
 189
 190use File::Spec;
 191use File::Temp qw(tempfile);
 192use POSIX qw(strftime dup2);
 193
 194sub new {
 195        my ($what,$repo,$subdir) = @_;
 196        $what=ref($what) if ref($what);
 197
 198        my $self = {};
 199        $self->{'buffer'} = "";
 200        bless($self,$what);
 201
 202        $repo =~ s#/+$##;
 203        $self->{'fullrep'} = $repo;
 204        $self->conn();
 205
 206        $self->{'subdir'} = $subdir;
 207        $self->{'lines'} = undef;
 208
 209        return $self;
 210}
 211
 212sub conn {
 213        my $self = shift;
 214        my $repo = $self->{'fullrep'};
 215        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 216                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 217
 218                my ($proxyhost,$proxyport);
 219                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 220                        $proxyhost = $1;
 221                        # Default proxyport, if not specified, is 8080.
 222                        $proxyport = 8080;
 223                        if ($ENV{"CVS_PROXY_PORT"}) {
 224                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 225                        }
 226                        if ($param =~ m/proxyport=([^;]+)/) {
 227                                $proxyport = $1;
 228                        }
 229                }
 230                $repo ||= '/';
 231
 232                # if username is not explicit in CVSROOT, then use current user, as cvs would
 233                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 234                my $rr2 = "-";
 235                unless ($port) {
 236                        $rr2 = ":pserver:$user\@$serv:$repo";
 237                        $port=2401;
 238                }
 239                my $rr = ":pserver:$user\@$serv:$port$repo";
 240
 241                if ($pass) {
 242                        $pass = $self->_scramble($pass);
 243                } else {
 244                        open(H,$ENV{'HOME'}."/.cvspass") and do {
 245                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 246                                while (<H>) {
 247                                        chomp;
 248                                        s/^\/\d+\s+//;
 249                                        my ($w,$p) = split(/\s/,$_,2);
 250                                        if ($w eq $rr or $w eq $rr2) {
 251                                                $pass = $p;
 252                                                last;
 253                                        }
 254                                }
 255                        };
 256                        $pass = "A" unless $pass;
 257                }
 258
 259                my ($s, $rep);
 260                if ($proxyhost) {
 261
 262                        # Use a HTTP Proxy. Only works for HTTP proxies that
 263                        # don't require user authentication
 264                        #
 265                        # See: http://www.ietf.org/rfc/rfc2817.txt
 266
 267                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 268                        die "Socket to $proxyhost: $!\n" unless defined $s;
 269                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 270                                or die "Write to $proxyhost: $!\n";
 271                        $s->flush();
 272
 273                        $rep = <$s>;
 274
 275                        # The answer should look like 'HTTP/1.x 2yy ....'
 276                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 277                                die "Proxy connect: $rep\n";
 278                        }
 279                        # Skip up to the empty line of the proxy server output
 280                        # including the response headers.
 281                        while ($rep = <$s>) {
 282                                last if (!defined $rep ||
 283                                         $rep eq "\n" ||
 284                                         $rep eq "\r\n");
 285                        }
 286                } else {
 287                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 288                        die "Socket to $serv: $!\n" unless defined $s;
 289                }
 290
 291                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 292                        or die "Write to $serv: $!\n";
 293                $s->flush();
 294
 295                $rep = <$s>;
 296
 297                if ($rep ne "I LOVE YOU\n") {
 298                        $rep="<unknown>" unless $rep;
 299                        die "AuthReply: $rep\n";
 300                }
 301                $self->{'socketo'} = $s;
 302                $self->{'socketi'} = $s;
 303        } else { # local or ext: Fork off our own cvs server.
 304                my $pr = IO::Pipe->new();
 305                my $pw = IO::Pipe->new();
 306                my $pid = fork();
 307                die "Fork: $!\n" unless defined $pid;
 308                my $cvs = 'cvs';
 309                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 310                my $rsh = 'rsh';
 311                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 312
 313                my @cvs = ($cvs, 'server');
 314                my ($local, $user, $host);
 315                $local = $repo =~ s/:local://;
 316                if (!$local) {
 317                    $repo =~ s/:ext://;
 318                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 319                    ($user, $host) = ($1, $2);
 320                }
 321                if (!$local) {
 322                    if ($user) {
 323                        unshift @cvs, $rsh, '-l', $user, $host;
 324                    } else {
 325                        unshift @cvs, $rsh, $host;
 326                    }
 327                }
 328
 329                unless ($pid) {
 330                        $pr->writer();
 331                        $pw->reader();
 332                        dup2($pw->fileno(),0);
 333                        dup2($pr->fileno(),1);
 334                        $pr->close();
 335                        $pw->close();
 336                        exec(@cvs);
 337                }
 338                $pw->writer();
 339                $pr->reader();
 340                $self->{'socketo'} = $pw;
 341                $self->{'socketi'} = $pr;
 342        }
 343        $self->{'socketo'}->write("Root $repo\n");
 344
 345        # Trial and error says that this probably is the minimum set
 346        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 347
 348        $self->{'socketo'}->write("valid-requests\n");
 349        $self->{'socketo'}->flush();
 350
 351        chomp(my $rep=$self->readline());
 352        if ($rep !~ s/^Valid-requests\s*//) {
 353                $rep="<unknown>" unless $rep;
 354                die "Expected Valid-requests from server, but got: $rep\n";
 355        }
 356        chomp(my $res=$self->readline());
 357        die "validReply: $res\n" if $res ne "ok";
 358
 359        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 360        $self->{'repo'} = $repo;
 361}
 362
 363sub readline {
 364        my ($self) = @_;
 365        return $self->{'socketi'}->getline();
 366}
 367
 368sub _file {
 369        # Request a file with a given revision.
 370        # Trial and error says this is a good way to do it. :-/
 371        my ($self,$fn,$rev) = @_;
 372        $self->{'socketo'}->write("Argument -N\n") or return undef;
 373        $self->{'socketo'}->write("Argument -P\n") or return undef;
 374        # -kk: Linus' version doesn't use it - defaults to off
 375        if ($opt_k) {
 376            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 377        }
 378        $self->{'socketo'}->write("Argument -r\n") or return undef;
 379        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 380        $self->{'socketo'}->write("Argument --\n") or return undef;
 381        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 382        $self->{'socketo'}->write("Directory .\n") or return undef;
 383        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 384        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 385        $self->{'socketo'}->write("co\n") or return undef;
 386        $self->{'socketo'}->flush() or return undef;
 387        $self->{'lines'} = 0;
 388        return 1;
 389}
 390sub _line {
 391        # Read a line from the server.
 392        # ... except that 'line' may be an entire file. ;-)
 393        my ($self, $fh) = @_;
 394        die "Not in lines" unless defined $self->{'lines'};
 395
 396        my $line;
 397        my $res=0;
 398        while (defined($line = $self->readline())) {
 399                # M U gnupg-cvs-rep/AUTHORS
 400                # Updated gnupg-cvs-rep/
 401                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 402                # /AUTHORS/1.1///T1.1
 403                # u=rw,g=rw,o=rw
 404                # 0
 405                # ok
 406
 407                if ($line =~ s/^(?:Created|Updated) //) {
 408                        $line = $self->readline(); # path
 409                        $line = $self->readline(); # Entries line
 410                        my $mode = $self->readline(); chomp $mode;
 411                        $self->{'mode'} = $mode;
 412                        defined (my $cnt = $self->readline())
 413                                or die "EOF from server after 'Changed'\n";
 414                        chomp $cnt;
 415                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 416                        $line="";
 417                        $res = $self->_fetchfile($fh, $cnt);
 418                } elsif ($line =~ s/^ //) {
 419                        print $fh $line;
 420                        $res += length($line);
 421                } elsif ($line =~ /^M\b/) {
 422                        # output, do nothing
 423                } elsif ($line =~ /^Mbinary\b/) {
 424                        my $cnt;
 425                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 426                        chomp $cnt;
 427                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 428                        $line="";
 429                        $res += $self->_fetchfile($fh, $cnt);
 430                } else {
 431                        chomp $line;
 432                        if ($line eq "ok") {
 433                                # print STDERR "S: ok (".length($res).")\n";
 434                                return $res;
 435                        } elsif ($line =~ s/^E //) {
 436                                # print STDERR "S: $line\n";
 437                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 438                                $line = $self->readline(); # filename
 439                                $line = $self->readline(); # OK
 440                                chomp $line;
 441                                die "Unknown: $line" if $line ne "ok";
 442                                return -1;
 443                        } else {
 444                                die "Unknown: $line\n";
 445                        }
 446                }
 447        }
 448        return undef;
 449}
 450sub file {
 451        my ($self,$fn,$rev) = @_;
 452        my $res;
 453
 454        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 455                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 456
 457        $self->_file($fn,$rev) and $res = $self->_line($fh);
 458
 459        if (!defined $res) {
 460            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 461            truncate $fh, 0;
 462            $self->conn();
 463            $self->_file($fn,$rev) or die "No file command send";
 464            $res = $self->_line($fh);
 465            die "Retry failed" unless defined $res;
 466        }
 467        close ($fh);
 468
 469        return ($name, $res);
 470}
 471sub _fetchfile {
 472        my ($self, $fh, $cnt) = @_;
 473        my $res = 0;
 474        my $bufsize = 1024 * 1024;
 475        while ($cnt) {
 476            if ($bufsize > $cnt) {
 477                $bufsize = $cnt;
 478            }
 479            my $buf;
 480            my $num = $self->{'socketi'}->read($buf,$bufsize);
 481            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 482            print $fh $buf;
 483            $res += $num;
 484            $cnt -= $num;
 485        }
 486        return $res;
 487}
 488
 489sub _scramble {
 490        my ($self, $pass) = @_;
 491        my $scrambled = "A";
 492
 493        return $scrambled unless $pass;
 494
 495        my $pass_len = length($pass);
 496        my @pass_arr = split("", $pass);
 497        my $i;
 498
 499        # from cvs/src/scramble.c
 500        my @shifts = (
 501                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 502                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 503                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 504                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 505                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 506                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 507                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 508                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 509                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 510                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 511                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 512                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 513                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 514                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 515                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 516                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 517        );
 518
 519        for ($i = 0; $i < $pass_len; $i++) {
 520                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 521        }
 522
 523        return $scrambled;
 524}
 525
 526package main;
 527
 528my $cvs = CVSconn->new($opt_d, $cvs_tree);
 529
 530
 531sub pdate($) {
 532        my ($d) = @_;
 533        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 534                or die "Unparseable date: $d\n";
 535        my $y=$1; $y-=1900 if $y>1900;
 536        return timegm($6||0,$5,$4,$3,$2-1,$y);
 537}
 538
 539sub pmode($) {
 540        my ($mode) = @_;
 541        my $m = 0;
 542        my $mm = 0;
 543        my $um = 0;
 544        for my $x(split(//,$mode)) {
 545                if ($x eq ",") {
 546                        $m |= $mm&$um;
 547                        $mm = 0;
 548                        $um = 0;
 549                } elsif ($x eq "u") { $um |= 0700;
 550                } elsif ($x eq "g") { $um |= 0070;
 551                } elsif ($x eq "o") { $um |= 0007;
 552                } elsif ($x eq "r") { $mm |= 0444;
 553                } elsif ($x eq "w") { $mm |= 0222;
 554                } elsif ($x eq "x") { $mm |= 0111;
 555                } elsif ($x eq "=") { # do nothing
 556                } else { die "Unknown mode: $mode\n";
 557                }
 558        }
 559        $m |= $mm&$um;
 560        return $m;
 561}
 562
 563sub getwd() {
 564        my $pwd = `pwd`;
 565        chomp $pwd;
 566        return $pwd;
 567}
 568
 569sub is_sha1 {
 570        my $s = shift;
 571        return $s =~ /^[a-f0-9]{40}$/;
 572}
 573
 574sub get_headref ($) {
 575        my $name = shift;
 576        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 577        return undef unless $? == 0;
 578        chomp $r;
 579        return $r;
 580}
 581
 582my $user_filename_prepend = '';
 583sub munge_user_filename {
 584        my $name = shift;
 585        return File::Spec->file_name_is_absolute($name) ?
 586                $name :
 587                $user_filename_prepend . $name;
 588}
 589
 590-d $git_tree
 591        or mkdir($git_tree,0777)
 592        or die "Could not create $git_tree: $!";
 593if ($git_tree ne '.') {
 594        $user_filename_prepend = getwd() . '/';
 595        chdir($git_tree);
 596}
 597
 598my $last_branch = "";
 599my $orig_branch = "";
 600my %branch_date;
 601my $tip_at_start = undef;
 602
 603my $git_dir = $ENV{"GIT_DIR"} || ".git";
 604$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 605$ENV{"GIT_DIR"} = $git_dir;
 606my $orig_git_index;
 607$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 608
 609my %index; # holds filenames of one index per branch
 610
 611unless (-d $git_dir) {
 612        system(qw(git init));
 613        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 614        system(qw(git read-tree));
 615        die "Cannot init an empty tree: $?\n" if $?;
 616
 617        $last_branch = $opt_o;
 618        $orig_branch = "";
 619} else {
 620        open(F, "-|", qw(git symbolic-ref HEAD)) or
 621                die "Cannot run git symbolic-ref: $!\n";
 622        chomp ($last_branch = <F>);
 623        $last_branch = basename($last_branch);
 624        close(F);
 625        unless ($last_branch) {
 626                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 627                $last_branch = "master";
 628        }
 629        $orig_branch = $last_branch;
 630        $tip_at_start = `git rev-parse --verify HEAD`;
 631
 632        # Get the last import timestamps
 633        my $fmt = '($ref, $author) = (%(refname), %(author));';
 634        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 635        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 636        while (defined(my $entry = <H>)) {
 637                my ($ref, $author);
 638                eval($entry) || die "cannot eval refs list: $@";
 639                my ($head) = ($ref =~ m|^$remote/(.*)|);
 640                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 641                $branch_date{$head} = $1;
 642        }
 643        close(H);
 644        if (!exists $branch_date{$opt_o}) {
 645                die "Branch '$opt_o' does not exist.\n".
 646                       "Either use the correct '-o branch' option,\n".
 647                       "or import to a new repository.\n";
 648        }
 649}
 650
 651-d $git_dir
 652        or die "Could not create git subdir ($git_dir).\n";
 653
 654# now we read (and possibly save) author-info as well
 655-f "$git_dir/cvs-authors" and
 656  read_author_info("$git_dir/cvs-authors");
 657if ($opt_A) {
 658        read_author_info(munge_user_filename($opt_A));
 659        write_author_info("$git_dir/cvs-authors");
 660}
 661
 662# open .git/cvs-revisions, if requested
 663open my $revision_map, '>>', "$git_dir/cvs-revisions"
 664    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 665        if defined $opt_R;
 666
 667
 668#
 669# run cvsps into a file unless we are getting
 670# it passed as a file via $opt_P
 671#
 672my $cvspsfile;
 673unless ($opt_P) {
 674        print "Running cvsps...\n" if $opt_v;
 675        my $pid = open(CVSPS,"-|");
 676        my $cvspsfh;
 677        die "Cannot fork: $!\n" unless defined $pid;
 678        unless ($pid) {
 679                my @opt;
 680                @opt = split(/,/,$opt_p) if defined $opt_p;
 681                unshift @opt, '-z', $opt_z if defined $opt_z;
 682                unshift @opt, '-q'         unless defined $opt_v;
 683                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 684                        push @opt, '--cvs-direct';
 685                }
 686                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 687                die "Could not start cvsps: $!\n";
 688        }
 689        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 690                                          DIR => File::Spec->tmpdir());
 691        while (<CVSPS>) {
 692            print $cvspsfh $_;
 693        }
 694        close CVSPS;
 695        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 696        close $cvspsfh;
 697} else {
 698        $cvspsfile = munge_user_filename($opt_P);
 699}
 700
 701open(CVS, "<$cvspsfile") or die $!;
 702
 703## cvsps output:
 704#---------------------
 705#PatchSet 314
 706#Date: 1999/09/18 13:03:59
 707#Author: wkoch
 708#Branch: STABLE-BRANCH-1-0
 709#Ancestor branch: HEAD
 710#Tag: (none)
 711#Log:
 712#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 713#Members:
 714#       README:1.57->1.57.2.1
 715#       VERSION:1.96->1.96.2.1
 716#
 717#---------------------
 718
 719my $state = 0;
 720
 721sub update_index (\@\@) {
 722        my $old = shift;
 723        my $new = shift;
 724        open(my $fh, '|-', qw(git update-index -z --index-info))
 725                or die "unable to open git update-index: $!";
 726        print $fh
 727                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 728                        @$old),
 729                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 730                        @$new)
 731                or die "unable to write to git update-index: $!";
 732        close $fh
 733                or die "unable to write to git update-index: $!";
 734        $? and die "git update-index reported error: $?";
 735}
 736
 737sub write_tree () {
 738        open(my $fh, '-|', qw(git write-tree))
 739                or die "unable to open git write-tree: $!";
 740        chomp(my $tree = <$fh>);
 741        is_sha1($tree)
 742                or die "Cannot get tree id ($tree): $!";
 743        close($fh)
 744                or die "Error running git write-tree: $?\n";
 745        print "Tree ID $tree\n" if $opt_v;
 746        return $tree;
 747}
 748
 749my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 750my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 751
 752# commits that cvsps cannot place anywhere...
 753$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 754
 755sub commit {
 756        if ($branch eq $opt_o && !$index{branch} &&
 757                !get_headref("$remote/$branch")) {
 758            # looks like an initial commit
 759            # use the index primed by git init
 760            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 761            $index{$branch} = "$git_dir/index";
 762        } else {
 763            # use an index per branch to speed up
 764            # imports of projects with many branches
 765            unless ($index{$branch}) {
 766                $index{$branch} = tmpnam();
 767                $ENV{GIT_INDEX_FILE} = $index{$branch};
 768                if ($ancestor) {
 769                    system("git", "read-tree", "$remote/$ancestor");
 770                } else {
 771                    system("git", "read-tree", "$remote/$branch");
 772                }
 773                die "read-tree failed: $?\n" if $?;
 774            }
 775        }
 776        $ENV{GIT_INDEX_FILE} = $index{$branch};
 777
 778        update_index(@old, @new);
 779        @old = @new = ();
 780        my $tree = write_tree();
 781        my $parent = get_headref("$remote/$last_branch");
 782        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 783
 784        my @commit_args;
 785        push @commit_args, ("-p", $parent) if $parent;
 786
 787        # loose detection of merges
 788        # based on the commit msg
 789        foreach my $rx (@mergerx) {
 790                next unless $logmsg =~ $rx && $1;
 791                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 792                if (my $sha1 = get_headref("$remote/$mparent")) {
 793                        push @commit_args, '-p', "$remote/$mparent";
 794                        print "Merge parent branch: $mparent\n" if $opt_v;
 795                }
 796        }
 797
 798        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 799        $ENV{GIT_AUTHOR_NAME} = $author_name;
 800        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 801        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 802        $ENV{GIT_COMMITTER_NAME} = $author_name;
 803        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 804        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 805        my $pid = open2(my $commit_read, my $commit_write,
 806                'git', 'commit-tree', $tree, @commit_args);
 807
 808        # compatibility with git2cvs
 809        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 810        $logmsg =~ s/[\s\n]+\z//;
 811
 812        if (@skipped) {
 813            $logmsg .= "\n\n\nSKIPPED:\n\t";
 814            $logmsg .= join("\n\t", @skipped) . "\n";
 815            @skipped = ();
 816        }
 817
 818        print($commit_write "$logmsg\n") && close($commit_write)
 819                or die "Error writing to git commit-tree: $!\n";
 820
 821        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 822        chomp(my $cid = <$commit_read>);
 823        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 824        print "Commit ID $cid\n" if $opt_v;
 825        close($commit_read);
 826
 827        waitpid($pid,0);
 828        die "Error running git commit-tree: $?\n" if $?;
 829
 830        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 831                or die "Cannot write branch $branch for update: $!\n";
 832
 833        if ($revision_map) {
 834                print $revision_map "@$_ $cid\n" for @commit_revisions;
 835        }
 836        @commit_revisions = ();
 837
 838        if ($tag) {
 839                my ($xtag) = $tag;
 840                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 841                $xtag =~ tr/_/\./ if ( $opt_u );
 842                $xtag =~ s/[\/]/$opt_s/g;
 843                $xtag =~ s/\[//g;
 844
 845                system('git' , 'tag', '-f', $xtag, $cid) == 0
 846                        or die "Cannot create tag $xtag: $!\n";
 847
 848                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 849        }
 850};
 851
 852my $commitcount = 1;
 853while (<CVS>) {
 854        chomp;
 855        if ($state == 0 and /^-+$/) {
 856                $state = 1;
 857        } elsif ($state == 0) {
 858                $state = 1;
 859                redo;
 860        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 861                $patchset = 0+$_;
 862                $state=2;
 863        } elsif ($state == 2 and s/^Date:\s+//) {
 864                $date = pdate($_);
 865                unless ($date) {
 866                        print STDERR "Could not parse date: $_\n";
 867                        $state=0;
 868                        next;
 869                }
 870                $state=3;
 871        } elsif ($state == 3 and s/^Author:\s+//) {
 872                s/\s+$//;
 873                if (/^(.*?)\s+<(.*)>/) {
 874                    ($author_name, $author_email) = ($1, $2);
 875                } elsif ($conv_author_name{$_}) {
 876                        $author_name = $conv_author_name{$_};
 877                        $author_email = $conv_author_email{$_};
 878                } else {
 879                    $author_name = $author_email = $_;
 880                }
 881                $state = 4;
 882        } elsif ($state == 4 and s/^Branch:\s+//) {
 883                s/\s+$//;
 884                tr/_/\./ if ( $opt_u );
 885                s/[\/]/$opt_s/g;
 886                $branch = $_;
 887                $state = 5;
 888        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 889                s/\s+$//;
 890                $ancestor = $_;
 891                $ancestor = $opt_o if $ancestor eq "HEAD";
 892                $state = 6;
 893        } elsif ($state == 5) {
 894                $ancestor = undef;
 895                $state = 6;
 896                redo;
 897        } elsif ($state == 6 and s/^Tag:\s+//) {
 898                s/\s+$//;
 899                if ($_ eq "(none)") {
 900                        $tag = undef;
 901                } else {
 902                        $tag = $_;
 903                }
 904                $state = 7;
 905        } elsif ($state == 7 and /^Log:/) {
 906                $logmsg = "";
 907                $state = 8;
 908        } elsif ($state == 8 and /^Members:/) {
 909                $branch = $opt_o if $branch eq "HEAD";
 910                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 911                        # skip
 912                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 913                        $state = 11;
 914                        next;
 915                }
 916                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 917                        # skip if the commit is too recent
 918                        # given that the cvsps default fuzz is 300s, we give ourselves another
 919                        # 300s just in case -- this also prevents skipping commits
 920                        # due to server clock drift
 921                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 922                        $state = 11;
 923                        next;
 924                }
 925                if (exists $ignorebranch{$branch}) {
 926                        print STDERR "Skipping $branch\n";
 927                        $state = 11;
 928                        next;
 929                }
 930                if ($ancestor) {
 931                        if ($ancestor eq $branch) {
 932                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 933                                $ancestor = $opt_o;
 934                        }
 935                        if (defined get_headref("$remote/$branch")) {
 936                                print STDERR "Branch $branch already exists!\n";
 937                                $state=11;
 938                                next;
 939                        }
 940                        my $id = get_headref("$remote/$ancestor");
 941                        if (!$id) {
 942                                print STDERR "Branch $ancestor does not exist!\n";
 943                                $ignorebranch{$branch} = 1;
 944                                $state=11;
 945                                next;
 946                        }
 947
 948                        system(qw(git update-ref -m cvsimport),
 949                                "$remote/$branch", $id);
 950                        if($? != 0) {
 951                                print STDERR "Could not create branch $branch\n";
 952                                $ignorebranch{$branch} = 1;
 953                                $state=11;
 954                                next;
 955                        }
 956                }
 957                $last_branch = $branch if $branch ne $last_branch;
 958                $state = 9;
 959        } elsif ($state == 8) {
 960                $logmsg .= "$_\n";
 961        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
 962#       VERSION:1.96->1.96.2.1
 963                my $init = ($2 eq "INITIAL");
 964                my $fn = $1;
 965                my $rev = $3;
 966                $fn =~ s#^/+##;
 967                if ($opt_S && $fn =~ m/$opt_S/) {
 968                    print "SKIPPING $fn v $rev\n";
 969                    push(@skipped, $fn);
 970                    next;
 971                }
 972                push @commit_revisions, [$fn, $rev];
 973                print "Fetching $fn   v $rev\n" if $opt_v;
 974                my ($tmpname, $size) = $cvs->file($fn,$rev);
 975                if ($size == -1) {
 976                        push(@old,$fn);
 977                        print "Drop $fn\n" if $opt_v;
 978                } else {
 979                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
 980                        my $pid = open(my $F, '-|');
 981                        die $! unless defined $pid;
 982                        if (!$pid) {
 983                            exec("git", "hash-object", "-w", $tmpname)
 984                                or die "Cannot create object: $!\n";
 985                        }
 986                        my $sha = <$F>;
 987                        chomp $sha;
 988                        close $F;
 989                        my $mode = pmode($cvs->{'mode'});
 990                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
 991                }
 992                unlink($tmpname);
 993        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
 994                my $fn = $1;
 995                my $rev = $2;
 996                $fn =~ s#^/+##;
 997                push @commit_revisions, [$fn, $rev];
 998                push(@old,$fn);
 999                print "Delete $fn\n" if $opt_v;
1000        } elsif ($state == 9 and /^\s*$/) {
1001                $state = 10;
1002        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1003                $commitcount++;
1004                if ($opt_L && $commitcount > $opt_L) {
1005                        last;
1006                }
1007                commit();
1008                if (($commitcount & 1023) == 0) {
1009                        system(qw(git repack -a -d));
1010                }
1011                $state = 1;
1012        } elsif ($state == 11 and /^-+$/) {
1013                $state = 1;
1014        } elsif (/^-+$/) { # end of unknown-line processing
1015                $state = 1;
1016        } elsif ($state != 11) { # ignore stuff when skipping
1017                print STDERR "* UNKNOWN LINE * $_\n";
1018        }
1019}
1020commit() if $branch and $state != 11;
1021
1022unless ($opt_P) {
1023        unlink($cvspsfile);
1024}
1025
1026# The heuristic of repacking every 1024 commits can leave a
1027# lot of unpacked data.  If there is more than 1MB worth of
1028# not-packed objects, repack once more.
1029my $line = `git count-objects`;
1030if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1031  my ($n_objects, $kb) = ($1, $2);
1032  1024 < $kb
1033    and system(qw(git repack -a -d));
1034}
1035
1036foreach my $git_index (values %index) {
1037    if ($git_index ne "$git_dir/index") {
1038        unlink($git_index);
1039    }
1040}
1041
1042if (defined $orig_git_index) {
1043        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1044} else {
1045        delete $ENV{GIT_INDEX_FILE};
1046}
1047
1048# Now switch back to the branch we were in before all of this happened
1049if ($orig_branch) {
1050        print "DONE.\n" if $opt_v;
1051        if ($opt_i) {
1052                exit 0;
1053        }
1054        my $tip_at_end = `git rev-parse --verify HEAD`;
1055        if ($tip_at_start ne $tip_at_end) {
1056                for ($tip_at_start, $tip_at_end) { chomp; }
1057                print "Fetched into the current branch.\n" if $opt_v;
1058                system(qw(git read-tree -u -m),
1059                       $tip_at_start, $tip_at_end);
1060                die "Fast-forward update failed: $?\n" if $?;
1061        }
1062        else {
1063                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1064                die "Could not merge $opt_o into the current branch.\n" if $?;
1065        }
1066} else {
1067        $orig_branch = "master";
1068        print "DONE; creating $orig_branch branch\n" if $opt_v;
1069        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1070                unless defined get_headref('refs/heads/master');
1071        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1072                if ($opt_r && $opt_o ne 'HEAD');
1073        system('git', 'update-ref', 'HEAD', "$orig_branch");
1074        unless ($opt_i) {
1075                system(qw(git checkout -f));
1076                die "checkout failed: $?\n" if $?;
1077        }
1078}