git-cvsimport.perlon commit Merge branch 'maint' (42fa6df)
   1#!/usr/bin/perl -w
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use strict;
  17use warnings;
  18use Getopt::Long;
  19use File::Spec;
  20use File::Temp qw(tempfile tmpnam);
  21use File::Path qw(mkpath);
  22use File::Basename qw(basename dirname);
  23use Time::Local;
  24use IO::Socket;
  25use IO::Pipe;
  26use POSIX qw(strftime dup2 ENOENT);
  27use IPC::Open2;
  28
  29$SIG{'PIPE'}="IGNORE";
  30$ENV{'TZ'}="UTC";
  31
  32our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r);
  33my (%conv_author_name, %conv_author_email);
  34
  35sub usage(;$) {
  36        my $msg = shift;
  37        print(STDERR "Error: $msg\n") if $msg;
  38        print STDERR <<END;
  39Usage: git cvsimport     # fetch/update GIT from CVS
  40       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  41       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  42       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  43       [-r remote] [CVS_module]
  44END
  45        exit(1);
  46}
  47
  48sub read_author_info($) {
  49        my ($file) = @_;
  50        my $user;
  51        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  52
  53        while (<$f>) {
  54                # Expected format is this:
  55                #   exon=Andreas Ericsson <ae@op5.se>
  56                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  57                        $user = $1;
  58                        $conv_author_name{$user} = $2;
  59                        $conv_author_email{$user} = $3;
  60                }
  61                # However, we also read from CVSROOT/users format
  62                # to ease migration.
  63                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  64                        my $mapped;
  65                        ($user, $mapped) = ($1, $3);
  66                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  67                                $conv_author_name{$user} = $1;
  68                                $conv_author_email{$user} = $2;
  69                        }
  70                        elsif ($mapped =~ /^<?(.*)>?$/) {
  71                                $conv_author_name{$user} = $user;
  72                                $conv_author_email{$user} = $1;
  73                        }
  74                }
  75                # NEEDSWORK: Maybe warn on unrecognized lines?
  76        }
  77        close ($f);
  78}
  79
  80sub write_author_info($) {
  81        my ($file) = @_;
  82        open my $f, '>', $file or
  83          die("Failed to open $file for writing: $!");
  84
  85        foreach (keys %conv_author_name) {
  86                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  87        }
  88        close ($f);
  89}
  90
  91# convert getopts specs for use by git config
  92sub read_repo_config {
  93    # Split the string between characters, unless there is a ':'
  94    # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
  95        my @opts = split(/ *(?!:)/, shift);
  96        foreach my $o (@opts) {
  97                my $key = $o;
  98                $key =~ s/://g;
  99                my $arg = 'git config';
 100                $arg .= ' --bool' if ($o !~ /:$/);
 101
 102        chomp(my $tmp = `$arg --get cvsimport.$key`);
 103                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 104            no strict 'refs';
 105            my $opt_name = "opt_" . $key;
 106            if (!$$opt_name) {
 107                $$opt_name = $tmp;
 108            }
 109                }
 110        }
 111}
 112
 113my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:";
 114read_repo_config($opts);
 115Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 116
 117# turn the Getopt::Std specification in a Getopt::Long one,
 118# with support for multiple -M options
 119GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 120    or usage();
 121usage if $opt_h;
 122
 123if (@ARGV == 0) {
 124                chomp(my $module = `git config --get cvsimport.module`);
 125                push(@ARGV, $module) if $? == 0;
 126}
 127@ARGV <= 1 or usage("You can't specify more than one CVS module");
 128
 129if ($opt_d) {
 130        $ENV{"CVSROOT"} = $opt_d;
 131} elsif (-f 'CVS/Root') {
 132        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 133        $opt_d = <$f>;
 134        chomp $opt_d;
 135        close $f;
 136        $ENV{"CVSROOT"} = $opt_d;
 137} elsif ($ENV{"CVSROOT"}) {
 138        $opt_d = $ENV{"CVSROOT"};
 139} else {
 140        usage("CVSROOT needs to be set");
 141}
 142$opt_s ||= "-";
 143$opt_a ||= 0;
 144
 145my $git_tree = $opt_C;
 146$git_tree ||= ".";
 147
 148my $remote;
 149if (defined $opt_r) {
 150        $remote = 'refs/remotes/' . $opt_r;
 151        $opt_o ||= "master";
 152} else {
 153        $opt_o ||= "origin";
 154        $remote = 'refs/heads';
 155}
 156
 157my $cvs_tree;
 158if ($#ARGV == 0) {
 159        $cvs_tree = $ARGV[0];
 160} elsif (-f 'CVS/Repository') {
 161        open my $f, '<', 'CVS/Repository' or
 162            die 'Failed to open CVS/Repository';
 163        $cvs_tree = <$f>;
 164        chomp $cvs_tree;
 165        close $f;
 166} else {
 167        usage("CVS module has to be specified");
 168}
 169
 170our @mergerx = ();
 171if ($opt_m) {
 172        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 173}
 174if (@opt_M) {
 175        push (@mergerx, map { qr/$_/ } @opt_M);
 176}
 177
 178# Remember UTC of our starting time
 179# we'll want to avoid importing commits
 180# that are too recent
 181our $starttime = time();
 182
 183select(STDERR); $|=1; select(STDOUT);
 184
 185
 186package CVSconn;
 187# Basic CVS dialog.
 188# We're only interested in connecting and downloading, so ...
 189
 190use File::Spec;
 191use File::Temp qw(tempfile);
 192use POSIX qw(strftime dup2);
 193
 194sub new {
 195        my ($what,$repo,$subdir) = @_;
 196        $what=ref($what) if ref($what);
 197
 198        my $self = {};
 199        $self->{'buffer'} = "";
 200        bless($self,$what);
 201
 202        $repo =~ s#/+$##;
 203        $self->{'fullrep'} = $repo;
 204        $self->conn();
 205
 206        $self->{'subdir'} = $subdir;
 207        $self->{'lines'} = undef;
 208
 209        return $self;
 210}
 211
 212sub conn {
 213        my $self = shift;
 214        my $repo = $self->{'fullrep'};
 215        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 216                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 217
 218                my ($proxyhost,$proxyport);
 219                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 220                        $proxyhost = $1;
 221                        # Default proxyport, if not specified, is 8080.
 222                        $proxyport = 8080;
 223                        if ($ENV{"CVS_PROXY_PORT"}) {
 224                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 225                        }
 226                        if ($param =~ m/proxyport=([^;]+)/) {
 227                                $proxyport = $1;
 228                        }
 229                }
 230                $repo ||= '/';
 231
 232                # if username is not explicit in CVSROOT, then use current user, as cvs would
 233                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 234                my $rr2 = "-";
 235                unless ($port) {
 236                        $rr2 = ":pserver:$user\@$serv:$repo";
 237                        $port=2401;
 238                }
 239                my $rr = ":pserver:$user\@$serv:$port$repo";
 240
 241                unless ($pass) {
 242                        open(H,$ENV{'HOME'}."/.cvspass") and do {
 243                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 244                                while (<H>) {
 245                                        chomp;
 246                                        s/^\/\d+\s+//;
 247                                        my ($w,$p) = split(/\s/,$_,2);
 248                                        if ($w eq $rr or $w eq $rr2) {
 249                                                $pass = $p;
 250                                                last;
 251                                        }
 252                                }
 253                        };
 254                }
 255
 256                $pass = $self->_scramble($pass);
 257
 258                my ($s, $rep);
 259                if ($proxyhost) {
 260
 261                        # Use a HTTP Proxy. Only works for HTTP proxies that
 262                        # don't require user authentication
 263                        #
 264                        # See: http://www.ietf.org/rfc/rfc2817.txt
 265
 266                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 267                        die "Socket to $proxyhost: $!\n" unless defined $s;
 268                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 269                                or die "Write to $proxyhost: $!\n";
 270                        $s->flush();
 271
 272                        $rep = <$s>;
 273
 274                        # The answer should look like 'HTTP/1.x 2yy ....'
 275                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 276                                die "Proxy connect: $rep\n";
 277                        }
 278                        # Skip up to the empty line of the proxy server output
 279                        # including the response headers.
 280                        while ($rep = <$s>) {
 281                                last if (!defined $rep ||
 282                                         $rep eq "\n" ||
 283                                         $rep eq "\r\n");
 284                        }
 285                } else {
 286                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 287                        die "Socket to $serv: $!\n" unless defined $s;
 288                }
 289
 290                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 291                        or die "Write to $serv: $!\n";
 292                $s->flush();
 293
 294                $rep = <$s>;
 295
 296                if ($rep ne "I LOVE YOU\n") {
 297                        $rep="<unknown>" unless $rep;
 298                        die "AuthReply: $rep\n";
 299                }
 300                $self->{'socketo'} = $s;
 301                $self->{'socketi'} = $s;
 302        } else { # local or ext: Fork off our own cvs server.
 303                my $pr = IO::Pipe->new();
 304                my $pw = IO::Pipe->new();
 305                my $pid = fork();
 306                die "Fork: $!\n" unless defined $pid;
 307                my $cvs = 'cvs';
 308                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 309                my $rsh = 'rsh';
 310                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 311
 312                my @cvs = ($cvs, 'server');
 313                my ($local, $user, $host);
 314                $local = $repo =~ s/:local://;
 315                if (!$local) {
 316                    $repo =~ s/:ext://;
 317                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 318                    ($user, $host) = ($1, $2);
 319                }
 320                if (!$local) {
 321                    if ($user) {
 322                        unshift @cvs, $rsh, '-l', $user, $host;
 323                    } else {
 324                        unshift @cvs, $rsh, $host;
 325                    }
 326                }
 327
 328                unless ($pid) {
 329                        $pr->writer();
 330                        $pw->reader();
 331                        dup2($pw->fileno(),0);
 332                        dup2($pr->fileno(),1);
 333                        $pr->close();
 334                        $pw->close();
 335                        exec(@cvs);
 336                }
 337                $pw->writer();
 338                $pr->reader();
 339                $self->{'socketo'} = $pw;
 340                $self->{'socketi'} = $pr;
 341        }
 342        $self->{'socketo'}->write("Root $repo\n");
 343
 344        # Trial and error says that this probably is the minimum set
 345        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 346
 347        $self->{'socketo'}->write("valid-requests\n");
 348        $self->{'socketo'}->flush();
 349
 350        chomp(my $rep=$self->readline());
 351        if ($rep !~ s/^Valid-requests\s*//) {
 352                $rep="<unknown>" unless $rep;
 353                die "Expected Valid-requests from server, but got: $rep\n";
 354        }
 355        chomp(my $res=$self->readline());
 356        die "validReply: $res\n" if $res ne "ok";
 357
 358        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 359        $self->{'repo'} = $repo;
 360}
 361
 362sub readline {
 363        my ($self) = @_;
 364        return $self->{'socketi'}->getline();
 365}
 366
 367sub _file {
 368        # Request a file with a given revision.
 369        # Trial and error says this is a good way to do it. :-/
 370        my ($self,$fn,$rev) = @_;
 371        $self->{'socketo'}->write("Argument -N\n") or return undef;
 372        $self->{'socketo'}->write("Argument -P\n") or return undef;
 373        # -kk: Linus' version doesn't use it - defaults to off
 374        if ($opt_k) {
 375            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 376        }
 377        $self->{'socketo'}->write("Argument -r\n") or return undef;
 378        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 379        $self->{'socketo'}->write("Argument --\n") or return undef;
 380        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 381        $self->{'socketo'}->write("Directory .\n") or return undef;
 382        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 383        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 384        $self->{'socketo'}->write("co\n") or return undef;
 385        $self->{'socketo'}->flush() or return undef;
 386        $self->{'lines'} = 0;
 387        return 1;
 388}
 389sub _line {
 390        # Read a line from the server.
 391        # ... except that 'line' may be an entire file. ;-)
 392        my ($self, $fh) = @_;
 393        die "Not in lines" unless defined $self->{'lines'};
 394
 395        my $line;
 396        my $res=0;
 397        while (defined($line = $self->readline())) {
 398                # M U gnupg-cvs-rep/AUTHORS
 399                # Updated gnupg-cvs-rep/
 400                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 401                # /AUTHORS/1.1///T1.1
 402                # u=rw,g=rw,o=rw
 403                # 0
 404                # ok
 405
 406                if ($line =~ s/^(?:Created|Updated) //) {
 407                        $line = $self->readline(); # path
 408                        $line = $self->readline(); # Entries line
 409                        my $mode = $self->readline(); chomp $mode;
 410                        $self->{'mode'} = $mode;
 411                        defined (my $cnt = $self->readline())
 412                                or die "EOF from server after 'Changed'\n";
 413                        chomp $cnt;
 414                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 415                        $line="";
 416                        $res = $self->_fetchfile($fh, $cnt);
 417                } elsif ($line =~ s/^ //) {
 418                        print $fh $line;
 419                        $res += length($line);
 420                } elsif ($line =~ /^M\b/) {
 421                        # output, do nothing
 422                } elsif ($line =~ /^Mbinary\b/) {
 423                        my $cnt;
 424                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 425                        chomp $cnt;
 426                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 427                        $line="";
 428                        $res += $self->_fetchfile($fh, $cnt);
 429                } else {
 430                        chomp $line;
 431                        if ($line eq "ok") {
 432                                # print STDERR "S: ok (".length($res).")\n";
 433                                return $res;
 434                        } elsif ($line =~ s/^E //) {
 435                                # print STDERR "S: $line\n";
 436                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 437                                $line = $self->readline(); # filename
 438                                $line = $self->readline(); # OK
 439                                chomp $line;
 440                                die "Unknown: $line" if $line ne "ok";
 441                                return -1;
 442                        } else {
 443                                die "Unknown: $line\n";
 444                        }
 445                }
 446        }
 447        return undef;
 448}
 449sub file {
 450        my ($self,$fn,$rev) = @_;
 451        my $res;
 452
 453        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 454                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 455
 456        $self->_file($fn,$rev) and $res = $self->_line($fh);
 457
 458        if (!defined $res) {
 459            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 460            truncate $fh, 0;
 461            $self->conn();
 462            $self->_file($fn,$rev) or die "No file command send";
 463            $res = $self->_line($fh);
 464            die "Retry failed" unless defined $res;
 465        }
 466        close ($fh);
 467
 468        return ($name, $res);
 469}
 470sub _fetchfile {
 471        my ($self, $fh, $cnt) = @_;
 472        my $res = 0;
 473        my $bufsize = 1024 * 1024;
 474        while ($cnt) {
 475            if ($bufsize > $cnt) {
 476                $bufsize = $cnt;
 477            }
 478            my $buf;
 479            my $num = $self->{'socketi'}->read($buf,$bufsize);
 480            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 481            print $fh $buf;
 482            $res += $num;
 483            $cnt -= $num;
 484        }
 485        return $res;
 486}
 487
 488sub _scramble {
 489        my ($self, $pass) = @_;
 490        my $scrambled = "A";
 491
 492        return $scrambled unless $pass;
 493
 494        my $pass_len = length($pass);
 495        my @pass_arr = split("", $pass);
 496        my $i;
 497
 498        # from cvs/src/scramble.c
 499        my @shifts = (
 500                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 501                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 502                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 503                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 504                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 505                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 506                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 507                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 508                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 509                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 510                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 511                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 512                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 513                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 514                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 515                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 516        );
 517
 518        for ($i = 0; $i < $pass_len; $i++) {
 519                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 520        }
 521
 522        return $scrambled;
 523}
 524
 525package main;
 526
 527my $cvs = CVSconn->new($opt_d, $cvs_tree);
 528
 529
 530sub pdate($) {
 531        my ($d) = @_;
 532        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 533                or die "Unparseable date: $d\n";
 534        my $y=$1; $y-=1900 if $y>1900;
 535        return timegm($6||0,$5,$4,$3,$2-1,$y);
 536}
 537
 538sub pmode($) {
 539        my ($mode) = @_;
 540        my $m = 0;
 541        my $mm = 0;
 542        my $um = 0;
 543        for my $x(split(//,$mode)) {
 544                if ($x eq ",") {
 545                        $m |= $mm&$um;
 546                        $mm = 0;
 547                        $um = 0;
 548                } elsif ($x eq "u") { $um |= 0700;
 549                } elsif ($x eq "g") { $um |= 0070;
 550                } elsif ($x eq "o") { $um |= 0007;
 551                } elsif ($x eq "r") { $mm |= 0444;
 552                } elsif ($x eq "w") { $mm |= 0222;
 553                } elsif ($x eq "x") { $mm |= 0111;
 554                } elsif ($x eq "=") { # do nothing
 555                } else { die "Unknown mode: $mode\n";
 556                }
 557        }
 558        $m |= $mm&$um;
 559        return $m;
 560}
 561
 562sub getwd() {
 563        my $pwd = `pwd`;
 564        chomp $pwd;
 565        return $pwd;
 566}
 567
 568sub is_sha1 {
 569        my $s = shift;
 570        return $s =~ /^[a-f0-9]{40}$/;
 571}
 572
 573sub get_headref ($) {
 574        my $name = shift;
 575        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 576        return undef unless $? == 0;
 577        chomp $r;
 578        return $r;
 579}
 580
 581-d $git_tree
 582        or mkdir($git_tree,0777)
 583        or die "Could not create $git_tree: $!";
 584chdir($git_tree);
 585
 586my $last_branch = "";
 587my $orig_branch = "";
 588my %branch_date;
 589my $tip_at_start = undef;
 590
 591my $git_dir = $ENV{"GIT_DIR"} || ".git";
 592$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 593$ENV{"GIT_DIR"} = $git_dir;
 594my $orig_git_index;
 595$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 596
 597my %index; # holds filenames of one index per branch
 598
 599unless (-d $git_dir) {
 600        system("git-init");
 601        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 602        system("git-read-tree");
 603        die "Cannot init an empty tree: $?\n" if $?;
 604
 605        $last_branch = $opt_o;
 606        $orig_branch = "";
 607} else {
 608        open(F, "git-symbolic-ref HEAD |") or
 609                die "Cannot run git-symbolic-ref: $!\n";
 610        chomp ($last_branch = <F>);
 611        $last_branch = basename($last_branch);
 612        close(F);
 613        unless ($last_branch) {
 614                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 615                $last_branch = "master";
 616        }
 617        $orig_branch = $last_branch;
 618        $tip_at_start = `git-rev-parse --verify HEAD`;
 619
 620        # Get the last import timestamps
 621        my $fmt = '($ref, $author) = (%(refname), %(author));';
 622        open(H, "git-for-each-ref --perl --format='$fmt' $remote |") or
 623                die "Cannot run git-for-each-ref: $!\n";
 624        while (defined(my $entry = <H>)) {
 625                my ($ref, $author);
 626                eval($entry) || die "cannot eval refs list: $@";
 627                my ($head) = ($ref =~ m|^$remote/(.*)|);
 628                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 629                $branch_date{$head} = $1;
 630        }
 631        close(H);
 632        if (!exists $branch_date{$opt_o}) {
 633                die "Branch '$opt_o' does not exist.\n".
 634                       "Either use the correct '-o branch' option,\n".
 635                       "or import to a new repository.\n";
 636        }
 637}
 638
 639-d $git_dir
 640        or die "Could not create git subdir ($git_dir).\n";
 641
 642# now we read (and possibly save) author-info as well
 643-f "$git_dir/cvs-authors" and
 644  read_author_info("$git_dir/cvs-authors");
 645if ($opt_A) {
 646        read_author_info($opt_A);
 647        write_author_info("$git_dir/cvs-authors");
 648}
 649
 650
 651#
 652# run cvsps into a file unless we are getting
 653# it passed as a file via $opt_P
 654#
 655my $cvspsfile;
 656unless ($opt_P) {
 657        print "Running cvsps...\n" if $opt_v;
 658        my $pid = open(CVSPS,"-|");
 659        my $cvspsfh;
 660        die "Cannot fork: $!\n" unless defined $pid;
 661        unless ($pid) {
 662                my @opt;
 663                @opt = split(/,/,$opt_p) if defined $opt_p;
 664                unshift @opt, '-z', $opt_z if defined $opt_z;
 665                unshift @opt, '-q'         unless defined $opt_v;
 666                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 667                        push @opt, '--cvs-direct';
 668                }
 669                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 670                die "Could not start cvsps: $!\n";
 671        }
 672        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 673                                          DIR => File::Spec->tmpdir());
 674        while (<CVSPS>) {
 675            print $cvspsfh $_;
 676        }
 677        close CVSPS;
 678        $? == 0 or die "git-cvsimport: fatal: cvsps reported error\n";
 679        close $cvspsfh;
 680} else {
 681        $cvspsfile = $opt_P;
 682}
 683
 684open(CVS, "<$cvspsfile") or die $!;
 685
 686## cvsps output:
 687#---------------------
 688#PatchSet 314
 689#Date: 1999/09/18 13:03:59
 690#Author: wkoch
 691#Branch: STABLE-BRANCH-1-0
 692#Ancestor branch: HEAD
 693#Tag: (none)
 694#Log:
 695#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 696#Members:
 697#       README:1.57->1.57.2.1
 698#       VERSION:1.96->1.96.2.1
 699#
 700#---------------------
 701
 702my $state = 0;
 703
 704sub update_index (\@\@) {
 705        my $old = shift;
 706        my $new = shift;
 707        open(my $fh, '|-', qw(git-update-index -z --index-info))
 708                or die "unable to open git-update-index: $!";
 709        print $fh
 710                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 711                        @$old),
 712                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 713                        @$new)
 714                or die "unable to write to git-update-index: $!";
 715        close $fh
 716                or die "unable to write to git-update-index: $!";
 717        $? and die "git-update-index reported error: $?";
 718}
 719
 720sub write_tree () {
 721        open(my $fh, '-|', qw(git-write-tree))
 722                or die "unable to open git-write-tree: $!";
 723        chomp(my $tree = <$fh>);
 724        is_sha1($tree)
 725                or die "Cannot get tree id ($tree): $!";
 726        close($fh)
 727                or die "Error running git-write-tree: $?\n";
 728        print "Tree ID $tree\n" if $opt_v;
 729        return $tree;
 730}
 731
 732my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 733my (@old,@new,@skipped,%ignorebranch);
 734
 735# commits that cvsps cannot place anywhere...
 736$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 737
 738sub commit {
 739        if ($branch eq $opt_o && !$index{branch} &&
 740                !get_headref("$remote/$branch")) {
 741            # looks like an initial commit
 742            # use the index primed by git-init
 743            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 744            $index{$branch} = "$git_dir/index";
 745        } else {
 746            # use an index per branch to speed up
 747            # imports of projects with many branches
 748            unless ($index{$branch}) {
 749                $index{$branch} = tmpnam();
 750                $ENV{GIT_INDEX_FILE} = $index{$branch};
 751                if ($ancestor) {
 752                    system("git-read-tree", "$remote/$ancestor");
 753                } else {
 754                    system("git-read-tree", "$remote/$branch");
 755                }
 756                die "read-tree failed: $?\n" if $?;
 757            }
 758        }
 759        $ENV{GIT_INDEX_FILE} = $index{$branch};
 760
 761        update_index(@old, @new);
 762        @old = @new = ();
 763        my $tree = write_tree();
 764        my $parent = get_headref("$remote/$last_branch");
 765        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 766
 767        my @commit_args;
 768        push @commit_args, ("-p", $parent) if $parent;
 769
 770        # loose detection of merges
 771        # based on the commit msg
 772        foreach my $rx (@mergerx) {
 773                next unless $logmsg =~ $rx && $1;
 774                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 775                if (my $sha1 = get_headref("$remote/$mparent")) {
 776                        push @commit_args, '-p', "$remote/$mparent";
 777                        print "Merge parent branch: $mparent\n" if $opt_v;
 778                }
 779        }
 780
 781        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 782        $ENV{GIT_AUTHOR_NAME} = $author_name;
 783        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 784        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 785        $ENV{GIT_COMMITTER_NAME} = $author_name;
 786        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 787        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 788        my $pid = open2(my $commit_read, my $commit_write,
 789                'git-commit-tree', $tree, @commit_args);
 790
 791        # compatibility with git2cvs
 792        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 793        $logmsg =~ s/[\s\n]+\z//;
 794
 795        if (@skipped) {
 796            $logmsg .= "\n\n\nSKIPPED:\n\t";
 797            $logmsg .= join("\n\t", @skipped) . "\n";
 798            @skipped = ();
 799        }
 800
 801        print($commit_write "$logmsg\n") && close($commit_write)
 802                or die "Error writing to git-commit-tree: $!\n";
 803
 804        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 805        chomp(my $cid = <$commit_read>);
 806        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 807        print "Commit ID $cid\n" if $opt_v;
 808        close($commit_read);
 809
 810        waitpid($pid,0);
 811        die "Error running git-commit-tree: $?\n" if $?;
 812
 813        system('git-update-ref', "$remote/$branch", $cid) == 0
 814                or die "Cannot write branch $branch for update: $!\n";
 815
 816        if ($tag) {
 817                my ($xtag) = $tag;
 818                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 819                $xtag =~ tr/_/\./ if ( $opt_u );
 820                $xtag =~ s/[\/]/$opt_s/g;
 821                $xtag =~ s/\[//g;
 822
 823                system('git-tag', '-f', $xtag, $cid) == 0
 824                        or die "Cannot create tag $xtag: $!\n";
 825
 826                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 827        }
 828};
 829
 830my $commitcount = 1;
 831while (<CVS>) {
 832        chomp;
 833        if ($state == 0 and /^-+$/) {
 834                $state = 1;
 835        } elsif ($state == 0) {
 836                $state = 1;
 837                redo;
 838        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 839                $patchset = 0+$_;
 840                $state=2;
 841        } elsif ($state == 2 and s/^Date:\s+//) {
 842                $date = pdate($_);
 843                unless ($date) {
 844                        print STDERR "Could not parse date: $_\n";
 845                        $state=0;
 846                        next;
 847                }
 848                $state=3;
 849        } elsif ($state == 3 and s/^Author:\s+//) {
 850                s/\s+$//;
 851                if (/^(.*?)\s+<(.*)>/) {
 852                    ($author_name, $author_email) = ($1, $2);
 853                } elsif ($conv_author_name{$_}) {
 854                        $author_name = $conv_author_name{$_};
 855                        $author_email = $conv_author_email{$_};
 856                } else {
 857                    $author_name = $author_email = $_;
 858                }
 859                $state = 4;
 860        } elsif ($state == 4 and s/^Branch:\s+//) {
 861                s/\s+$//;
 862                tr/_/\./ if ( $opt_u );
 863                s/[\/]/$opt_s/g;
 864                $branch = $_;
 865                $state = 5;
 866        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 867                s/\s+$//;
 868                $ancestor = $_;
 869                $ancestor = $opt_o if $ancestor eq "HEAD";
 870                $state = 6;
 871        } elsif ($state == 5) {
 872                $ancestor = undef;
 873                $state = 6;
 874                redo;
 875        } elsif ($state == 6 and s/^Tag:\s+//) {
 876                s/\s+$//;
 877                if ($_ eq "(none)") {
 878                        $tag = undef;
 879                } else {
 880                        $tag = $_;
 881                }
 882                $state = 7;
 883        } elsif ($state == 7 and /^Log:/) {
 884                $logmsg = "";
 885                $state = 8;
 886        } elsif ($state == 8 and /^Members:/) {
 887                $branch = $opt_o if $branch eq "HEAD";
 888                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 889                        # skip
 890                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 891                        $state = 11;
 892                        next;
 893                }
 894                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 895                        # skip if the commit is too recent
 896                        # given that the cvsps default fuzz is 300s, we give ourselves another
 897                        # 300s just in case -- this also prevents skipping commits
 898                        # due to server clock drift
 899                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 900                        $state = 11;
 901                        next;
 902                }
 903                if (exists $ignorebranch{$branch}) {
 904                        print STDERR "Skipping $branch\n";
 905                        $state = 11;
 906                        next;
 907                }
 908                if ($ancestor) {
 909                        if ($ancestor eq $branch) {
 910                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 911                                $ancestor = $opt_o;
 912                        }
 913                        if (defined get_headref("$remote/$branch")) {
 914                                print STDERR "Branch $branch already exists!\n";
 915                                $state=11;
 916                                next;
 917                        }
 918                        my $id = get_headref("$remote/$ancestor");
 919                        if (!$id) {
 920                                print STDERR "Branch $ancestor does not exist!\n";
 921                                $ignorebranch{$branch} = 1;
 922                                $state=11;
 923                                next;
 924                        }
 925
 926                        system(qw(git update-ref -m cvsimport),
 927                                "$remote/$branch", $id);
 928                        if($? != 0) {
 929                                print STDERR "Could not create branch $branch\n";
 930                                $ignorebranch{$branch} = 1;
 931                                $state=11;
 932                                next;
 933                        }
 934                }
 935                $last_branch = $branch if $branch ne $last_branch;
 936                $state = 9;
 937        } elsif ($state == 8) {
 938                $logmsg .= "$_\n";
 939        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
 940#       VERSION:1.96->1.96.2.1
 941                my $init = ($2 eq "INITIAL");
 942                my $fn = $1;
 943                my $rev = $3;
 944                $fn =~ s#^/+##;
 945                if ($opt_S && $fn =~ m/$opt_S/) {
 946                    print "SKIPPING $fn v $rev\n";
 947                    push(@skipped, $fn);
 948                    next;
 949                }
 950                print "Fetching $fn   v $rev\n" if $opt_v;
 951                my ($tmpname, $size) = $cvs->file($fn,$rev);
 952                if ($size == -1) {
 953                        push(@old,$fn);
 954                        print "Drop $fn\n" if $opt_v;
 955                } else {
 956                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
 957                        my $pid = open(my $F, '-|');
 958                        die $! unless defined $pid;
 959                        if (!$pid) {
 960                            exec("git-hash-object", "-w", $tmpname)
 961                                or die "Cannot create object: $!\n";
 962                        }
 963                        my $sha = <$F>;
 964                        chomp $sha;
 965                        close $F;
 966                        my $mode = pmode($cvs->{'mode'});
 967                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
 968                }
 969                unlink($tmpname);
 970        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
 971                my $fn = $1;
 972                $fn =~ s#^/+##;
 973                push(@old,$fn);
 974                print "Delete $fn\n" if $opt_v;
 975        } elsif ($state == 9 and /^\s*$/) {
 976                $state = 10;
 977        } elsif (($state == 9 or $state == 10) and /^-+$/) {
 978                $commitcount++;
 979                if ($opt_L && $commitcount > $opt_L) {
 980                        last;
 981                }
 982                commit();
 983                if (($commitcount & 1023) == 0) {
 984                        system("git repack -a -d");
 985                }
 986                $state = 1;
 987        } elsif ($state == 11 and /^-+$/) {
 988                $state = 1;
 989        } elsif (/^-+$/) { # end of unknown-line processing
 990                $state = 1;
 991        } elsif ($state != 11) { # ignore stuff when skipping
 992                print STDERR "* UNKNOWN LINE * $_\n";
 993        }
 994}
 995commit() if $branch and $state != 11;
 996
 997unless ($opt_P) {
 998        unlink($cvspsfile);
 999}
1000
1001# The heuristic of repacking every 1024 commits can leave a
1002# lot of unpacked data.  If there is more than 1MB worth of
1003# not-packed objects, repack once more.
1004my $line = `git-count-objects`;
1005if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1006  my ($n_objects, $kb) = ($1, $2);
1007  1024 < $kb
1008    and system("git repack -a -d");
1009}
1010
1011foreach my $git_index (values %index) {
1012    if ($git_index ne "$git_dir/index") {
1013        unlink($git_index);
1014    }
1015}
1016
1017if (defined $orig_git_index) {
1018        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1019} else {
1020        delete $ENV{GIT_INDEX_FILE};
1021}
1022
1023# Now switch back to the branch we were in before all of this happened
1024if ($orig_branch) {
1025        print "DONE.\n" if $opt_v;
1026        if ($opt_i) {
1027                exit 0;
1028        }
1029        my $tip_at_end = `git-rev-parse --verify HEAD`;
1030        if ($tip_at_start ne $tip_at_end) {
1031                for ($tip_at_start, $tip_at_end) { chomp; }
1032                print "Fetched into the current branch.\n" if $opt_v;
1033                system(qw(git-read-tree -u -m),
1034                       $tip_at_start, $tip_at_end);
1035                die "Fast-forward update failed: $?\n" if $?;
1036        }
1037        else {
1038                system(qw(git-merge cvsimport HEAD), "$remote/$opt_o");
1039                die "Could not merge $opt_o into the current branch.\n" if $?;
1040        }
1041} else {
1042        $orig_branch = "master";
1043        print "DONE; creating $orig_branch branch\n" if $opt_v;
1044        system("git-update-ref", "refs/heads/master", "$remote/$opt_o")
1045                unless defined get_headref('refs/heads/master');
1046        system("git-symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1047                if ($opt_r && $opt_o ne 'HEAD');
1048        system('git-update-ref', 'HEAD', "$orig_branch");
1049        unless ($opt_i) {
1050                system('git checkout -f');
1051                die "checkout failed: $?\n" if $?;
1052        }
1053}