git-cvsimport.perlon commit shortlog: respect commit encoding (79f7ca0)
   1#!/usr/bin/perl -w
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use strict;
  17use warnings;
  18use Getopt::Long;
  19use File::Spec;
  20use File::Temp qw(tempfile tmpnam);
  21use File::Path qw(mkpath);
  22use File::Basename qw(basename dirname);
  23use Time::Local;
  24use IO::Socket;
  25use IO::Pipe;
  26use POSIX qw(strftime dup2 ENOENT);
  27use IPC::Open2;
  28
  29$SIG{'PIPE'}="IGNORE";
  30$ENV{'TZ'}="UTC";
  31
  32our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r);
  33my (%conv_author_name, %conv_author_email);
  34
  35sub usage(;$) {
  36        my $msg = shift;
  37        print(STDERR "Error: $msg\n") if $msg;
  38        print STDERR <<END;
  39Usage: git cvsimport     # fetch/update GIT from CVS
  40       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  41       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  42       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  43       [-r remote] [CVS_module]
  44END
  45        exit(1);
  46}
  47
  48sub read_author_info($) {
  49        my ($file) = @_;
  50        my $user;
  51        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  52
  53        while (<$f>) {
  54                # Expected format is this:
  55                #   exon=Andreas Ericsson <ae@op5.se>
  56                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  57                        $user = $1;
  58                        $conv_author_name{$user} = $2;
  59                        $conv_author_email{$user} = $3;
  60                }
  61                # However, we also read from CVSROOT/users format
  62                # to ease migration.
  63                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  64                        my $mapped;
  65                        ($user, $mapped) = ($1, $3);
  66                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  67                                $conv_author_name{$user} = $1;
  68                                $conv_author_email{$user} = $2;
  69                        }
  70                        elsif ($mapped =~ /^<?(.*)>?$/) {
  71                                $conv_author_name{$user} = $user;
  72                                $conv_author_email{$user} = $1;
  73                        }
  74                }
  75                # NEEDSWORK: Maybe warn on unrecognized lines?
  76        }
  77        close ($f);
  78}
  79
  80sub write_author_info($) {
  81        my ($file) = @_;
  82        open my $f, '>', $file or
  83          die("Failed to open $file for writing: $!");
  84
  85        foreach (keys %conv_author_name) {
  86                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  87        }
  88        close ($f);
  89}
  90
  91# convert getopts specs for use by git config
  92sub read_repo_config {
  93    # Split the string between characters, unless there is a ':'
  94    # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
  95        my @opts = split(/ *(?!:)/, shift);
  96        foreach my $o (@opts) {
  97                my $key = $o;
  98                $key =~ s/://g;
  99                my $arg = 'git config';
 100                $arg .= ' --bool' if ($o !~ /:$/);
 101
 102        chomp(my $tmp = `$arg --get cvsimport.$key`);
 103                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 104            no strict 'refs';
 105            my $opt_name = "opt_" . $key;
 106            if (!$$opt_name) {
 107                $$opt_name = $tmp;
 108            }
 109                }
 110        }
 111}
 112
 113my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:";
 114read_repo_config($opts);
 115Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 116
 117# turn the Getopt::Std specification in a Getopt::Long one,
 118# with support for multiple -M options
 119GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 120    or usage();
 121usage if $opt_h;
 122
 123if (@ARGV == 0) {
 124                chomp(my $module = `git config --get cvsimport.module`);
 125                push(@ARGV, $module) if $? == 0;
 126}
 127@ARGV <= 1 or usage("You can't specify more than one CVS module");
 128
 129if ($opt_d) {
 130        $ENV{"CVSROOT"} = $opt_d;
 131} elsif (-f 'CVS/Root') {
 132        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 133        $opt_d = <$f>;
 134        chomp $opt_d;
 135        close $f;
 136        $ENV{"CVSROOT"} = $opt_d;
 137} elsif ($ENV{"CVSROOT"}) {
 138        $opt_d = $ENV{"CVSROOT"};
 139} else {
 140        usage("CVSROOT needs to be set");
 141}
 142$opt_s ||= "-";
 143$opt_a ||= 0;
 144
 145my $git_tree = $opt_C;
 146$git_tree ||= ".";
 147
 148my $remote;
 149if (defined $opt_r) {
 150        $remote = 'refs/remotes/' . $opt_r;
 151        $opt_o ||= "master";
 152} else {
 153        $opt_o ||= "origin";
 154        $remote = 'refs/heads';
 155}
 156
 157my $cvs_tree;
 158if ($#ARGV == 0) {
 159        $cvs_tree = $ARGV[0];
 160} elsif (-f 'CVS/Repository') {
 161        open my $f, '<', 'CVS/Repository' or
 162            die 'Failed to open CVS/Repository';
 163        $cvs_tree = <$f>;
 164        chomp $cvs_tree;
 165        close $f;
 166} else {
 167        usage("CVS module has to be specified");
 168}
 169
 170our @mergerx = ();
 171if ($opt_m) {
 172        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 173}
 174if (@opt_M) {
 175        push (@mergerx, map { qr/$_/ } @opt_M);
 176}
 177
 178# Remember UTC of our starting time
 179# we'll want to avoid importing commits
 180# that are too recent
 181our $starttime = time();
 182
 183select(STDERR); $|=1; select(STDOUT);
 184
 185
 186package CVSconn;
 187# Basic CVS dialog.
 188# We're only interested in connecting and downloading, so ...
 189
 190use File::Spec;
 191use File::Temp qw(tempfile);
 192use POSIX qw(strftime dup2);
 193
 194sub new {
 195        my ($what,$repo,$subdir) = @_;
 196        $what=ref($what) if ref($what);
 197
 198        my $self = {};
 199        $self->{'buffer'} = "";
 200        bless($self,$what);
 201
 202        $repo =~ s#/+$##;
 203        $self->{'fullrep'} = $repo;
 204        $self->conn();
 205
 206        $self->{'subdir'} = $subdir;
 207        $self->{'lines'} = undef;
 208
 209        return $self;
 210}
 211
 212sub conn {
 213        my $self = shift;
 214        my $repo = $self->{'fullrep'};
 215        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 216                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 217
 218                my ($proxyhost,$proxyport);
 219                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 220                        $proxyhost = $1;
 221                        # Default proxyport, if not specified, is 8080.
 222                        $proxyport = 8080;
 223                        if ($ENV{"CVS_PROXY_PORT"}) {
 224                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 225                        }
 226                        if ($param =~ m/proxyport=([^;]+)/) {
 227                                $proxyport = $1;
 228                        }
 229                }
 230                $repo ||= '/';
 231
 232                # if username is not explicit in CVSROOT, then use current user, as cvs would
 233                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 234                my $rr2 = "-";
 235                unless ($port) {
 236                        $rr2 = ":pserver:$user\@$serv:$repo";
 237                        $port=2401;
 238                }
 239                my $rr = ":pserver:$user\@$serv:$port$repo";
 240
 241                if ($pass) {
 242                        $pass = $self->_scramble($pass);
 243                } else {
 244                        open(H,$ENV{'HOME'}."/.cvspass") and do {
 245                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 246                                while (<H>) {
 247                                        chomp;
 248                                        s/^\/\d+\s+//;
 249                                        my ($w,$p) = split(/\s/,$_,2);
 250                                        if ($w eq $rr or $w eq $rr2) {
 251                                                $pass = $p;
 252                                                last;
 253                                        }
 254                                }
 255                        };
 256                        $pass = "A" unless $pass;
 257                }
 258
 259                my ($s, $rep);
 260                if ($proxyhost) {
 261
 262                        # Use a HTTP Proxy. Only works for HTTP proxies that
 263                        # don't require user authentication
 264                        #
 265                        # See: http://www.ietf.org/rfc/rfc2817.txt
 266
 267                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 268                        die "Socket to $proxyhost: $!\n" unless defined $s;
 269                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 270                                or die "Write to $proxyhost: $!\n";
 271                        $s->flush();
 272
 273                        $rep = <$s>;
 274
 275                        # The answer should look like 'HTTP/1.x 2yy ....'
 276                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 277                                die "Proxy connect: $rep\n";
 278                        }
 279                        # Skip up to the empty line of the proxy server output
 280                        # including the response headers.
 281                        while ($rep = <$s>) {
 282                                last if (!defined $rep ||
 283                                         $rep eq "\n" ||
 284                                         $rep eq "\r\n");
 285                        }
 286                } else {
 287                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 288                        die "Socket to $serv: $!\n" unless defined $s;
 289                }
 290
 291                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 292                        or die "Write to $serv: $!\n";
 293                $s->flush();
 294
 295                $rep = <$s>;
 296
 297                if ($rep ne "I LOVE YOU\n") {
 298                        $rep="<unknown>" unless $rep;
 299                        die "AuthReply: $rep\n";
 300                }
 301                $self->{'socketo'} = $s;
 302                $self->{'socketi'} = $s;
 303        } else { # local or ext: Fork off our own cvs server.
 304                my $pr = IO::Pipe->new();
 305                my $pw = IO::Pipe->new();
 306                my $pid = fork();
 307                die "Fork: $!\n" unless defined $pid;
 308                my $cvs = 'cvs';
 309                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 310                my $rsh = 'rsh';
 311                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 312
 313                my @cvs = ($cvs, 'server');
 314                my ($local, $user, $host);
 315                $local = $repo =~ s/:local://;
 316                if (!$local) {
 317                    $repo =~ s/:ext://;
 318                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 319                    ($user, $host) = ($1, $2);
 320                }
 321                if (!$local) {
 322                    if ($user) {
 323                        unshift @cvs, $rsh, '-l', $user, $host;
 324                    } else {
 325                        unshift @cvs, $rsh, $host;
 326                    }
 327                }
 328
 329                unless ($pid) {
 330                        $pr->writer();
 331                        $pw->reader();
 332                        dup2($pw->fileno(),0);
 333                        dup2($pr->fileno(),1);
 334                        $pr->close();
 335                        $pw->close();
 336                        exec(@cvs);
 337                }
 338                $pw->writer();
 339                $pr->reader();
 340                $self->{'socketo'} = $pw;
 341                $self->{'socketi'} = $pr;
 342        }
 343        $self->{'socketo'}->write("Root $repo\n");
 344
 345        # Trial and error says that this probably is the minimum set
 346        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 347
 348        $self->{'socketo'}->write("valid-requests\n");
 349        $self->{'socketo'}->flush();
 350
 351        chomp(my $rep=$self->readline());
 352        if ($rep !~ s/^Valid-requests\s*//) {
 353                $rep="<unknown>" unless $rep;
 354                die "Expected Valid-requests from server, but got: $rep\n";
 355        }
 356        chomp(my $res=$self->readline());
 357        die "validReply: $res\n" if $res ne "ok";
 358
 359        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 360        $self->{'repo'} = $repo;
 361}
 362
 363sub readline {
 364        my ($self) = @_;
 365        return $self->{'socketi'}->getline();
 366}
 367
 368sub _file {
 369        # Request a file with a given revision.
 370        # Trial and error says this is a good way to do it. :-/
 371        my ($self,$fn,$rev) = @_;
 372        $self->{'socketo'}->write("Argument -N\n") or return undef;
 373        $self->{'socketo'}->write("Argument -P\n") or return undef;
 374        # -kk: Linus' version doesn't use it - defaults to off
 375        if ($opt_k) {
 376            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 377        }
 378        $self->{'socketo'}->write("Argument -r\n") or return undef;
 379        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 380        $self->{'socketo'}->write("Argument --\n") or return undef;
 381        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 382        $self->{'socketo'}->write("Directory .\n") or return undef;
 383        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 384        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 385        $self->{'socketo'}->write("co\n") or return undef;
 386        $self->{'socketo'}->flush() or return undef;
 387        $self->{'lines'} = 0;
 388        return 1;
 389}
 390sub _line {
 391        # Read a line from the server.
 392        # ... except that 'line' may be an entire file. ;-)
 393        my ($self, $fh) = @_;
 394        die "Not in lines" unless defined $self->{'lines'};
 395
 396        my $line;
 397        my $res=0;
 398        while (defined($line = $self->readline())) {
 399                # M U gnupg-cvs-rep/AUTHORS
 400                # Updated gnupg-cvs-rep/
 401                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 402                # /AUTHORS/1.1///T1.1
 403                # u=rw,g=rw,o=rw
 404                # 0
 405                # ok
 406
 407                if ($line =~ s/^(?:Created|Updated) //) {
 408                        $line = $self->readline(); # path
 409                        $line = $self->readline(); # Entries line
 410                        my $mode = $self->readline(); chomp $mode;
 411                        $self->{'mode'} = $mode;
 412                        defined (my $cnt = $self->readline())
 413                                or die "EOF from server after 'Changed'\n";
 414                        chomp $cnt;
 415                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 416                        $line="";
 417                        $res = $self->_fetchfile($fh, $cnt);
 418                } elsif ($line =~ s/^ //) {
 419                        print $fh $line;
 420                        $res += length($line);
 421                } elsif ($line =~ /^M\b/) {
 422                        # output, do nothing
 423                } elsif ($line =~ /^Mbinary\b/) {
 424                        my $cnt;
 425                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 426                        chomp $cnt;
 427                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 428                        $line="";
 429                        $res += $self->_fetchfile($fh, $cnt);
 430                } else {
 431                        chomp $line;
 432                        if ($line eq "ok") {
 433                                # print STDERR "S: ok (".length($res).")\n";
 434                                return $res;
 435                        } elsif ($line =~ s/^E //) {
 436                                # print STDERR "S: $line\n";
 437                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 438                                $line = $self->readline(); # filename
 439                                $line = $self->readline(); # OK
 440                                chomp $line;
 441                                die "Unknown: $line" if $line ne "ok";
 442                                return -1;
 443                        } else {
 444                                die "Unknown: $line\n";
 445                        }
 446                }
 447        }
 448        return undef;
 449}
 450sub file {
 451        my ($self,$fn,$rev) = @_;
 452        my $res;
 453
 454        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 455                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 456
 457        $self->_file($fn,$rev) and $res = $self->_line($fh);
 458
 459        if (!defined $res) {
 460            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 461            truncate $fh, 0;
 462            $self->conn();
 463            $self->_file($fn,$rev) or die "No file command send";
 464            $res = $self->_line($fh);
 465            die "Retry failed" unless defined $res;
 466        }
 467        close ($fh);
 468
 469        return ($name, $res);
 470}
 471sub _fetchfile {
 472        my ($self, $fh, $cnt) = @_;
 473        my $res = 0;
 474        my $bufsize = 1024 * 1024;
 475        while ($cnt) {
 476            if ($bufsize > $cnt) {
 477                $bufsize = $cnt;
 478            }
 479            my $buf;
 480            my $num = $self->{'socketi'}->read($buf,$bufsize);
 481            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 482            print $fh $buf;
 483            $res += $num;
 484            $cnt -= $num;
 485        }
 486        return $res;
 487}
 488
 489sub _scramble {
 490        my ($self, $pass) = @_;
 491        my $scrambled = "A";
 492
 493        return $scrambled unless $pass;
 494
 495        my $pass_len = length($pass);
 496        my @pass_arr = split("", $pass);
 497        my $i;
 498
 499        # from cvs/src/scramble.c
 500        my @shifts = (
 501                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 502                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 503                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 504                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 505                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 506                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 507                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 508                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 509                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 510                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 511                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 512                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 513                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 514                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 515                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 516                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 517        );
 518
 519        for ($i = 0; $i < $pass_len; $i++) {
 520                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 521        }
 522
 523        return $scrambled;
 524}
 525
 526package main;
 527
 528my $cvs = CVSconn->new($opt_d, $cvs_tree);
 529
 530
 531sub pdate($) {
 532        my ($d) = @_;
 533        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 534                or die "Unparseable date: $d\n";
 535        my $y=$1; $y-=1900 if $y>1900;
 536        return timegm($6||0,$5,$4,$3,$2-1,$y);
 537}
 538
 539sub pmode($) {
 540        my ($mode) = @_;
 541        my $m = 0;
 542        my $mm = 0;
 543        my $um = 0;
 544        for my $x(split(//,$mode)) {
 545                if ($x eq ",") {
 546                        $m |= $mm&$um;
 547                        $mm = 0;
 548                        $um = 0;
 549                } elsif ($x eq "u") { $um |= 0700;
 550                } elsif ($x eq "g") { $um |= 0070;
 551                } elsif ($x eq "o") { $um |= 0007;
 552                } elsif ($x eq "r") { $mm |= 0444;
 553                } elsif ($x eq "w") { $mm |= 0222;
 554                } elsif ($x eq "x") { $mm |= 0111;
 555                } elsif ($x eq "=") { # do nothing
 556                } else { die "Unknown mode: $mode\n";
 557                }
 558        }
 559        $m |= $mm&$um;
 560        return $m;
 561}
 562
 563sub getwd() {
 564        my $pwd = `pwd`;
 565        chomp $pwd;
 566        return $pwd;
 567}
 568
 569sub is_sha1 {
 570        my $s = shift;
 571        return $s =~ /^[a-f0-9]{40}$/;
 572}
 573
 574sub get_headref ($) {
 575        my $name = shift;
 576        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 577        return undef unless $? == 0;
 578        chomp $r;
 579        return $r;
 580}
 581
 582-d $git_tree
 583        or mkdir($git_tree,0777)
 584        or die "Could not create $git_tree: $!";
 585chdir($git_tree);
 586
 587my $last_branch = "";
 588my $orig_branch = "";
 589my %branch_date;
 590my $tip_at_start = undef;
 591
 592my $git_dir = $ENV{"GIT_DIR"} || ".git";
 593$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 594$ENV{"GIT_DIR"} = $git_dir;
 595my $orig_git_index;
 596$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 597
 598my %index; # holds filenames of one index per branch
 599
 600unless (-d $git_dir) {
 601        system("git-init");
 602        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 603        system("git-read-tree");
 604        die "Cannot init an empty tree: $?\n" if $?;
 605
 606        $last_branch = $opt_o;
 607        $orig_branch = "";
 608} else {
 609        open(F, "git-symbolic-ref HEAD |") or
 610                die "Cannot run git-symbolic-ref: $!\n";
 611        chomp ($last_branch = <F>);
 612        $last_branch = basename($last_branch);
 613        close(F);
 614        unless ($last_branch) {
 615                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 616                $last_branch = "master";
 617        }
 618        $orig_branch = $last_branch;
 619        $tip_at_start = `git-rev-parse --verify HEAD`;
 620
 621        # Get the last import timestamps
 622        my $fmt = '($ref, $author) = (%(refname), %(author));';
 623        open(H, "git-for-each-ref --perl --format='$fmt' $remote |") or
 624                die "Cannot run git-for-each-ref: $!\n";
 625        while (defined(my $entry = <H>)) {
 626                my ($ref, $author);
 627                eval($entry) || die "cannot eval refs list: $@";
 628                my ($head) = ($ref =~ m|^$remote/(.*)|);
 629                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 630                $branch_date{$head} = $1;
 631        }
 632        close(H);
 633        if (!exists $branch_date{$opt_o}) {
 634                die "Branch '$opt_o' does not exist.\n".
 635                       "Either use the correct '-o branch' option,\n".
 636                       "or import to a new repository.\n";
 637        }
 638}
 639
 640-d $git_dir
 641        or die "Could not create git subdir ($git_dir).\n";
 642
 643# now we read (and possibly save) author-info as well
 644-f "$git_dir/cvs-authors" and
 645  read_author_info("$git_dir/cvs-authors");
 646if ($opt_A) {
 647        read_author_info($opt_A);
 648        write_author_info("$git_dir/cvs-authors");
 649}
 650
 651
 652#
 653# run cvsps into a file unless we are getting
 654# it passed as a file via $opt_P
 655#
 656my $cvspsfile;
 657unless ($opt_P) {
 658        print "Running cvsps...\n" if $opt_v;
 659        my $pid = open(CVSPS,"-|");
 660        my $cvspsfh;
 661        die "Cannot fork: $!\n" unless defined $pid;
 662        unless ($pid) {
 663                my @opt;
 664                @opt = split(/,/,$opt_p) if defined $opt_p;
 665                unshift @opt, '-z', $opt_z if defined $opt_z;
 666                unshift @opt, '-q'         unless defined $opt_v;
 667                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 668                        push @opt, '--cvs-direct';
 669                }
 670                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 671                die "Could not start cvsps: $!\n";
 672        }
 673        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 674                                          DIR => File::Spec->tmpdir());
 675        while (<CVSPS>) {
 676            print $cvspsfh $_;
 677        }
 678        close CVSPS;
 679        $? == 0 or die "git-cvsimport: fatal: cvsps reported error\n";
 680        close $cvspsfh;
 681} else {
 682        $cvspsfile = $opt_P;
 683}
 684
 685open(CVS, "<$cvspsfile") or die $!;
 686
 687## cvsps output:
 688#---------------------
 689#PatchSet 314
 690#Date: 1999/09/18 13:03:59
 691#Author: wkoch
 692#Branch: STABLE-BRANCH-1-0
 693#Ancestor branch: HEAD
 694#Tag: (none)
 695#Log:
 696#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 697#Members:
 698#       README:1.57->1.57.2.1
 699#       VERSION:1.96->1.96.2.1
 700#
 701#---------------------
 702
 703my $state = 0;
 704
 705sub update_index (\@\@) {
 706        my $old = shift;
 707        my $new = shift;
 708        open(my $fh, '|-', qw(git-update-index -z --index-info))
 709                or die "unable to open git-update-index: $!";
 710        print $fh
 711                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 712                        @$old),
 713                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 714                        @$new)
 715                or die "unable to write to git-update-index: $!";
 716        close $fh
 717                or die "unable to write to git-update-index: $!";
 718        $? and die "git-update-index reported error: $?";
 719}
 720
 721sub write_tree () {
 722        open(my $fh, '-|', qw(git-write-tree))
 723                or die "unable to open git-write-tree: $!";
 724        chomp(my $tree = <$fh>);
 725        is_sha1($tree)
 726                or die "Cannot get tree id ($tree): $!";
 727        close($fh)
 728                or die "Error running git-write-tree: $?\n";
 729        print "Tree ID $tree\n" if $opt_v;
 730        return $tree;
 731}
 732
 733my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 734my (@old,@new,@skipped,%ignorebranch);
 735
 736# commits that cvsps cannot place anywhere...
 737$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 738
 739sub commit {
 740        if ($branch eq $opt_o && !$index{branch} &&
 741                !get_headref("$remote/$branch")) {
 742            # looks like an initial commit
 743            # use the index primed by git-init
 744            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 745            $index{$branch} = "$git_dir/index";
 746        } else {
 747            # use an index per branch to speed up
 748            # imports of projects with many branches
 749            unless ($index{$branch}) {
 750                $index{$branch} = tmpnam();
 751                $ENV{GIT_INDEX_FILE} = $index{$branch};
 752                if ($ancestor) {
 753                    system("git-read-tree", "$remote/$ancestor");
 754                } else {
 755                    system("git-read-tree", "$remote/$branch");
 756                }
 757                die "read-tree failed: $?\n" if $?;
 758            }
 759        }
 760        $ENV{GIT_INDEX_FILE} = $index{$branch};
 761
 762        update_index(@old, @new);
 763        @old = @new = ();
 764        my $tree = write_tree();
 765        my $parent = get_headref("$remote/$last_branch");
 766        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 767
 768        my @commit_args;
 769        push @commit_args, ("-p", $parent) if $parent;
 770
 771        # loose detection of merges
 772        # based on the commit msg
 773        foreach my $rx (@mergerx) {
 774                next unless $logmsg =~ $rx && $1;
 775                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 776                if (my $sha1 = get_headref("$remote/$mparent")) {
 777                        push @commit_args, '-p', "$remote/$mparent";
 778                        print "Merge parent branch: $mparent\n" if $opt_v;
 779                }
 780        }
 781
 782        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 783        $ENV{GIT_AUTHOR_NAME} = $author_name;
 784        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 785        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 786        $ENV{GIT_COMMITTER_NAME} = $author_name;
 787        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 788        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 789        my $pid = open2(my $commit_read, my $commit_write,
 790                'git-commit-tree', $tree, @commit_args);
 791
 792        # compatibility with git2cvs
 793        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 794        $logmsg =~ s/[\s\n]+\z//;
 795
 796        if (@skipped) {
 797            $logmsg .= "\n\n\nSKIPPED:\n\t";
 798            $logmsg .= join("\n\t", @skipped) . "\n";
 799            @skipped = ();
 800        }
 801
 802        print($commit_write "$logmsg\n") && close($commit_write)
 803                or die "Error writing to git-commit-tree: $!\n";
 804
 805        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 806        chomp(my $cid = <$commit_read>);
 807        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 808        print "Commit ID $cid\n" if $opt_v;
 809        close($commit_read);
 810
 811        waitpid($pid,0);
 812        die "Error running git-commit-tree: $?\n" if $?;
 813
 814        system('git-update-ref', "$remote/$branch", $cid) == 0
 815                or die "Cannot write branch $branch for update: $!\n";
 816
 817        if ($tag) {
 818                my ($xtag) = $tag;
 819                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 820                $xtag =~ tr/_/\./ if ( $opt_u );
 821                $xtag =~ s/[\/]/$opt_s/g;
 822                $xtag =~ s/\[//g;
 823
 824                system('git-tag', '-f', $xtag, $cid) == 0
 825                        or die "Cannot create tag $xtag: $!\n";
 826
 827                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 828        }
 829};
 830
 831my $commitcount = 1;
 832while (<CVS>) {
 833        chomp;
 834        if ($state == 0 and /^-+$/) {
 835                $state = 1;
 836        } elsif ($state == 0) {
 837                $state = 1;
 838                redo;
 839        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 840                $patchset = 0+$_;
 841                $state=2;
 842        } elsif ($state == 2 and s/^Date:\s+//) {
 843                $date = pdate($_);
 844                unless ($date) {
 845                        print STDERR "Could not parse date: $_\n";
 846                        $state=0;
 847                        next;
 848                }
 849                $state=3;
 850        } elsif ($state == 3 and s/^Author:\s+//) {
 851                s/\s+$//;
 852                if (/^(.*?)\s+<(.*)>/) {
 853                    ($author_name, $author_email) = ($1, $2);
 854                } elsif ($conv_author_name{$_}) {
 855                        $author_name = $conv_author_name{$_};
 856                        $author_email = $conv_author_email{$_};
 857                } else {
 858                    $author_name = $author_email = $_;
 859                }
 860                $state = 4;
 861        } elsif ($state == 4 and s/^Branch:\s+//) {
 862                s/\s+$//;
 863                tr/_/\./ if ( $opt_u );
 864                s/[\/]/$opt_s/g;
 865                $branch = $_;
 866                $state = 5;
 867        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 868                s/\s+$//;
 869                $ancestor = $_;
 870                $ancestor = $opt_o if $ancestor eq "HEAD";
 871                $state = 6;
 872        } elsif ($state == 5) {
 873                $ancestor = undef;
 874                $state = 6;
 875                redo;
 876        } elsif ($state == 6 and s/^Tag:\s+//) {
 877                s/\s+$//;
 878                if ($_ eq "(none)") {
 879                        $tag = undef;
 880                } else {
 881                        $tag = $_;
 882                }
 883                $state = 7;
 884        } elsif ($state == 7 and /^Log:/) {
 885                $logmsg = "";
 886                $state = 8;
 887        } elsif ($state == 8 and /^Members:/) {
 888                $branch = $opt_o if $branch eq "HEAD";
 889                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 890                        # skip
 891                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 892                        $state = 11;
 893                        next;
 894                }
 895                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 896                        # skip if the commit is too recent
 897                        # given that the cvsps default fuzz is 300s, we give ourselves another
 898                        # 300s just in case -- this also prevents skipping commits
 899                        # due to server clock drift
 900                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 901                        $state = 11;
 902                        next;
 903                }
 904                if (exists $ignorebranch{$branch}) {
 905                        print STDERR "Skipping $branch\n";
 906                        $state = 11;
 907                        next;
 908                }
 909                if ($ancestor) {
 910                        if ($ancestor eq $branch) {
 911                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 912                                $ancestor = $opt_o;
 913                        }
 914                        if (defined get_headref("$remote/$branch")) {
 915                                print STDERR "Branch $branch already exists!\n";
 916                                $state=11;
 917                                next;
 918                        }
 919                        my $id = get_headref("$remote/$ancestor");
 920                        if (!$id) {
 921                                print STDERR "Branch $ancestor does not exist!\n";
 922                                $ignorebranch{$branch} = 1;
 923                                $state=11;
 924                                next;
 925                        }
 926
 927                        system(qw(git update-ref -m cvsimport),
 928                                "$remote/$branch", $id);
 929                        if($? != 0) {
 930                                print STDERR "Could not create branch $branch\n";
 931                                $ignorebranch{$branch} = 1;
 932                                $state=11;
 933                                next;
 934                        }
 935                }
 936                $last_branch = $branch if $branch ne $last_branch;
 937                $state = 9;
 938        } elsif ($state == 8) {
 939                $logmsg .= "$_\n";
 940        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
 941#       VERSION:1.96->1.96.2.1
 942                my $init = ($2 eq "INITIAL");
 943                my $fn = $1;
 944                my $rev = $3;
 945                $fn =~ s#^/+##;
 946                if ($opt_S && $fn =~ m/$opt_S/) {
 947                    print "SKIPPING $fn v $rev\n";
 948                    push(@skipped, $fn);
 949                    next;
 950                }
 951                print "Fetching $fn   v $rev\n" if $opt_v;
 952                my ($tmpname, $size) = $cvs->file($fn,$rev);
 953                if ($size == -1) {
 954                        push(@old,$fn);
 955                        print "Drop $fn\n" if $opt_v;
 956                } else {
 957                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
 958                        my $pid = open(my $F, '-|');
 959                        die $! unless defined $pid;
 960                        if (!$pid) {
 961                            exec("git-hash-object", "-w", $tmpname)
 962                                or die "Cannot create object: $!\n";
 963                        }
 964                        my $sha = <$F>;
 965                        chomp $sha;
 966                        close $F;
 967                        my $mode = pmode($cvs->{'mode'});
 968                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
 969                }
 970                unlink($tmpname);
 971        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
 972                my $fn = $1;
 973                $fn =~ s#^/+##;
 974                push(@old,$fn);
 975                print "Delete $fn\n" if $opt_v;
 976        } elsif ($state == 9 and /^\s*$/) {
 977                $state = 10;
 978        } elsif (($state == 9 or $state == 10) and /^-+$/) {
 979                $commitcount++;
 980                if ($opt_L && $commitcount > $opt_L) {
 981                        last;
 982                }
 983                commit();
 984                if (($commitcount & 1023) == 0) {
 985                        system("git repack -a -d");
 986                }
 987                $state = 1;
 988        } elsif ($state == 11 and /^-+$/) {
 989                $state = 1;
 990        } elsif (/^-+$/) { # end of unknown-line processing
 991                $state = 1;
 992        } elsif ($state != 11) { # ignore stuff when skipping
 993                print STDERR "* UNKNOWN LINE * $_\n";
 994        }
 995}
 996commit() if $branch and $state != 11;
 997
 998unless ($opt_P) {
 999        unlink($cvspsfile);
1000}
1001
1002# The heuristic of repacking every 1024 commits can leave a
1003# lot of unpacked data.  If there is more than 1MB worth of
1004# not-packed objects, repack once more.
1005my $line = `git-count-objects`;
1006if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1007  my ($n_objects, $kb) = ($1, $2);
1008  1024 < $kb
1009    and system("git repack -a -d");
1010}
1011
1012foreach my $git_index (values %index) {
1013    if ($git_index ne "$git_dir/index") {
1014        unlink($git_index);
1015    }
1016}
1017
1018if (defined $orig_git_index) {
1019        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1020} else {
1021        delete $ENV{GIT_INDEX_FILE};
1022}
1023
1024# Now switch back to the branch we were in before all of this happened
1025if ($orig_branch) {
1026        print "DONE.\n" if $opt_v;
1027        if ($opt_i) {
1028                exit 0;
1029        }
1030        my $tip_at_end = `git-rev-parse --verify HEAD`;
1031        if ($tip_at_start ne $tip_at_end) {
1032                for ($tip_at_start, $tip_at_end) { chomp; }
1033                print "Fetched into the current branch.\n" if $opt_v;
1034                system(qw(git-read-tree -u -m),
1035                       $tip_at_start, $tip_at_end);
1036                die "Fast-forward update failed: $?\n" if $?;
1037        }
1038        else {
1039                system(qw(git-merge cvsimport HEAD), "$remote/$opt_o");
1040                die "Could not merge $opt_o into the current branch.\n" if $?;
1041        }
1042} else {
1043        $orig_branch = "master";
1044        print "DONE; creating $orig_branch branch\n" if $opt_v;
1045        system("git-update-ref", "refs/heads/master", "$remote/$opt_o")
1046                unless defined get_headref('refs/heads/master');
1047        system("git-symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1048                if ($opt_r && $opt_o ne 'HEAD');
1049        system('git-update-ref', 'HEAD', "$orig_branch");
1050        unless ($opt_i) {
1051                system('git checkout -f');
1052                die "checkout failed: $?\n" if $?;
1053        }
1054}