git-cvsimport.perlon commit magic pathspec: add tentative ":/path/from/top/level" pathspec support (8a42c98)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime dup2 ENOENT);
  28use IPC::Open2;
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email);
  35
  36sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48
  49sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53
  54        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # However, we also read from CVSROOT/users format
  63                # to ease migration.
  64                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  65                        my $mapped;
  66                        ($user, $mapped) = ($1, $3);
  67                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  68                                $conv_author_name{$user} = $1;
  69                                $conv_author_email{$user} = $2;
  70                        }
  71                        elsif ($mapped =~ /^<?(.*)>?$/) {
  72                                $conv_author_name{$user} = $user;
  73                                $conv_author_email{$user} = $1;
  74                        }
  75                }
  76                # NEEDSWORK: Maybe warn on unrecognized lines?
  77        }
  78        close ($f);
  79}
  80
  81sub write_author_info($) {
  82        my ($file) = @_;
  83        open my $f, '>', $file or
  84          die("Failed to open $file for writing: $!");
  85
  86        foreach (keys %conv_author_name) {
  87                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  88        }
  89        close ($f);
  90}
  91
  92# convert getopts specs for use by git config
  93my %longmap = (
  94        'A:' => 'authors-file',
  95        'M:' => 'merge-regex',
  96        'P:' => undef,
  97        'R' => 'track-revisions',
  98        'S:' => 'ignore-paths',
  99);
 100
 101sub read_repo_config {
 102        # Split the string between characters, unless there is a ':'
 103        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 104        my @opts = split(/ *(?!:)/, shift);
 105        foreach my $o (@opts) {
 106                my $key = $o;
 107                $key =~ s/://g;
 108                my $arg = 'git config';
 109                $arg .= ' --bool' if ($o !~ /:$/);
 110                my $ckey = $key;
 111
 112                if (exists $longmap{$o}) {
 113                        # An uppercase option like -R cannot be
 114                        # expressed in the configuration, as the
 115                        # variable names are downcased.
 116                        $ckey = $longmap{$o};
 117                        next if (! defined $ckey);
 118                        $ckey =~ s/-//g;
 119                }
 120                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 121                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 122                        no strict 'refs';
 123                        my $opt_name = "opt_" . $key;
 124                        if (!$$opt_name) {
 125                                $$opt_name = $tmp;
 126                        }
 127                }
 128        }
 129}
 130
 131my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 132read_repo_config($opts);
 133Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 134
 135# turn the Getopt::Std specification in a Getopt::Long one,
 136# with support for multiple -M options
 137GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 138    or usage();
 139usage if $opt_h;
 140
 141if (@ARGV == 0) {
 142                chomp(my $module = `git config --get cvsimport.module`);
 143                push(@ARGV, $module) if $? == 0;
 144}
 145@ARGV <= 1 or usage("You can't specify more than one CVS module");
 146
 147if ($opt_d) {
 148        $ENV{"CVSROOT"} = $opt_d;
 149} elsif (-f 'CVS/Root') {
 150        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 151        $opt_d = <$f>;
 152        chomp $opt_d;
 153        close $f;
 154        $ENV{"CVSROOT"} = $opt_d;
 155} elsif ($ENV{"CVSROOT"}) {
 156        $opt_d = $ENV{"CVSROOT"};
 157} else {
 158        usage("CVSROOT needs to be set");
 159}
 160$opt_s ||= "-";
 161$opt_a ||= 0;
 162
 163my $git_tree = $opt_C;
 164$git_tree ||= ".";
 165
 166my $remote;
 167if (defined $opt_r) {
 168        $remote = 'refs/remotes/' . $opt_r;
 169        $opt_o ||= "master";
 170} else {
 171        $opt_o ||= "origin";
 172        $remote = 'refs/heads';
 173}
 174
 175my $cvs_tree;
 176if ($#ARGV == 0) {
 177        $cvs_tree = $ARGV[0];
 178} elsif (-f 'CVS/Repository') {
 179        open my $f, '<', 'CVS/Repository' or
 180            die 'Failed to open CVS/Repository';
 181        $cvs_tree = <$f>;
 182        chomp $cvs_tree;
 183        close $f;
 184} else {
 185        usage("CVS module has to be specified");
 186}
 187
 188our @mergerx = ();
 189if ($opt_m) {
 190        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 191}
 192if (@opt_M) {
 193        push (@mergerx, map { qr/$_/ } @opt_M);
 194}
 195
 196# Remember UTC of our starting time
 197# we'll want to avoid importing commits
 198# that are too recent
 199our $starttime = time();
 200
 201select(STDERR); $|=1; select(STDOUT);
 202
 203
 204package CVSconn;
 205# Basic CVS dialog.
 206# We're only interested in connecting and downloading, so ...
 207
 208use File::Spec;
 209use File::Temp qw(tempfile);
 210use POSIX qw(strftime dup2);
 211
 212sub new {
 213        my ($what,$repo,$subdir) = @_;
 214        $what=ref($what) if ref($what);
 215
 216        my $self = {};
 217        $self->{'buffer'} = "";
 218        bless($self,$what);
 219
 220        $repo =~ s#/+$##;
 221        $self->{'fullrep'} = $repo;
 222        $self->conn();
 223
 224        $self->{'subdir'} = $subdir;
 225        $self->{'lines'} = undef;
 226
 227        return $self;
 228}
 229
 230sub conn {
 231        my $self = shift;
 232        my $repo = $self->{'fullrep'};
 233        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 234                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 235
 236                my ($proxyhost,$proxyport);
 237                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 238                        $proxyhost = $1;
 239                        # Default proxyport, if not specified, is 8080.
 240                        $proxyport = 8080;
 241                        if ($ENV{"CVS_PROXY_PORT"}) {
 242                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 243                        }
 244                        if ($param =~ m/proxyport=([^;]+)/) {
 245                                $proxyport = $1;
 246                        }
 247                }
 248                $repo ||= '/';
 249
 250                # if username is not explicit in CVSROOT, then use current user, as cvs would
 251                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 252                my $rr2 = "-";
 253                unless ($port) {
 254                        $rr2 = ":pserver:$user\@$serv:$repo";
 255                        $port=2401;
 256                }
 257                my $rr = ":pserver:$user\@$serv:$port$repo";
 258
 259                if ($pass) {
 260                        $pass = $self->_scramble($pass);
 261                } else {
 262                        open(H,$ENV{'HOME'}."/.cvspass") and do {
 263                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 264                                while (<H>) {
 265                                        chomp;
 266                                        s/^\/\d+\s+//;
 267                                        my ($w,$p) = split(/\s/,$_,2);
 268                                        if ($w eq $rr or $w eq $rr2) {
 269                                                $pass = $p;
 270                                                last;
 271                                        }
 272                                }
 273                        };
 274                        $pass = "A" unless $pass;
 275                }
 276
 277                my ($s, $rep);
 278                if ($proxyhost) {
 279
 280                        # Use a HTTP Proxy. Only works for HTTP proxies that
 281                        # don't require user authentication
 282                        #
 283                        # See: http://www.ietf.org/rfc/rfc2817.txt
 284
 285                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 286                        die "Socket to $proxyhost: $!\n" unless defined $s;
 287                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 288                                or die "Write to $proxyhost: $!\n";
 289                        $s->flush();
 290
 291                        $rep = <$s>;
 292
 293                        # The answer should look like 'HTTP/1.x 2yy ....'
 294                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 295                                die "Proxy connect: $rep\n";
 296                        }
 297                        # Skip up to the empty line of the proxy server output
 298                        # including the response headers.
 299                        while ($rep = <$s>) {
 300                                last if (!defined $rep ||
 301                                         $rep eq "\n" ||
 302                                         $rep eq "\r\n");
 303                        }
 304                } else {
 305                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 306                        die "Socket to $serv: $!\n" unless defined $s;
 307                }
 308
 309                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 310                        or die "Write to $serv: $!\n";
 311                $s->flush();
 312
 313                $rep = <$s>;
 314
 315                if ($rep ne "I LOVE YOU\n") {
 316                        $rep="<unknown>" unless $rep;
 317                        die "AuthReply: $rep\n";
 318                }
 319                $self->{'socketo'} = $s;
 320                $self->{'socketi'} = $s;
 321        } else { # local or ext: Fork off our own cvs server.
 322                my $pr = IO::Pipe->new();
 323                my $pw = IO::Pipe->new();
 324                my $pid = fork();
 325                die "Fork: $!\n" unless defined $pid;
 326                my $cvs = 'cvs';
 327                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 328                my $rsh = 'rsh';
 329                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 330
 331                my @cvs = ($cvs, 'server');
 332                my ($local, $user, $host);
 333                $local = $repo =~ s/:local://;
 334                if (!$local) {
 335                    $repo =~ s/:ext://;
 336                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 337                    ($user, $host) = ($1, $2);
 338                }
 339                if (!$local) {
 340                    if ($user) {
 341                        unshift @cvs, $rsh, '-l', $user, $host;
 342                    } else {
 343                        unshift @cvs, $rsh, $host;
 344                    }
 345                }
 346
 347                unless ($pid) {
 348                        $pr->writer();
 349                        $pw->reader();
 350                        dup2($pw->fileno(),0);
 351                        dup2($pr->fileno(),1);
 352                        $pr->close();
 353                        $pw->close();
 354                        exec(@cvs);
 355                }
 356                $pw->writer();
 357                $pr->reader();
 358                $self->{'socketo'} = $pw;
 359                $self->{'socketi'} = $pr;
 360        }
 361        $self->{'socketo'}->write("Root $repo\n");
 362
 363        # Trial and error says that this probably is the minimum set
 364        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 365
 366        $self->{'socketo'}->write("valid-requests\n");
 367        $self->{'socketo'}->flush();
 368
 369        my $rep=$self->readline();
 370        die "Failed to read from server" unless defined $rep;
 371        chomp($rep);
 372        if ($rep !~ s/^Valid-requests\s*//) {
 373                $rep="<unknown>" unless $rep;
 374                die "Expected Valid-requests from server, but got: $rep\n";
 375        }
 376        chomp(my $res=$self->readline());
 377        die "validReply: $res\n" if $res ne "ok";
 378
 379        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 380        $self->{'repo'} = $repo;
 381}
 382
 383sub readline {
 384        my ($self) = @_;
 385        return $self->{'socketi'}->getline();
 386}
 387
 388sub _file {
 389        # Request a file with a given revision.
 390        # Trial and error says this is a good way to do it. :-/
 391        my ($self,$fn,$rev) = @_;
 392        $self->{'socketo'}->write("Argument -N\n") or return undef;
 393        $self->{'socketo'}->write("Argument -P\n") or return undef;
 394        # -kk: Linus' version doesn't use it - defaults to off
 395        if ($opt_k) {
 396            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 397        }
 398        $self->{'socketo'}->write("Argument -r\n") or return undef;
 399        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 400        $self->{'socketo'}->write("Argument --\n") or return undef;
 401        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 402        $self->{'socketo'}->write("Directory .\n") or return undef;
 403        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 404        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 405        $self->{'socketo'}->write("co\n") or return undef;
 406        $self->{'socketo'}->flush() or return undef;
 407        $self->{'lines'} = 0;
 408        return 1;
 409}
 410sub _line {
 411        # Read a line from the server.
 412        # ... except that 'line' may be an entire file. ;-)
 413        my ($self, $fh) = @_;
 414        die "Not in lines" unless defined $self->{'lines'};
 415
 416        my $line;
 417        my $res=0;
 418        while (defined($line = $self->readline())) {
 419                # M U gnupg-cvs-rep/AUTHORS
 420                # Updated gnupg-cvs-rep/
 421                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 422                # /AUTHORS/1.1///T1.1
 423                # u=rw,g=rw,o=rw
 424                # 0
 425                # ok
 426
 427                if ($line =~ s/^(?:Created|Updated) //) {
 428                        $line = $self->readline(); # path
 429                        $line = $self->readline(); # Entries line
 430                        my $mode = $self->readline(); chomp $mode;
 431                        $self->{'mode'} = $mode;
 432                        defined (my $cnt = $self->readline())
 433                                or die "EOF from server after 'Changed'\n";
 434                        chomp $cnt;
 435                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 436                        $line="";
 437                        $res = $self->_fetchfile($fh, $cnt);
 438                } elsif ($line =~ s/^ //) {
 439                        print $fh $line;
 440                        $res += length($line);
 441                } elsif ($line =~ /^M\b/) {
 442                        # output, do nothing
 443                } elsif ($line =~ /^Mbinary\b/) {
 444                        my $cnt;
 445                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 446                        chomp $cnt;
 447                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 448                        $line="";
 449                        $res += $self->_fetchfile($fh, $cnt);
 450                } else {
 451                        chomp $line;
 452                        if ($line eq "ok") {
 453                                # print STDERR "S: ok (".length($res).")\n";
 454                                return $res;
 455                        } elsif ($line =~ s/^E //) {
 456                                # print STDERR "S: $line\n";
 457                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 458                                $line = $self->readline(); # filename
 459                                $line = $self->readline(); # OK
 460                                chomp $line;
 461                                die "Unknown: $line" if $line ne "ok";
 462                                return -1;
 463                        } else {
 464                                die "Unknown: $line\n";
 465                        }
 466                }
 467        }
 468        return undef;
 469}
 470sub file {
 471        my ($self,$fn,$rev) = @_;
 472        my $res;
 473
 474        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 475                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 476
 477        $self->_file($fn,$rev) and $res = $self->_line($fh);
 478
 479        if (!defined $res) {
 480            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 481            truncate $fh, 0;
 482            $self->conn();
 483            $self->_file($fn,$rev) or die "No file command send";
 484            $res = $self->_line($fh);
 485            die "Retry failed" unless defined $res;
 486        }
 487        close ($fh);
 488
 489        return ($name, $res);
 490}
 491sub _fetchfile {
 492        my ($self, $fh, $cnt) = @_;
 493        my $res = 0;
 494        my $bufsize = 1024 * 1024;
 495        while ($cnt) {
 496            if ($bufsize > $cnt) {
 497                $bufsize = $cnt;
 498            }
 499            my $buf;
 500            my $num = $self->{'socketi'}->read($buf,$bufsize);
 501            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 502            print $fh $buf;
 503            $res += $num;
 504            $cnt -= $num;
 505        }
 506        return $res;
 507}
 508
 509sub _scramble {
 510        my ($self, $pass) = @_;
 511        my $scrambled = "A";
 512
 513        return $scrambled unless $pass;
 514
 515        my $pass_len = length($pass);
 516        my @pass_arr = split("", $pass);
 517        my $i;
 518
 519        # from cvs/src/scramble.c
 520        my @shifts = (
 521                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 522                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 523                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 524                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 525                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 526                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 527                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 528                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 529                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 530                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 531                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 532                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 533                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 534                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 535                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 536                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 537        );
 538
 539        for ($i = 0; $i < $pass_len; $i++) {
 540                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 541        }
 542
 543        return $scrambled;
 544}
 545
 546package main;
 547
 548my $cvs = CVSconn->new($opt_d, $cvs_tree);
 549
 550
 551sub pdate($) {
 552        my ($d) = @_;
 553        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 554                or die "Unparseable date: $d\n";
 555        my $y=$1; $y-=1900 if $y>1900;
 556        return timegm($6||0,$5,$4,$3,$2-1,$y);
 557}
 558
 559sub pmode($) {
 560        my ($mode) = @_;
 561        my $m = 0;
 562        my $mm = 0;
 563        my $um = 0;
 564        for my $x(split(//,$mode)) {
 565                if ($x eq ",") {
 566                        $m |= $mm&$um;
 567                        $mm = 0;
 568                        $um = 0;
 569                } elsif ($x eq "u") { $um |= 0700;
 570                } elsif ($x eq "g") { $um |= 0070;
 571                } elsif ($x eq "o") { $um |= 0007;
 572                } elsif ($x eq "r") { $mm |= 0444;
 573                } elsif ($x eq "w") { $mm |= 0222;
 574                } elsif ($x eq "x") { $mm |= 0111;
 575                } elsif ($x eq "=") { # do nothing
 576                } else { die "Unknown mode: $mode\n";
 577                }
 578        }
 579        $m |= $mm&$um;
 580        return $m;
 581}
 582
 583sub getwd() {
 584        my $pwd = `pwd`;
 585        chomp $pwd;
 586        return $pwd;
 587}
 588
 589sub is_sha1 {
 590        my $s = shift;
 591        return $s =~ /^[a-f0-9]{40}$/;
 592}
 593
 594sub get_headref ($) {
 595        my $name = shift;
 596        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 597        return undef unless $? == 0;
 598        chomp $r;
 599        return $r;
 600}
 601
 602my $user_filename_prepend = '';
 603sub munge_user_filename {
 604        my $name = shift;
 605        return File::Spec->file_name_is_absolute($name) ?
 606                $name :
 607                $user_filename_prepend . $name;
 608}
 609
 610-d $git_tree
 611        or mkdir($git_tree,0777)
 612        or die "Could not create $git_tree: $!";
 613if ($git_tree ne '.') {
 614        $user_filename_prepend = getwd() . '/';
 615        chdir($git_tree);
 616}
 617
 618my $last_branch = "";
 619my $orig_branch = "";
 620my %branch_date;
 621my $tip_at_start = undef;
 622
 623my $git_dir = $ENV{"GIT_DIR"} || ".git";
 624$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 625$ENV{"GIT_DIR"} = $git_dir;
 626my $orig_git_index;
 627$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 628
 629my %index; # holds filenames of one index per branch
 630
 631unless (-d $git_dir) {
 632        system(qw(git init));
 633        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 634        system(qw(git read-tree --empty));
 635        die "Cannot init an empty tree: $?\n" if $?;
 636
 637        $last_branch = $opt_o;
 638        $orig_branch = "";
 639} else {
 640        open(F, "-|", qw(git symbolic-ref HEAD)) or
 641                die "Cannot run git symbolic-ref: $!\n";
 642        chomp ($last_branch = <F>);
 643        $last_branch = basename($last_branch);
 644        close(F);
 645        unless ($last_branch) {
 646                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 647                $last_branch = "master";
 648        }
 649        $orig_branch = $last_branch;
 650        $tip_at_start = `git rev-parse --verify HEAD`;
 651
 652        # Get the last import timestamps
 653        my $fmt = '($ref, $author) = (%(refname), %(author));';
 654        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 655        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 656        while (defined(my $entry = <H>)) {
 657                my ($ref, $author);
 658                eval($entry) || die "cannot eval refs list: $@";
 659                my ($head) = ($ref =~ m|^$remote/(.*)|);
 660                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 661                $branch_date{$head} = $1;
 662        }
 663        close(H);
 664        if (!exists $branch_date{$opt_o}) {
 665                die "Branch '$opt_o' does not exist.\n".
 666                       "Either use the correct '-o branch' option,\n".
 667                       "or import to a new repository.\n";
 668        }
 669}
 670
 671-d $git_dir
 672        or die "Could not create git subdir ($git_dir).\n";
 673
 674# now we read (and possibly save) author-info as well
 675-f "$git_dir/cvs-authors" and
 676  read_author_info("$git_dir/cvs-authors");
 677if ($opt_A) {
 678        read_author_info(munge_user_filename($opt_A));
 679        write_author_info("$git_dir/cvs-authors");
 680}
 681
 682# open .git/cvs-revisions, if requested
 683open my $revision_map, '>>', "$git_dir/cvs-revisions"
 684    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 685        if defined $opt_R;
 686
 687
 688#
 689# run cvsps into a file unless we are getting
 690# it passed as a file via $opt_P
 691#
 692my $cvspsfile;
 693unless ($opt_P) {
 694        print "Running cvsps...\n" if $opt_v;
 695        my $pid = open(CVSPS,"-|");
 696        my $cvspsfh;
 697        die "Cannot fork: $!\n" unless defined $pid;
 698        unless ($pid) {
 699                my @opt;
 700                @opt = split(/,/,$opt_p) if defined $opt_p;
 701                unshift @opt, '-z', $opt_z if defined $opt_z;
 702                unshift @opt, '-q'         unless defined $opt_v;
 703                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 704                        push @opt, '--cvs-direct';
 705                }
 706                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 707                die "Could not start cvsps: $!\n";
 708        }
 709        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 710                                          DIR => File::Spec->tmpdir());
 711        while (<CVSPS>) {
 712            print $cvspsfh $_;
 713        }
 714        close CVSPS;
 715        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 716        close $cvspsfh;
 717} else {
 718        $cvspsfile = munge_user_filename($opt_P);
 719}
 720
 721open(CVS, "<$cvspsfile") or die $!;
 722
 723## cvsps output:
 724#---------------------
 725#PatchSet 314
 726#Date: 1999/09/18 13:03:59
 727#Author: wkoch
 728#Branch: STABLE-BRANCH-1-0
 729#Ancestor branch: HEAD
 730#Tag: (none)
 731#Log:
 732#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 733#Members:
 734#       README:1.57->1.57.2.1
 735#       VERSION:1.96->1.96.2.1
 736#
 737#---------------------
 738
 739my $state = 0;
 740
 741sub update_index (\@\@) {
 742        my $old = shift;
 743        my $new = shift;
 744        open(my $fh, '|-', qw(git update-index -z --index-info))
 745                or die "unable to open git update-index: $!";
 746        print $fh
 747                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 748                        @$old),
 749                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 750                        @$new)
 751                or die "unable to write to git update-index: $!";
 752        close $fh
 753                or die "unable to write to git update-index: $!";
 754        $? and die "git update-index reported error: $?";
 755}
 756
 757sub write_tree () {
 758        open(my $fh, '-|', qw(git write-tree))
 759                or die "unable to open git write-tree: $!";
 760        chomp(my $tree = <$fh>);
 761        is_sha1($tree)
 762                or die "Cannot get tree id ($tree): $!";
 763        close($fh)
 764                or die "Error running git write-tree: $?\n";
 765        print "Tree ID $tree\n" if $opt_v;
 766        return $tree;
 767}
 768
 769my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 770my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 771
 772# commits that cvsps cannot place anywhere...
 773$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 774
 775sub commit {
 776        if ($branch eq $opt_o && !$index{branch} &&
 777                !get_headref("$remote/$branch")) {
 778            # looks like an initial commit
 779            # use the index primed by git init
 780            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 781            $index{$branch} = "$git_dir/index";
 782        } else {
 783            # use an index per branch to speed up
 784            # imports of projects with many branches
 785            unless ($index{$branch}) {
 786                $index{$branch} = tmpnam();
 787                $ENV{GIT_INDEX_FILE} = $index{$branch};
 788                if ($ancestor) {
 789                    system("git", "read-tree", "$remote/$ancestor");
 790                } else {
 791                    system("git", "read-tree", "$remote/$branch");
 792                }
 793                die "read-tree failed: $?\n" if $?;
 794            }
 795        }
 796        $ENV{GIT_INDEX_FILE} = $index{$branch};
 797
 798        update_index(@old, @new);
 799        @old = @new = ();
 800        my $tree = write_tree();
 801        my $parent = get_headref("$remote/$last_branch");
 802        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 803
 804        my @commit_args;
 805        push @commit_args, ("-p", $parent) if $parent;
 806
 807        # loose detection of merges
 808        # based on the commit msg
 809        foreach my $rx (@mergerx) {
 810                next unless $logmsg =~ $rx && $1;
 811                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 812                if (my $sha1 = get_headref("$remote/$mparent")) {
 813                        push @commit_args, '-p', "$remote/$mparent";
 814                        print "Merge parent branch: $mparent\n" if $opt_v;
 815                }
 816        }
 817
 818        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 819        $ENV{GIT_AUTHOR_NAME} = $author_name;
 820        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 821        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 822        $ENV{GIT_COMMITTER_NAME} = $author_name;
 823        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 824        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 825        my $pid = open2(my $commit_read, my $commit_write,
 826                'git', 'commit-tree', $tree, @commit_args);
 827
 828        # compatibility with git2cvs
 829        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 830        $logmsg =~ s/[\s\n]+\z//;
 831
 832        if (@skipped) {
 833            $logmsg .= "\n\n\nSKIPPED:\n\t";
 834            $logmsg .= join("\n\t", @skipped) . "\n";
 835            @skipped = ();
 836        }
 837
 838        print($commit_write "$logmsg\n") && close($commit_write)
 839                or die "Error writing to git commit-tree: $!\n";
 840
 841        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 842        chomp(my $cid = <$commit_read>);
 843        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 844        print "Commit ID $cid\n" if $opt_v;
 845        close($commit_read);
 846
 847        waitpid($pid,0);
 848        die "Error running git commit-tree: $?\n" if $?;
 849
 850        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 851                or die "Cannot write branch $branch for update: $!\n";
 852
 853        if ($revision_map) {
 854                print $revision_map "@$_ $cid\n" for @commit_revisions;
 855        }
 856        @commit_revisions = ();
 857
 858        if ($tag) {
 859                my ($xtag) = $tag;
 860                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 861                $xtag =~ tr/_/\./ if ( $opt_u );
 862                $xtag =~ s/[\/]/$opt_s/g;
 863                $xtag =~ s/\[//g;
 864
 865                system('git' , 'tag', '-f', $xtag, $cid) == 0
 866                        or die "Cannot create tag $xtag: $!\n";
 867
 868                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 869        }
 870};
 871
 872my $commitcount = 1;
 873while (<CVS>) {
 874        chomp;
 875        if ($state == 0 and /^-+$/) {
 876                $state = 1;
 877        } elsif ($state == 0) {
 878                $state = 1;
 879                redo;
 880        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 881                $patchset = 0+$_;
 882                $state=2;
 883        } elsif ($state == 2 and s/^Date:\s+//) {
 884                $date = pdate($_);
 885                unless ($date) {
 886                        print STDERR "Could not parse date: $_\n";
 887                        $state=0;
 888                        next;
 889                }
 890                $state=3;
 891        } elsif ($state == 3 and s/^Author:\s+//) {
 892                s/\s+$//;
 893                if (/^(.*?)\s+<(.*)>/) {
 894                    ($author_name, $author_email) = ($1, $2);
 895                } elsif ($conv_author_name{$_}) {
 896                        $author_name = $conv_author_name{$_};
 897                        $author_email = $conv_author_email{$_};
 898                } else {
 899                    $author_name = $author_email = $_;
 900                }
 901                $state = 4;
 902        } elsif ($state == 4 and s/^Branch:\s+//) {
 903                s/\s+$//;
 904                tr/_/\./ if ( $opt_u );
 905                s/[\/]/$opt_s/g;
 906                $branch = $_;
 907                $state = 5;
 908        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 909                s/\s+$//;
 910                $ancestor = $_;
 911                $ancestor = $opt_o if $ancestor eq "HEAD";
 912                $state = 6;
 913        } elsif ($state == 5) {
 914                $ancestor = undef;
 915                $state = 6;
 916                redo;
 917        } elsif ($state == 6 and s/^Tag:\s+//) {
 918                s/\s+$//;
 919                if ($_ eq "(none)") {
 920                        $tag = undef;
 921                } else {
 922                        $tag = $_;
 923                }
 924                $state = 7;
 925        } elsif ($state == 7 and /^Log:/) {
 926                $logmsg = "";
 927                $state = 8;
 928        } elsif ($state == 8 and /^Members:/) {
 929                $branch = $opt_o if $branch eq "HEAD";
 930                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 931                        # skip
 932                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 933                        $state = 11;
 934                        next;
 935                }
 936                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 937                        # skip if the commit is too recent
 938                        # given that the cvsps default fuzz is 300s, we give ourselves another
 939                        # 300s just in case -- this also prevents skipping commits
 940                        # due to server clock drift
 941                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 942                        $state = 11;
 943                        next;
 944                }
 945                if (exists $ignorebranch{$branch}) {
 946                        print STDERR "Skipping $branch\n";
 947                        $state = 11;
 948                        next;
 949                }
 950                if ($ancestor) {
 951                        if ($ancestor eq $branch) {
 952                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 953                                $ancestor = $opt_o;
 954                        }
 955                        if (defined get_headref("$remote/$branch")) {
 956                                print STDERR "Branch $branch already exists!\n";
 957                                $state=11;
 958                                next;
 959                        }
 960                        my $id = get_headref("$remote/$ancestor");
 961                        if (!$id) {
 962                                print STDERR "Branch $ancestor does not exist!\n";
 963                                $ignorebranch{$branch} = 1;
 964                                $state=11;
 965                                next;
 966                        }
 967
 968                        system(qw(git update-ref -m cvsimport),
 969                                "$remote/$branch", $id);
 970                        if($? != 0) {
 971                                print STDERR "Could not create branch $branch\n";
 972                                $ignorebranch{$branch} = 1;
 973                                $state=11;
 974                                next;
 975                        }
 976                }
 977                $last_branch = $branch if $branch ne $last_branch;
 978                $state = 9;
 979        } elsif ($state == 8) {
 980                $logmsg .= "$_\n";
 981        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
 982#       VERSION:1.96->1.96.2.1
 983                my $init = ($2 eq "INITIAL");
 984                my $fn = $1;
 985                my $rev = $3;
 986                $fn =~ s#^/+##;
 987                if ($opt_S && $fn =~ m/$opt_S/) {
 988                    print "SKIPPING $fn v $rev\n";
 989                    push(@skipped, $fn);
 990                    next;
 991                }
 992                push @commit_revisions, [$fn, $rev];
 993                print "Fetching $fn   v $rev\n" if $opt_v;
 994                my ($tmpname, $size) = $cvs->file($fn,$rev);
 995                if ($size == -1) {
 996                        push(@old,$fn);
 997                        print "Drop $fn\n" if $opt_v;
 998                } else {
 999                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1000                        my $pid = open(my $F, '-|');
1001                        die $! unless defined $pid;
1002                        if (!$pid) {
1003                            exec("git", "hash-object", "-w", $tmpname)
1004                                or die "Cannot create object: $!\n";
1005                        }
1006                        my $sha = <$F>;
1007                        chomp $sha;
1008                        close $F;
1009                        my $mode = pmode($cvs->{'mode'});
1010                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1011                }
1012                unlink($tmpname);
1013        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1014                my $fn = $1;
1015                my $rev = $2;
1016                $fn =~ s#^/+##;
1017                push @commit_revisions, [$fn, $rev];
1018                push(@old,$fn);
1019                print "Delete $fn\n" if $opt_v;
1020        } elsif ($state == 9 and /^\s*$/) {
1021                $state = 10;
1022        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1023                $commitcount++;
1024                if ($opt_L && $commitcount > $opt_L) {
1025                        last;
1026                }
1027                commit();
1028                if (($commitcount & 1023) == 0) {
1029                        system(qw(git repack -a -d));
1030                }
1031                $state = 1;
1032        } elsif ($state == 11 and /^-+$/) {
1033                $state = 1;
1034        } elsif (/^-+$/) { # end of unknown-line processing
1035                $state = 1;
1036        } elsif ($state != 11) { # ignore stuff when skipping
1037                print STDERR "* UNKNOWN LINE * $_\n";
1038        }
1039}
1040commit() if $branch and $state != 11;
1041
# Remove the cvsps output file unless -P was given.
unlink($cvspsfile) unless $opt_P;

# Repacking only every 1024 commits can leave plenty of loose
# objects behind.  Repack one more time when they add up to more
# than 1MB.
my $line = `git count-objects`;
if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
        my $kb = $2;
        system(qw(git repack -a -d)) if $kb > 1024;
}

# Remove the per-branch index files, but keep the repository's own index.
foreach my $git_index (values %index) {
        next if $git_index eq "$git_dir/index";
        unlink($git_index);
}

# Put GIT_INDEX_FILE back to its saved value, or unset it if none was saved.
if (!defined $orig_git_index) {
        delete $ENV{GIT_INDEX_FILE};
} else {
        $ENV{GIT_INDEX_FILE} = $orig_git_index;
}
1067
# Finally bring the checked-out branch up to date: update or merge into
# the branch we started on, or create and check out "master" if there
# was none.
if ($orig_branch) {
        $opt_v and print "DONE.\n";
        # -i: import only
        exit 0 if $opt_i;

        my $tip_at_end = `git rev-parse --verify HEAD`;
        if ($tip_at_start eq $tip_at_end) {
                # HEAD did not move during the import: merge the imported branch
                system('git', 'merge', 'cvsimport', 'HEAD', "$remote/$opt_o");
                $? and die "Could not merge $opt_o into the current branch.\n";
        } else {
                # HEAD moved during the import: update index and work tree
                chomp($tip_at_start, $tip_at_end);
                $opt_v and print "Fetched into the current branch.\n";
                system('git', 'read-tree', '-u', '-m',
                       $tip_at_start, $tip_at_end);
                $? and die "Fast-forward update failed: $?\n";
        }
} else {
        $orig_branch = "master";
        $opt_v and print "DONE; creating $orig_branch branch\n";

        if (!defined get_headref('refs/heads/master')) {
                system("git", "update-ref", "refs/heads/master", "$remote/$opt_o");
        }
        if ($opt_r && $opt_o ne 'HEAD') {
                system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o");
        }
        system('git', 'update-ref', 'HEAD', "$orig_branch");

        if (!$opt_i) {
                system('git', 'checkout', '-f');
                $? and die "checkout failed: $?\n";
        }
}