git-cvsimport.perlon commit get_remote_url(): use the same data source as ls-remote to get remote urls (45781ad)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime dup2 ENOENT);
  28use IPC::Open2;
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email);
  35
  36sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48
  49sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53
  54        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # However, we also read from CVSROOT/users format
  63                # to ease migration.
  64                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  65                        my $mapped;
  66                        ($user, $mapped) = ($1, $3);
  67                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  68                                $conv_author_name{$user} = $1;
  69                                $conv_author_email{$user} = $2;
  70                        }
  71                        elsif ($mapped =~ /^<?(.*)>?$/) {
  72                                $conv_author_name{$user} = $user;
  73                                $conv_author_email{$user} = $1;
  74                        }
  75                }
  76                # NEEDSWORK: Maybe warn on unrecognized lines?
  77        }
  78        close ($f);
  79}
  80
  81sub write_author_info($) {
  82        my ($file) = @_;
  83        open my $f, '>', $file or
  84          die("Failed to open $file for writing: $!");
  85
  86        foreach (keys %conv_author_name) {
  87                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  88        }
  89        close ($f);
  90}
  91
  92# convert getopts specs for use by git config
  93my %longmap = (
  94        'A:' => 'authors-file',
  95        'M:' => 'merge-regex',
  96        'P:' => undef,
  97        'R' => 'track-revisions',
  98        'S:' => 'ignore-paths',
  99);
 100
 101sub read_repo_config {
 102        # Split the string between characters, unless there is a ':'
 103        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 104        my @opts = split(/ *(?!:)/, shift);
 105        foreach my $o (@opts) {
 106                my $key = $o;
 107                $key =~ s/://g;
 108                my $arg = 'git config';
 109                $arg .= ' --bool' if ($o !~ /:$/);
 110                my $ckey = $key;
 111
 112                if (exists $longmap{$o}) {
 113                        # An uppercase option like -R cannot be
 114                        # expressed in the configuration, as the
 115                        # variable names are downcased.
 116                        $ckey = $longmap{$o};
 117                        next if (! defined $ckey);
 118                        $ckey =~ s/-//g;
 119                }
 120                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 121                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 122                        no strict 'refs';
 123                        my $opt_name = "opt_" . $key;
 124                        if (!$$opt_name) {
 125                                $$opt_name = $tmp;
 126                        }
 127                }
 128        }
 129}
 130
 131my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 132read_repo_config($opts);
 133Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 134
 135# turn the Getopt::Std specification in a Getopt::Long one,
 136# with support for multiple -M options
 137GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 138    or usage();
 139usage if $opt_h;
 140
 141if (@ARGV == 0) {
 142                chomp(my $module = `git config --get cvsimport.module`);
 143                push(@ARGV, $module) if $? == 0;
 144}
 145@ARGV <= 1 or usage("You can't specify more than one CVS module");
 146
 147if ($opt_d) {
 148        $ENV{"CVSROOT"} = $opt_d;
 149} elsif (-f 'CVS/Root') {
 150        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 151        $opt_d = <$f>;
 152        chomp $opt_d;
 153        close $f;
 154        $ENV{"CVSROOT"} = $opt_d;
 155} elsif ($ENV{"CVSROOT"}) {
 156        $opt_d = $ENV{"CVSROOT"};
 157} else {
 158        usage("CVSROOT needs to be set");
 159}
 160$opt_s ||= "-";
 161$opt_a ||= 0;
 162
 163my $git_tree = $opt_C;
 164$git_tree ||= ".";
 165
 166my $remote;
 167if (defined $opt_r) {
 168        $remote = 'refs/remotes/' . $opt_r;
 169        $opt_o ||= "master";
 170} else {
 171        $opt_o ||= "origin";
 172        $remote = 'refs/heads';
 173}
 174
 175my $cvs_tree;
 176if ($#ARGV == 0) {
 177        $cvs_tree = $ARGV[0];
 178} elsif (-f 'CVS/Repository') {
 179        open my $f, '<', 'CVS/Repository' or
 180            die 'Failed to open CVS/Repository';
 181        $cvs_tree = <$f>;
 182        chomp $cvs_tree;
 183        close $f;
 184} else {
 185        usage("CVS module has to be specified");
 186}
 187
 188our @mergerx = ();
 189if ($opt_m) {
 190        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 191}
 192if (@opt_M) {
 193        push (@mergerx, map { qr/$_/ } @opt_M);
 194}
 195
 196# Remember UTC of our starting time
 197# we'll want to avoid importing commits
 198# that are too recent
 199our $starttime = time();
 200
 201select(STDERR); $|=1; select(STDOUT);
 202
 203
 204package CVSconn;
 205# Basic CVS dialog.
 206# We're only interested in connecting and downloading, so ...
 207
 208use File::Spec;
 209use File::Temp qw(tempfile);
 210use POSIX qw(strftime dup2);
 211
 212sub new {
 213        my ($what,$repo,$subdir) = @_;
 214        $what=ref($what) if ref($what);
 215
 216        my $self = {};
 217        $self->{'buffer'} = "";
 218        bless($self,$what);
 219
 220        $repo =~ s#/+$##;
 221        $self->{'fullrep'} = $repo;
 222        $self->conn();
 223
 224        $self->{'subdir'} = $subdir;
 225        $self->{'lines'} = undef;
 226
 227        return $self;
 228}
 229
 230sub conn {
 231        my $self = shift;
 232        my $repo = $self->{'fullrep'};
 233        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 234                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 235
 236                my ($proxyhost,$proxyport);
 237                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 238                        $proxyhost = $1;
 239                        # Default proxyport, if not specified, is 8080.
 240                        $proxyport = 8080;
 241                        if ($ENV{"CVS_PROXY_PORT"}) {
 242                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 243                        }
 244                        if ($param =~ m/proxyport=([^;]+)/) {
 245                                $proxyport = $1;
 246                        }
 247                }
 248                $repo ||= '/';
 249
 250                # if username is not explicit in CVSROOT, then use current user, as cvs would
 251                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 252                my $rr2 = "-";
 253                unless ($port) {
 254                        $rr2 = ":pserver:$user\@$serv:$repo";
 255                        $port=2401;
 256                }
 257                my $rr = ":pserver:$user\@$serv:$port$repo";
 258
 259                if ($pass) {
 260                        $pass = $self->_scramble($pass);
 261                } else {
 262                        open(H,$ENV{'HOME'}."/.cvspass") and do {
 263                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 264                                while (<H>) {
 265                                        chomp;
 266                                        s/^\/\d+\s+//;
 267                                        my ($w,$p) = split(/\s/,$_,2);
 268                                        if ($w eq $rr or $w eq $rr2) {
 269                                                $pass = $p;
 270                                                last;
 271                                        }
 272                                }
 273                        };
 274                        $pass = "A" unless $pass;
 275                }
 276
 277                my ($s, $rep);
 278                if ($proxyhost) {
 279
 280                        # Use a HTTP Proxy. Only works for HTTP proxies that
 281                        # don't require user authentication
 282                        #
 283                        # See: http://www.ietf.org/rfc/rfc2817.txt
 284
 285                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 286                        die "Socket to $proxyhost: $!\n" unless defined $s;
 287                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 288                                or die "Write to $proxyhost: $!\n";
 289                        $s->flush();
 290
 291                        $rep = <$s>;
 292
 293                        # The answer should look like 'HTTP/1.x 2yy ....'
 294                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 295                                die "Proxy connect: $rep\n";
 296                        }
 297                        # Skip up to the empty line of the proxy server output
 298                        # including the response headers.
 299                        while ($rep = <$s>) {
 300                                last if (!defined $rep ||
 301                                         $rep eq "\n" ||
 302                                         $rep eq "\r\n");
 303                        }
 304                } else {
 305                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 306                        die "Socket to $serv: $!\n" unless defined $s;
 307                }
 308
 309                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 310                        or die "Write to $serv: $!\n";
 311                $s->flush();
 312
 313                $rep = <$s>;
 314
 315                if ($rep ne "I LOVE YOU\n") {
 316                        $rep="<unknown>" unless $rep;
 317                        die "AuthReply: $rep\n";
 318                }
 319                $self->{'socketo'} = $s;
 320                $self->{'socketi'} = $s;
 321        } else { # local or ext: Fork off our own cvs server.
 322                my $pr = IO::Pipe->new();
 323                my $pw = IO::Pipe->new();
 324                my $pid = fork();
 325                die "Fork: $!\n" unless defined $pid;
 326                my $cvs = 'cvs';
 327                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 328                my $rsh = 'rsh';
 329                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 330
 331                my @cvs = ($cvs, 'server');
 332                my ($local, $user, $host);
 333                $local = $repo =~ s/:local://;
 334                if (!$local) {
 335                    $repo =~ s/:ext://;
 336                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 337                    ($user, $host) = ($1, $2);
 338                }
 339                if (!$local) {
 340                    if ($user) {
 341                        unshift @cvs, $rsh, '-l', $user, $host;
 342                    } else {
 343                        unshift @cvs, $rsh, $host;
 344                    }
 345                }
 346
 347                unless ($pid) {
 348                        $pr->writer();
 349                        $pw->reader();
 350                        dup2($pw->fileno(),0);
 351                        dup2($pr->fileno(),1);
 352                        $pr->close();
 353                        $pw->close();
 354                        exec(@cvs);
 355                }
 356                $pw->writer();
 357                $pr->reader();
 358                $self->{'socketo'} = $pw;
 359                $self->{'socketi'} = $pr;
 360        }
 361        $self->{'socketo'}->write("Root $repo\n");
 362
 363        # Trial and error says that this probably is the minimum set
 364        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 365
 366        $self->{'socketo'}->write("valid-requests\n");
 367        $self->{'socketo'}->flush();
 368
 369        chomp(my $rep=$self->readline());
 370        if ($rep !~ s/^Valid-requests\s*//) {
 371                $rep="<unknown>" unless $rep;
 372                die "Expected Valid-requests from server, but got: $rep\n";
 373        }
 374        chomp(my $res=$self->readline());
 375        die "validReply: $res\n" if $res ne "ok";
 376
 377        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 378        $self->{'repo'} = $repo;
 379}
 380
 381sub readline {
 382        my ($self) = @_;
 383        return $self->{'socketi'}->getline();
 384}
 385
 386sub _file {
 387        # Request a file with a given revision.
 388        # Trial and error says this is a good way to do it. :-/
 389        my ($self,$fn,$rev) = @_;
 390        $self->{'socketo'}->write("Argument -N\n") or return undef;
 391        $self->{'socketo'}->write("Argument -P\n") or return undef;
 392        # -kk: Linus' version doesn't use it - defaults to off
 393        if ($opt_k) {
 394            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 395        }
 396        $self->{'socketo'}->write("Argument -r\n") or return undef;
 397        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 398        $self->{'socketo'}->write("Argument --\n") or return undef;
 399        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 400        $self->{'socketo'}->write("Directory .\n") or return undef;
 401        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 402        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 403        $self->{'socketo'}->write("co\n") or return undef;
 404        $self->{'socketo'}->flush() or return undef;
 405        $self->{'lines'} = 0;
 406        return 1;
 407}
 408sub _line {
 409        # Read a line from the server.
 410        # ... except that 'line' may be an entire file. ;-)
 411        my ($self, $fh) = @_;
 412        die "Not in lines" unless defined $self->{'lines'};
 413
 414        my $line;
 415        my $res=0;
 416        while (defined($line = $self->readline())) {
 417                # M U gnupg-cvs-rep/AUTHORS
 418                # Updated gnupg-cvs-rep/
 419                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 420                # /AUTHORS/1.1///T1.1
 421                # u=rw,g=rw,o=rw
 422                # 0
 423                # ok
 424
 425                if ($line =~ s/^(?:Created|Updated) //) {
 426                        $line = $self->readline(); # path
 427                        $line = $self->readline(); # Entries line
 428                        my $mode = $self->readline(); chomp $mode;
 429                        $self->{'mode'} = $mode;
 430                        defined (my $cnt = $self->readline())
 431                                or die "EOF from server after 'Changed'\n";
 432                        chomp $cnt;
 433                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 434                        $line="";
 435                        $res = $self->_fetchfile($fh, $cnt);
 436                } elsif ($line =~ s/^ //) {
 437                        print $fh $line;
 438                        $res += length($line);
 439                } elsif ($line =~ /^M\b/) {
 440                        # output, do nothing
 441                } elsif ($line =~ /^Mbinary\b/) {
 442                        my $cnt;
 443                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 444                        chomp $cnt;
 445                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 446                        $line="";
 447                        $res += $self->_fetchfile($fh, $cnt);
 448                } else {
 449                        chomp $line;
 450                        if ($line eq "ok") {
 451                                # print STDERR "S: ok (".length($res).")\n";
 452                                return $res;
 453                        } elsif ($line =~ s/^E //) {
 454                                # print STDERR "S: $line\n";
 455                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 456                                $line = $self->readline(); # filename
 457                                $line = $self->readline(); # OK
 458                                chomp $line;
 459                                die "Unknown: $line" if $line ne "ok";
 460                                return -1;
 461                        } else {
 462                                die "Unknown: $line\n";
 463                        }
 464                }
 465        }
 466        return undef;
 467}
 468sub file {
 469        my ($self,$fn,$rev) = @_;
 470        my $res;
 471
 472        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 473                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 474
 475        $self->_file($fn,$rev) and $res = $self->_line($fh);
 476
 477        if (!defined $res) {
 478            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 479            truncate $fh, 0;
 480            $self->conn();
 481            $self->_file($fn,$rev) or die "No file command send";
 482            $res = $self->_line($fh);
 483            die "Retry failed" unless defined $res;
 484        }
 485        close ($fh);
 486
 487        return ($name, $res);
 488}
 489sub _fetchfile {
 490        my ($self, $fh, $cnt) = @_;
 491        my $res = 0;
 492        my $bufsize = 1024 * 1024;
 493        while ($cnt) {
 494            if ($bufsize > $cnt) {
 495                $bufsize = $cnt;
 496            }
 497            my $buf;
 498            my $num = $self->{'socketi'}->read($buf,$bufsize);
 499            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 500            print $fh $buf;
 501            $res += $num;
 502            $cnt -= $num;
 503        }
 504        return $res;
 505}
 506
 507sub _scramble {
 508        my ($self, $pass) = @_;
 509        my $scrambled = "A";
 510
 511        return $scrambled unless $pass;
 512
 513        my $pass_len = length($pass);
 514        my @pass_arr = split("", $pass);
 515        my $i;
 516
 517        # from cvs/src/scramble.c
 518        my @shifts = (
 519                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 520                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 521                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 522                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 523                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 524                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 525                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 526                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 527                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 528                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 529                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 530                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 531                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 532                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 533                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 534                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 535        );
 536
 537        for ($i = 0; $i < $pass_len; $i++) {
 538                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 539        }
 540
 541        return $scrambled;
 542}
 543
 544package main;
 545
 546my $cvs = CVSconn->new($opt_d, $cvs_tree);
 547
 548
 549sub pdate($) {
 550        my ($d) = @_;
 551        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 552                or die "Unparseable date: $d\n";
 553        my $y=$1; $y-=1900 if $y>1900;
 554        return timegm($6||0,$5,$4,$3,$2-1,$y);
 555}
 556
 557sub pmode($) {
 558        my ($mode) = @_;
 559        my $m = 0;
 560        my $mm = 0;
 561        my $um = 0;
 562        for my $x(split(//,$mode)) {
 563                if ($x eq ",") {
 564                        $m |= $mm&$um;
 565                        $mm = 0;
 566                        $um = 0;
 567                } elsif ($x eq "u") { $um |= 0700;
 568                } elsif ($x eq "g") { $um |= 0070;
 569                } elsif ($x eq "o") { $um |= 0007;
 570                } elsif ($x eq "r") { $mm |= 0444;
 571                } elsif ($x eq "w") { $mm |= 0222;
 572                } elsif ($x eq "x") { $mm |= 0111;
 573                } elsif ($x eq "=") { # do nothing
 574                } else { die "Unknown mode: $mode\n";
 575                }
 576        }
 577        $m |= $mm&$um;
 578        return $m;
 579}
 580
 581sub getwd() {
 582        my $pwd = `pwd`;
 583        chomp $pwd;
 584        return $pwd;
 585}
 586
 587sub is_sha1 {
 588        my $s = shift;
 589        return $s =~ /^[a-f0-9]{40}$/;
 590}
 591
 592sub get_headref ($) {
 593        my $name = shift;
 594        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 595        return undef unless $? == 0;
 596        chomp $r;
 597        return $r;
 598}
 599
 600my $user_filename_prepend = '';
 601sub munge_user_filename {
 602        my $name = shift;
 603        return File::Spec->file_name_is_absolute($name) ?
 604                $name :
 605                $user_filename_prepend . $name;
 606}
 607
 608-d $git_tree
 609        or mkdir($git_tree,0777)
 610        or die "Could not create $git_tree: $!";
 611if ($git_tree ne '.') {
 612        $user_filename_prepend = getwd() . '/';
 613        chdir($git_tree);
 614}
 615
 616my $last_branch = "";
 617my $orig_branch = "";
 618my %branch_date;
 619my $tip_at_start = undef;
 620
 621my $git_dir = $ENV{"GIT_DIR"} || ".git";
 622$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 623$ENV{"GIT_DIR"} = $git_dir;
 624my $orig_git_index;
 625$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 626
 627my %index; # holds filenames of one index per branch
 628
 629unless (-d $git_dir) {
 630        system(qw(git init));
 631        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 632        system(qw(git read-tree --empty));
 633        die "Cannot init an empty tree: $?\n" if $?;
 634
 635        $last_branch = $opt_o;
 636        $orig_branch = "";
 637} else {
 638        open(F, "-|", qw(git symbolic-ref HEAD)) or
 639                die "Cannot run git symbolic-ref: $!\n";
 640        chomp ($last_branch = <F>);
 641        $last_branch = basename($last_branch);
 642        close(F);
 643        unless ($last_branch) {
 644                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 645                $last_branch = "master";
 646        }
 647        $orig_branch = $last_branch;
 648        $tip_at_start = `git rev-parse --verify HEAD`;
 649
 650        # Get the last import timestamps
 651        my $fmt = '($ref, $author) = (%(refname), %(author));';
 652        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 653        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 654        while (defined(my $entry = <H>)) {
 655                my ($ref, $author);
 656                eval($entry) || die "cannot eval refs list: $@";
 657                my ($head) = ($ref =~ m|^$remote/(.*)|);
 658                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 659                $branch_date{$head} = $1;
 660        }
 661        close(H);
 662        if (!exists $branch_date{$opt_o}) {
 663                die "Branch '$opt_o' does not exist.\n".
 664                       "Either use the correct '-o branch' option,\n".
 665                       "or import to a new repository.\n";
 666        }
 667}
 668
 669-d $git_dir
 670        or die "Could not create git subdir ($git_dir).\n";
 671
 672# now we read (and possibly save) author-info as well
 673-f "$git_dir/cvs-authors" and
 674  read_author_info("$git_dir/cvs-authors");
 675if ($opt_A) {
 676        read_author_info(munge_user_filename($opt_A));
 677        write_author_info("$git_dir/cvs-authors");
 678}
 679
 680# open .git/cvs-revisions, if requested
 681open my $revision_map, '>>', "$git_dir/cvs-revisions"
 682    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 683        if defined $opt_R;
 684
 685
 686#
 687# run cvsps into a file unless we are getting
 688# it passed as a file via $opt_P
 689#
 690my $cvspsfile;
 691unless ($opt_P) {
 692        print "Running cvsps...\n" if $opt_v;
 693        my $pid = open(CVSPS,"-|");
 694        my $cvspsfh;
 695        die "Cannot fork: $!\n" unless defined $pid;
 696        unless ($pid) {
 697                my @opt;
 698                @opt = split(/,/,$opt_p) if defined $opt_p;
 699                unshift @opt, '-z', $opt_z if defined $opt_z;
 700                unshift @opt, '-q'         unless defined $opt_v;
 701                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 702                        push @opt, '--cvs-direct';
 703                }
 704                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 705                die "Could not start cvsps: $!\n";
 706        }
 707        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 708                                          DIR => File::Spec->tmpdir());
 709        while (<CVSPS>) {
 710            print $cvspsfh $_;
 711        }
 712        close CVSPS;
 713        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 714        close $cvspsfh;
 715} else {
 716        $cvspsfile = munge_user_filename($opt_P);
 717}
 718
 719open(CVS, "<$cvspsfile") or die $!;
 720
 721## cvsps output:
 722#---------------------
 723#PatchSet 314
 724#Date: 1999/09/18 13:03:59
 725#Author: wkoch
 726#Branch: STABLE-BRANCH-1-0
 727#Ancestor branch: HEAD
 728#Tag: (none)
 729#Log:
 730#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 731#Members:
 732#       README:1.57->1.57.2.1
 733#       VERSION:1.96->1.96.2.1
 734#
 735#---------------------
 736
 737my $state = 0;
 738
 739sub update_index (\@\@) {
 740        my $old = shift;
 741        my $new = shift;
 742        open(my $fh, '|-', qw(git update-index -z --index-info))
 743                or die "unable to open git update-index: $!";
 744        print $fh
 745                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 746                        @$old),
 747                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 748                        @$new)
 749                or die "unable to write to git update-index: $!";
 750        close $fh
 751                or die "unable to write to git update-index: $!";
 752        $? and die "git update-index reported error: $?";
 753}
 754
 755sub write_tree () {
 756        open(my $fh, '-|', qw(git write-tree))
 757                or die "unable to open git write-tree: $!";
 758        chomp(my $tree = <$fh>);
 759        is_sha1($tree)
 760                or die "Cannot get tree id ($tree): $!";
 761        close($fh)
 762                or die "Error running git write-tree: $?\n";
 763        print "Tree ID $tree\n" if $opt_v;
 764        return $tree;
 765}
 766
 767my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 768my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 769
 770# commits that cvsps cannot place anywhere...
 771$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 772
 773sub commit {
 774        if ($branch eq $opt_o && !$index{branch} &&
 775                !get_headref("$remote/$branch")) {
 776            # looks like an initial commit
 777            # use the index primed by git init
 778            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 779            $index{$branch} = "$git_dir/index";
 780        } else {
 781            # use an index per branch to speed up
 782            # imports of projects with many branches
 783            unless ($index{$branch}) {
 784                $index{$branch} = tmpnam();
 785                $ENV{GIT_INDEX_FILE} = $index{$branch};
 786                if ($ancestor) {
 787                    system("git", "read-tree", "$remote/$ancestor");
 788                } else {
 789                    system("git", "read-tree", "$remote/$branch");
 790                }
 791                die "read-tree failed: $?\n" if $?;
 792            }
 793        }
 794        $ENV{GIT_INDEX_FILE} = $index{$branch};
 795
 796        update_index(@old, @new);
 797        @old = @new = ();
 798        my $tree = write_tree();
 799        my $parent = get_headref("$remote/$last_branch");
 800        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 801
 802        my @commit_args;
 803        push @commit_args, ("-p", $parent) if $parent;
 804
 805        # loose detection of merges
 806        # based on the commit msg
 807        foreach my $rx (@mergerx) {
 808                next unless $logmsg =~ $rx && $1;
 809                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 810                if (my $sha1 = get_headref("$remote/$mparent")) {
 811                        push @commit_args, '-p', "$remote/$mparent";
 812                        print "Merge parent branch: $mparent\n" if $opt_v;
 813                }
 814        }
 815
 816        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 817        $ENV{GIT_AUTHOR_NAME} = $author_name;
 818        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 819        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 820        $ENV{GIT_COMMITTER_NAME} = $author_name;
 821        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 822        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 823        my $pid = open2(my $commit_read, my $commit_write,
 824                'git', 'commit-tree', $tree, @commit_args);
 825
 826        # compatibility with git2cvs
 827        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 828        $logmsg =~ s/[\s\n]+\z//;
 829
 830        if (@skipped) {
 831            $logmsg .= "\n\n\nSKIPPED:\n\t";
 832            $logmsg .= join("\n\t", @skipped) . "\n";
 833            @skipped = ();
 834        }
 835
 836        print($commit_write "$logmsg\n") && close($commit_write)
 837                or die "Error writing to git commit-tree: $!\n";
 838
 839        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 840        chomp(my $cid = <$commit_read>);
 841        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 842        print "Commit ID $cid\n" if $opt_v;
 843        close($commit_read);
 844
 845        waitpid($pid,0);
 846        die "Error running git commit-tree: $?\n" if $?;
 847
 848        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 849                or die "Cannot write branch $branch for update: $!\n";
 850
 851        if ($revision_map) {
 852                print $revision_map "@$_ $cid\n" for @commit_revisions;
 853        }
 854        @commit_revisions = ();
 855
 856        if ($tag) {
 857                my ($xtag) = $tag;
 858                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 859                $xtag =~ tr/_/\./ if ( $opt_u );
 860                $xtag =~ s/[\/]/$opt_s/g;
 861                $xtag =~ s/\[//g;
 862
 863                system('git' , 'tag', '-f', $xtag, $cid) == 0
 864                        or die "Cannot create tag $xtag: $!\n";
 865
 866                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 867        }
 868};
 869
 870my $commitcount = 1;
 871while (<CVS>) {
 872        chomp;
 873        if ($state == 0 and /^-+$/) {
 874                $state = 1;
 875        } elsif ($state == 0) {
 876                $state = 1;
 877                redo;
 878        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 879                $patchset = 0+$_;
 880                $state=2;
 881        } elsif ($state == 2 and s/^Date:\s+//) {
 882                $date = pdate($_);
 883                unless ($date) {
 884                        print STDERR "Could not parse date: $_\n";
 885                        $state=0;
 886                        next;
 887                }
 888                $state=3;
 889        } elsif ($state == 3 and s/^Author:\s+//) {
 890                s/\s+$//;
 891                if (/^(.*?)\s+<(.*)>/) {
 892                    ($author_name, $author_email) = ($1, $2);
 893                } elsif ($conv_author_name{$_}) {
 894                        $author_name = $conv_author_name{$_};
 895                        $author_email = $conv_author_email{$_};
 896                } else {
 897                    $author_name = $author_email = $_;
 898                }
 899                $state = 4;
 900        } elsif ($state == 4 and s/^Branch:\s+//) {
 901                s/\s+$//;
 902                tr/_/\./ if ( $opt_u );
 903                s/[\/]/$opt_s/g;
 904                $branch = $_;
 905                $state = 5;
 906        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 907                s/\s+$//;
 908                $ancestor = $_;
 909                $ancestor = $opt_o if $ancestor eq "HEAD";
 910                $state = 6;
 911        } elsif ($state == 5) {
 912                $ancestor = undef;
 913                $state = 6;
 914                redo;
 915        } elsif ($state == 6 and s/^Tag:\s+//) {
 916                s/\s+$//;
 917                if ($_ eq "(none)") {
 918                        $tag = undef;
 919                } else {
 920                        $tag = $_;
 921                }
 922                $state = 7;
 923        } elsif ($state == 7 and /^Log:/) {
 924                $logmsg = "";
 925                $state = 8;
 926        } elsif ($state == 8 and /^Members:/) {
 927                $branch = $opt_o if $branch eq "HEAD";
 928                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 929                        # skip
 930                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 931                        $state = 11;
 932                        next;
 933                }
 934                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 935                        # skip if the commit is too recent
 936                        # given that the cvsps default fuzz is 300s, we give ourselves another
 937                        # 300s just in case -- this also prevents skipping commits
 938                        # due to server clock drift
 939                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 940                        $state = 11;
 941                        next;
 942                }
 943                if (exists $ignorebranch{$branch}) {
 944                        print STDERR "Skipping $branch\n";
 945                        $state = 11;
 946                        next;
 947                }
 948                if ($ancestor) {
 949                        if ($ancestor eq $branch) {
 950                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 951                                $ancestor = $opt_o;
 952                        }
 953                        if (defined get_headref("$remote/$branch")) {
 954                                print STDERR "Branch $branch already exists!\n";
 955                                $state=11;
 956                                next;
 957                        }
 958                        my $id = get_headref("$remote/$ancestor");
 959                        if (!$id) {
 960                                print STDERR "Branch $ancestor does not exist!\n";
 961                                $ignorebranch{$branch} = 1;
 962                                $state=11;
 963                                next;
 964                        }
 965
 966                        system(qw(git update-ref -m cvsimport),
 967                                "$remote/$branch", $id);
 968                        if($? != 0) {
 969                                print STDERR "Could not create branch $branch\n";
 970                                $ignorebranch{$branch} = 1;
 971                                $state=11;
 972                                next;
 973                        }
 974                }
 975                $last_branch = $branch if $branch ne $last_branch;
 976                $state = 9;
 977        } elsif ($state == 8) {
 978                $logmsg .= "$_\n";
 979        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
 980#       VERSION:1.96->1.96.2.1
 981                my $init = ($2 eq "INITIAL");
 982                my $fn = $1;
 983                my $rev = $3;
 984                $fn =~ s#^/+##;
 985                if ($opt_S && $fn =~ m/$opt_S/) {
 986                    print "SKIPPING $fn v $rev\n";
 987                    push(@skipped, $fn);
 988                    next;
 989                }
 990                push @commit_revisions, [$fn, $rev];
 991                print "Fetching $fn   v $rev\n" if $opt_v;
 992                my ($tmpname, $size) = $cvs->file($fn,$rev);
 993                if ($size == -1) {
 994                        push(@old,$fn);
 995                        print "Drop $fn\n" if $opt_v;
 996                } else {
 997                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
 998                        my $pid = open(my $F, '-|');
 999                        die $! unless defined $pid;
1000                        if (!$pid) {
1001                            exec("git", "hash-object", "-w", $tmpname)
1002                                or die "Cannot create object: $!\n";
1003                        }
1004                        my $sha = <$F>;
1005                        chomp $sha;
1006                        close $F;
1007                        my $mode = pmode($cvs->{'mode'});
1008                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1009                }
1010                unlink($tmpname);
1011        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1012                my $fn = $1;
1013                my $rev = $2;
1014                $fn =~ s#^/+##;
1015                push @commit_revisions, [$fn, $rev];
1016                push(@old,$fn);
1017                print "Delete $fn\n" if $opt_v;
1018        } elsif ($state == 9 and /^\s*$/) {
1019                $state = 10;
1020        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1021                $commitcount++;
1022                if ($opt_L && $commitcount > $opt_L) {
1023                        last;
1024                }
1025                commit();
1026                if (($commitcount & 1023) == 0) {
1027                        system(qw(git repack -a -d));
1028                }
1029                $state = 1;
1030        } elsif ($state == 11 and /^-+$/) {
1031                $state = 1;
1032        } elsif (/^-+$/) { # end of unknown-line processing
1033                $state = 1;
1034        } elsif ($state != 11) { # ignore stuff when skipping
1035                print STDERR "* UNKNOWN LINE * $_\n";
1036        }
1037}
1038commit() if $branch and $state != 11;
1039
1040unless ($opt_P) {
1041        unlink($cvspsfile);
1042}
1043
1044# The heuristic of repacking every 1024 commits can leave a
1045# lot of unpacked data.  If there is more than 1MB worth of
1046# not-packed objects, repack once more.
1047my $line = `git count-objects`;
1048if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1049  my ($n_objects, $kb) = ($1, $2);
1050  1024 < $kb
1051    and system(qw(git repack -a -d));
1052}
1053
1054foreach my $git_index (values %index) {
1055    if ($git_index ne "$git_dir/index") {
1056        unlink($git_index);
1057    }
1058}
1059
1060if (defined $orig_git_index) {
1061        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1062} else {
1063        delete $ENV{GIT_INDEX_FILE};
1064}
1065
1066# Now switch back to the branch we were in before all of this happened
1067if ($orig_branch) {
1068        print "DONE.\n" if $opt_v;
1069        if ($opt_i) {
1070                exit 0;
1071        }
1072        my $tip_at_end = `git rev-parse --verify HEAD`;
1073        if ($tip_at_start ne $tip_at_end) {
1074                for ($tip_at_start, $tip_at_end) { chomp; }
1075                print "Fetched into the current branch.\n" if $opt_v;
1076                system(qw(git read-tree -u -m),
1077                       $tip_at_start, $tip_at_end);
1078                die "Fast-forward update failed: $?\n" if $?;
1079        }
1080        else {
1081                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1082                die "Could not merge $opt_o into the current branch.\n" if $?;
1083        }
1084} else {
1085        $orig_branch = "master";
1086        print "DONE; creating $orig_branch branch\n" if $opt_v;
1087        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1088                unless defined get_headref('refs/heads/master');
1089        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1090                if ($opt_r && $opt_o ne 'HEAD');
1091        system('git', 'update-ref', 'HEAD', "$orig_branch");
1092        unless ($opt_i) {
1093                system(qw(git checkout -f));
1094                die "checkout failed: $?\n" if $?;
1095        }
1096}