git-cvsimport.perlon commit SubmittingPatches: remove overlong checklist (7d5bf87)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime dup2 ENOENT);
  28use IPC::Open2;
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email);
  35
  36sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48
  49sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53
  54        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # However, we also read from CVSROOT/users format
  63                # to ease migration.
  64                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  65                        my $mapped;
  66                        ($user, $mapped) = ($1, $3);
  67                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  68                                $conv_author_name{$user} = $1;
  69                                $conv_author_email{$user} = $2;
  70                        }
  71                        elsif ($mapped =~ /^<?(.*)>?$/) {
  72                                $conv_author_name{$user} = $user;
  73                                $conv_author_email{$user} = $1;
  74                        }
  75                }
  76                # NEEDSWORK: Maybe warn on unrecognized lines?
  77        }
  78        close ($f);
  79}
  80
  81sub write_author_info($) {
  82        my ($file) = @_;
  83        open my $f, '>', $file or
  84          die("Failed to open $file for writing: $!");
  85
  86        foreach (keys %conv_author_name) {
  87                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  88        }
  89        close ($f);
  90}
  91
  92# convert getopts specs for use by git config
  93my %longmap = (
  94        'A:' => 'authors-file',
  95        'M:' => 'merge-regex',
  96        'P:' => undef,
  97        'R' => 'track-revisions',
  98        'S:' => 'ignore-paths',
  99);
 100
 101sub read_repo_config {
 102        # Split the string between characters, unless there is a ':'
 103        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 104        my @opts = split(/ *(?!:)/, shift);
 105        foreach my $o (@opts) {
 106                my $key = $o;
 107                $key =~ s/://g;
 108                my $arg = 'git config';
 109                $arg .= ' --bool' if ($o !~ /:$/);
 110                my $ckey = $key;
 111
 112                if (exists $longmap{$o}) {
 113                        # An uppercase option like -R cannot be
 114                        # expressed in the configuration, as the
 115                        # variable names are downcased.
 116                        $ckey = $longmap{$o};
 117                        next if (! defined $ckey);
 118                        $ckey =~ s/-//g;
 119                }
 120                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 121                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 122                        no strict 'refs';
 123                        my $opt_name = "opt_" . $key;
 124                        if (!$$opt_name) {
 125                                $$opt_name = $tmp;
 126                        }
 127                }
 128        }
 129}
 130
 131my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 132read_repo_config($opts);
 133Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 134
 135# turn the Getopt::Std specification in a Getopt::Long one,
 136# with support for multiple -M options
 137GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 138    or usage();
 139usage if $opt_h;
 140
 141if (@ARGV == 0) {
 142                chomp(my $module = `git config --get cvsimport.module`);
 143                push(@ARGV, $module) if $? == 0;
 144}
 145@ARGV <= 1 or usage("You can't specify more than one CVS module");
 146
 147if ($opt_d) {
 148        $ENV{"CVSROOT"} = $opt_d;
 149} elsif (-f 'CVS/Root') {
 150        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 151        $opt_d = <$f>;
 152        chomp $opt_d;
 153        close $f;
 154        $ENV{"CVSROOT"} = $opt_d;
 155} elsif ($ENV{"CVSROOT"}) {
 156        $opt_d = $ENV{"CVSROOT"};
 157} else {
 158        usage("CVSROOT needs to be set");
 159}
 160$opt_s ||= "-";
 161$opt_a ||= 0;
 162
 163my $git_tree = $opt_C;
 164$git_tree ||= ".";
 165
 166my $remote;
 167if (defined $opt_r) {
 168        $remote = 'refs/remotes/' . $opt_r;
 169        $opt_o ||= "master";
 170} else {
 171        $opt_o ||= "origin";
 172        $remote = 'refs/heads';
 173}
 174
 175my $cvs_tree;
 176if ($#ARGV == 0) {
 177        $cvs_tree = $ARGV[0];
 178} elsif (-f 'CVS/Repository') {
 179        open my $f, '<', 'CVS/Repository' or
 180            die 'Failed to open CVS/Repository';
 181        $cvs_tree = <$f>;
 182        chomp $cvs_tree;
 183        close $f;
 184} else {
 185        usage("CVS module has to be specified");
 186}
 187
 188our @mergerx = ();
 189if ($opt_m) {
 190        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 191}
 192if (@opt_M) {
 193        push (@mergerx, map { qr/$_/ } @opt_M);
 194}
 195
 196# Remember UTC of our starting time
 197# we'll want to avoid importing commits
 198# that are too recent
 199our $starttime = time();
 200
 201select(STDERR); $|=1; select(STDOUT);
 202
 203
 204package CVSconn;
 205# Basic CVS dialog.
 206# We're only interested in connecting and downloading, so ...
 207
 208use File::Spec;
 209use File::Temp qw(tempfile);
 210use POSIX qw(strftime dup2);
 211
 212sub new {
 213        my ($what,$repo,$subdir) = @_;
 214        $what=ref($what) if ref($what);
 215
 216        my $self = {};
 217        $self->{'buffer'} = "";
 218        bless($self,$what);
 219
 220        $repo =~ s#/+$##;
 221        $self->{'fullrep'} = $repo;
 222        $self->conn();
 223
 224        $self->{'subdir'} = $subdir;
 225        $self->{'lines'} = undef;
 226
 227        return $self;
 228}
 229
 230sub find_password_entry {
 231        my ($cvspass, @cvsroot) = @_;
 232        my ($file, $delim) = @$cvspass;
 233        my $pass;
 234        local ($_);
 235
 236        if (open(my $fh, $file)) {
 237                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 238                CVSPASSFILE:
 239                while (<$fh>) {
 240                        chomp;
 241                        s/^\/\d+\s+//;
 242                        my ($w, $p) = split($delim,$_,2);
 243                        for my $cvsroot (@cvsroot) {
 244                                if ($w eq $cvsroot) {
 245                                        $pass = $p;
 246                                        last CVSPASSFILE;
 247                                }
 248                        }
 249                }
 250                close($fh);
 251        }
 252        return $pass;
 253}
 254
 255sub conn {
 256        my $self = shift;
 257        my $repo = $self->{'fullrep'};
 258        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 259                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 260
 261                my ($proxyhost,$proxyport);
 262                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 263                        $proxyhost = $1;
 264                        # Default proxyport, if not specified, is 8080.
 265                        $proxyport = 8080;
 266                        if ($ENV{"CVS_PROXY_PORT"}) {
 267                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 268                        }
 269                        if ($param =~ m/proxyport=([^;]+)/) {
 270                                $proxyport = $1;
 271                        }
 272                }
 273                $repo ||= '/';
 274
 275                # if username is not explicit in CVSROOT, then use current user, as cvs would
 276                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 277                my $rr2 = "-";
 278                unless ($port) {
 279                        $rr2 = ":pserver:$user\@$serv:$repo";
 280                        $port=2401;
 281                }
 282                my $rr = ":pserver:$user\@$serv:$port$repo";
 283
 284                if ($pass) {
 285                        $pass = $self->_scramble($pass);
 286                } else {
 287                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 288                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 289                        my @loc = ();
 290                        foreach my $cvspass (@cvspass) {
 291                                my $p = find_password_entry($cvspass, $rr, $rr2);
 292                                if ($p) {
 293                                        push @loc, $cvspass->[0];
 294                                        $pass = $p;
 295                                }
 296                        }
 297
 298                        if (1 < @loc) {
 299                                die("Multiple cvs password files have ".
 300                                    "entries for CVSROOT $opt_d: @loc");
 301                        } elsif (!$pass) {
 302                                $pass = "A";
 303                        }
 304                }
 305
 306                my ($s, $rep);
 307                if ($proxyhost) {
 308
 309                        # Use a HTTP Proxy. Only works for HTTP proxies that
 310                        # don't require user authentication
 311                        #
 312                        # See: http://www.ietf.org/rfc/rfc2817.txt
 313
 314                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 315                        die "Socket to $proxyhost: $!\n" unless defined $s;
 316                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 317                                or die "Write to $proxyhost: $!\n";
 318                        $s->flush();
 319
 320                        $rep = <$s>;
 321
 322                        # The answer should look like 'HTTP/1.x 2yy ....'
 323                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 324                                die "Proxy connect: $rep\n";
 325                        }
 326                        # Skip up to the empty line of the proxy server output
 327                        # including the response headers.
 328                        while ($rep = <$s>) {
 329                                last if (!defined $rep ||
 330                                         $rep eq "\n" ||
 331                                         $rep eq "\r\n");
 332                        }
 333                } else {
 334                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 335                        die "Socket to $serv: $!\n" unless defined $s;
 336                }
 337
 338                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 339                        or die "Write to $serv: $!\n";
 340                $s->flush();
 341
 342                $rep = <$s>;
 343
 344                if ($rep ne "I LOVE YOU\n") {
 345                        $rep="<unknown>" unless $rep;
 346                        die "AuthReply: $rep\n";
 347                }
 348                $self->{'socketo'} = $s;
 349                $self->{'socketi'} = $s;
 350        } else { # local or ext: Fork off our own cvs server.
 351                my $pr = IO::Pipe->new();
 352                my $pw = IO::Pipe->new();
 353                my $pid = fork();
 354                die "Fork: $!\n" unless defined $pid;
 355                my $cvs = 'cvs';
 356                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 357                my $rsh = 'rsh';
 358                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 359
 360                my @cvs = ($cvs, 'server');
 361                my ($local, $user, $host);
 362                $local = $repo =~ s/:local://;
 363                if (!$local) {
 364                    $repo =~ s/:ext://;
 365                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 366                    ($user, $host) = ($1, $2);
 367                }
 368                if (!$local) {
 369                    if ($user) {
 370                        unshift @cvs, $rsh, '-l', $user, $host;
 371                    } else {
 372                        unshift @cvs, $rsh, $host;
 373                    }
 374                }
 375
 376                unless ($pid) {
 377                        $pr->writer();
 378                        $pw->reader();
 379                        dup2($pw->fileno(),0);
 380                        dup2($pr->fileno(),1);
 381                        $pr->close();
 382                        $pw->close();
 383                        exec(@cvs);
 384                }
 385                $pw->writer();
 386                $pr->reader();
 387                $self->{'socketo'} = $pw;
 388                $self->{'socketi'} = $pr;
 389        }
 390        $self->{'socketo'}->write("Root $repo\n");
 391
 392        # Trial and error says that this probably is the minimum set
 393        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 394
 395        $self->{'socketo'}->write("valid-requests\n");
 396        $self->{'socketo'}->flush();
 397
 398        my $rep=$self->readline();
 399        die "Failed to read from server" unless defined $rep;
 400        chomp($rep);
 401        if ($rep !~ s/^Valid-requests\s*//) {
 402                $rep="<unknown>" unless $rep;
 403                die "Expected Valid-requests from server, but got: $rep\n";
 404        }
 405        chomp(my $res=$self->readline());
 406        die "validReply: $res\n" if $res ne "ok";
 407
 408        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 409        $self->{'repo'} = $repo;
 410}
 411
 412sub readline {
 413        my ($self) = @_;
 414        return $self->{'socketi'}->getline();
 415}
 416
 417sub _file {
 418        # Request a file with a given revision.
 419        # Trial and error says this is a good way to do it. :-/
 420        my ($self,$fn,$rev) = @_;
 421        $self->{'socketo'}->write("Argument -N\n") or return undef;
 422        $self->{'socketo'}->write("Argument -P\n") or return undef;
 423        # -kk: Linus' version doesn't use it - defaults to off
 424        if ($opt_k) {
 425            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 426        }
 427        $self->{'socketo'}->write("Argument -r\n") or return undef;
 428        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 429        $self->{'socketo'}->write("Argument --\n") or return undef;
 430        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 431        $self->{'socketo'}->write("Directory .\n") or return undef;
 432        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 433        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 434        $self->{'socketo'}->write("co\n") or return undef;
 435        $self->{'socketo'}->flush() or return undef;
 436        $self->{'lines'} = 0;
 437        return 1;
 438}
 439sub _line {
 440        # Read a line from the server.
 441        # ... except that 'line' may be an entire file. ;-)
 442        my ($self, $fh) = @_;
 443        die "Not in lines" unless defined $self->{'lines'};
 444
 445        my $line;
 446        my $res=0;
 447        while (defined($line = $self->readline())) {
 448                # M U gnupg-cvs-rep/AUTHORS
 449                # Updated gnupg-cvs-rep/
 450                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 451                # /AUTHORS/1.1///T1.1
 452                # u=rw,g=rw,o=rw
 453                # 0
 454                # ok
 455
 456                if ($line =~ s/^(?:Created|Updated) //) {
 457                        $line = $self->readline(); # path
 458                        $line = $self->readline(); # Entries line
 459                        my $mode = $self->readline(); chomp $mode;
 460                        $self->{'mode'} = $mode;
 461                        defined (my $cnt = $self->readline())
 462                                or die "EOF from server after 'Changed'\n";
 463                        chomp $cnt;
 464                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 465                        $line="";
 466                        $res = $self->_fetchfile($fh, $cnt);
 467                } elsif ($line =~ s/^ //) {
 468                        print $fh $line;
 469                        $res += length($line);
 470                } elsif ($line =~ /^M\b/) {
 471                        # output, do nothing
 472                } elsif ($line =~ /^Mbinary\b/) {
 473                        my $cnt;
 474                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 475                        chomp $cnt;
 476                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 477                        $line="";
 478                        $res += $self->_fetchfile($fh, $cnt);
 479                } else {
 480                        chomp $line;
 481                        if ($line eq "ok") {
 482                                # print STDERR "S: ok (".length($res).")\n";
 483                                return $res;
 484                        } elsif ($line =~ s/^E //) {
 485                                # print STDERR "S: $line\n";
 486                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 487                                $line = $self->readline(); # filename
 488                                $line = $self->readline(); # OK
 489                                chomp $line;
 490                                die "Unknown: $line" if $line ne "ok";
 491                                return -1;
 492                        } else {
 493                                die "Unknown: $line\n";
 494                        }
 495                }
 496        }
 497        return undef;
 498}
 499sub file {
 500        my ($self,$fn,$rev) = @_;
 501        my $res;
 502
 503        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 504                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 505
 506        $self->_file($fn,$rev) and $res = $self->_line($fh);
 507
 508        if (!defined $res) {
 509            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 510            truncate $fh, 0;
 511            $self->conn();
 512            $self->_file($fn,$rev) or die "No file command send";
 513            $res = $self->_line($fh);
 514            die "Retry failed" unless defined $res;
 515        }
 516        close ($fh);
 517
 518        return ($name, $res);
 519}
 520sub _fetchfile {
 521        my ($self, $fh, $cnt) = @_;
 522        my $res = 0;
 523        my $bufsize = 1024 * 1024;
 524        while ($cnt) {
 525            if ($bufsize > $cnt) {
 526                $bufsize = $cnt;
 527            }
 528            my $buf;
 529            my $num = $self->{'socketi'}->read($buf,$bufsize);
 530            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 531            print $fh $buf;
 532            $res += $num;
 533            $cnt -= $num;
 534        }
 535        return $res;
 536}
 537
 538sub _scramble {
 539        my ($self, $pass) = @_;
 540        my $scrambled = "A";
 541
 542        return $scrambled unless $pass;
 543
 544        my $pass_len = length($pass);
 545        my @pass_arr = split("", $pass);
 546        my $i;
 547
 548        # from cvs/src/scramble.c
 549        my @shifts = (
 550                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 551                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 552                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 553                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 554                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 555                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 556                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 557                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 558                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 559                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 560                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 561                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 562                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 563                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 564                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 565                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 566        );
 567
 568        for ($i = 0; $i < $pass_len; $i++) {
 569                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 570        }
 571
 572        return $scrambled;
 573}
 574
 575package main;
 576
 577my $cvs = CVSconn->new($opt_d, $cvs_tree);
 578
 579
 580sub pdate($) {
 581        my ($d) = @_;
 582        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 583                or die "Unparseable date: $d\n";
 584        my $y=$1; $y-=1900 if $y>1900;
 585        return timegm($6||0,$5,$4,$3,$2-1,$y);
 586}
 587
 588sub pmode($) {
 589        my ($mode) = @_;
 590        my $m = 0;
 591        my $mm = 0;
 592        my $um = 0;
 593        for my $x(split(//,$mode)) {
 594                if ($x eq ",") {
 595                        $m |= $mm&$um;
 596                        $mm = 0;
 597                        $um = 0;
 598                } elsif ($x eq "u") { $um |= 0700;
 599                } elsif ($x eq "g") { $um |= 0070;
 600                } elsif ($x eq "o") { $um |= 0007;
 601                } elsif ($x eq "r") { $mm |= 0444;
 602                } elsif ($x eq "w") { $mm |= 0222;
 603                } elsif ($x eq "x") { $mm |= 0111;
 604                } elsif ($x eq "=") { # do nothing
 605                } else { die "Unknown mode: $mode\n";
 606                }
 607        }
 608        $m |= $mm&$um;
 609        return $m;
 610}
 611
 612sub getwd() {
 613        my $pwd = `pwd`;
 614        chomp $pwd;
 615        return $pwd;
 616}
 617
 618sub is_sha1 {
 619        my $s = shift;
 620        return $s =~ /^[a-f0-9]{40}$/;
 621}
 622
 623sub get_headref ($) {
 624        my $name = shift;
 625        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 626        return undef unless $? == 0;
 627        chomp $r;
 628        return $r;
 629}
 630
 631my $user_filename_prepend = '';
 632sub munge_user_filename {
 633        my $name = shift;
 634        return File::Spec->file_name_is_absolute($name) ?
 635                $name :
 636                $user_filename_prepend . $name;
 637}
 638
 639-d $git_tree
 640        or mkdir($git_tree,0777)
 641        or die "Could not create $git_tree: $!";
 642if ($git_tree ne '.') {
 643        $user_filename_prepend = getwd() . '/';
 644        chdir($git_tree);
 645}
 646
 647my $last_branch = "";
 648my $orig_branch = "";
 649my %branch_date;
 650my $tip_at_start = undef;
 651
 652my $git_dir = $ENV{"GIT_DIR"} || ".git";
 653$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 654$ENV{"GIT_DIR"} = $git_dir;
 655my $orig_git_index;
 656$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 657
 658my %index; # holds filenames of one index per branch
 659
 660unless (-d $git_dir) {
 661        system(qw(git init));
 662        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 663        system(qw(git read-tree --empty));
 664        die "Cannot init an empty tree: $?\n" if $?;
 665
 666        $last_branch = $opt_o;
 667        $orig_branch = "";
 668} else {
 669        open(F, "-|", qw(git symbolic-ref HEAD)) or
 670                die "Cannot run git symbolic-ref: $!\n";
 671        chomp ($last_branch = <F>);
 672        $last_branch = basename($last_branch);
 673        close(F);
 674        unless ($last_branch) {
 675                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 676                $last_branch = "master";
 677        }
 678        $orig_branch = $last_branch;
 679        $tip_at_start = `git rev-parse --verify HEAD`;
 680
 681        # Get the last import timestamps
 682        my $fmt = '($ref, $author) = (%(refname), %(author));';
 683        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 684        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 685        while (defined(my $entry = <H>)) {
 686                my ($ref, $author);
 687                eval($entry) || die "cannot eval refs list: $@";
 688                my ($head) = ($ref =~ m|^$remote/(.*)|);
 689                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 690                $branch_date{$head} = $1;
 691        }
 692        close(H);
 693        if (!exists $branch_date{$opt_o}) {
 694                die "Branch '$opt_o' does not exist.\n".
 695                       "Either use the correct '-o branch' option,\n".
 696                       "or import to a new repository.\n";
 697        }
 698}
 699
 700-d $git_dir
 701        or die "Could not create git subdir ($git_dir).\n";
 702
 703# now we read (and possibly save) author-info as well
 704-f "$git_dir/cvs-authors" and
 705  read_author_info("$git_dir/cvs-authors");
 706if ($opt_A) {
 707        read_author_info(munge_user_filename($opt_A));
 708        write_author_info("$git_dir/cvs-authors");
 709}
 710
 711# open .git/cvs-revisions, if requested
 712open my $revision_map, '>>', "$git_dir/cvs-revisions"
 713    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 714        if defined $opt_R;
 715
 716
 717#
 718# run cvsps into a file unless we are getting
 719# it passed as a file via $opt_P
 720#
 721my $cvspsfile;
 722unless ($opt_P) {
 723        print "Running cvsps...\n" if $opt_v;
 724        my $pid = open(CVSPS,"-|");
 725        my $cvspsfh;
 726        die "Cannot fork: $!\n" unless defined $pid;
 727        unless ($pid) {
 728                my @opt;
 729                @opt = split(/,/,$opt_p) if defined $opt_p;
 730                unshift @opt, '-z', $opt_z if defined $opt_z;
 731                unshift @opt, '-q'         unless defined $opt_v;
 732                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 733                        push @opt, '--cvs-direct';
 734                }
 735                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 736                die "Could not start cvsps: $!\n";
 737        }
 738        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 739                                          DIR => File::Spec->tmpdir());
 740        while (<CVSPS>) {
 741            print $cvspsfh $_;
 742        }
 743        close CVSPS;
 744        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 745        close $cvspsfh;
 746} else {
 747        $cvspsfile = munge_user_filename($opt_P);
 748}
 749
 750open(CVS, "<$cvspsfile") or die $!;
 751
 752## cvsps output:
 753#---------------------
 754#PatchSet 314
 755#Date: 1999/09/18 13:03:59
 756#Author: wkoch
 757#Branch: STABLE-BRANCH-1-0
 758#Ancestor branch: HEAD
 759#Tag: (none)
 760#Log:
 761#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 762#Members:
 763#       README:1.57->1.57.2.1
 764#       VERSION:1.96->1.96.2.1
 765#
 766#---------------------
 767
 768my $state = 0;
 769
 770sub update_index (\@\@) {
 771        my $old = shift;
 772        my $new = shift;
 773        open(my $fh, '|-', qw(git update-index -z --index-info))
 774                or die "unable to open git update-index: $!";
 775        print $fh
 776                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 777                        @$old),
 778                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 779                        @$new)
 780                or die "unable to write to git update-index: $!";
 781        close $fh
 782                or die "unable to write to git update-index: $!";
 783        $? and die "git update-index reported error: $?";
 784}
 785
 786sub write_tree () {
 787        open(my $fh, '-|', qw(git write-tree))
 788                or die "unable to open git write-tree: $!";
 789        chomp(my $tree = <$fh>);
 790        is_sha1($tree)
 791                or die "Cannot get tree id ($tree): $!";
 792        close($fh)
 793                or die "Error running git write-tree: $?\n";
 794        print "Tree ID $tree\n" if $opt_v;
 795        return $tree;
 796}
 797
 798my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 799my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 800
 801# commits that cvsps cannot place anywhere...
 802$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 803
 804sub commit {
 805        if ($branch eq $opt_o && !$index{branch} &&
 806                !get_headref("$remote/$branch")) {
 807            # looks like an initial commit
 808            # use the index primed by git init
 809            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 810            $index{$branch} = "$git_dir/index";
 811        } else {
 812            # use an index per branch to speed up
 813            # imports of projects with many branches
 814            unless ($index{$branch}) {
 815                $index{$branch} = tmpnam();
 816                $ENV{GIT_INDEX_FILE} = $index{$branch};
 817                if ($ancestor) {
 818                    system("git", "read-tree", "$remote/$ancestor");
 819                } else {
 820                    system("git", "read-tree", "$remote/$branch");
 821                }
 822                die "read-tree failed: $?\n" if $?;
 823            }
 824        }
 825        $ENV{GIT_INDEX_FILE} = $index{$branch};
 826
 827        update_index(@old, @new);
 828        @old = @new = ();
 829        my $tree = write_tree();
 830        my $parent = get_headref("$remote/$last_branch");
 831        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 832
 833        my @commit_args;
 834        push @commit_args, ("-p", $parent) if $parent;
 835
 836        # loose detection of merges
 837        # based on the commit msg
 838        foreach my $rx (@mergerx) {
 839                next unless $logmsg =~ $rx && $1;
 840                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 841                if (my $sha1 = get_headref("$remote/$mparent")) {
 842                        push @commit_args, '-p', "$remote/$mparent";
 843                        print "Merge parent branch: $mparent\n" if $opt_v;
 844                }
 845        }
 846
 847        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 848        $ENV{GIT_AUTHOR_NAME} = $author_name;
 849        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 850        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 851        $ENV{GIT_COMMITTER_NAME} = $author_name;
 852        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 853        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 854        my $pid = open2(my $commit_read, my $commit_write,
 855                'git', 'commit-tree', $tree, @commit_args);
 856
 857        # compatibility with git2cvs
 858        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 859        $logmsg =~ s/[\s\n]+\z//;
 860
 861        if (@skipped) {
 862            $logmsg .= "\n\n\nSKIPPED:\n\t";
 863            $logmsg .= join("\n\t", @skipped) . "\n";
 864            @skipped = ();
 865        }
 866
 867        print($commit_write "$logmsg\n") && close($commit_write)
 868                or die "Error writing to git commit-tree: $!\n";
 869
 870        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 871        chomp(my $cid = <$commit_read>);
 872        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 873        print "Commit ID $cid\n" if $opt_v;
 874        close($commit_read);
 875
 876        waitpid($pid,0);
 877        die "Error running git commit-tree: $?\n" if $?;
 878
 879        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 880                or die "Cannot write branch $branch for update: $!\n";
 881
 882        if ($revision_map) {
 883                print $revision_map "@$_ $cid\n" for @commit_revisions;
 884        }
 885        @commit_revisions = ();
 886
 887        if ($tag) {
 888                my ($xtag) = $tag;
 889                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 890                $xtag =~ tr/_/\./ if ( $opt_u );
 891                $xtag =~ s/[\/]/$opt_s/g;
 892
 893                # See refs.c for these rules.
 894                # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
 895                $xtag =~ s/[ ~\^:\\\*\?\[]//g;
 896                # Other bad strings for tags:
 897                # (See check_refname_component in refs.c.)
 898                1 while $xtag =~ s/
 899                        (?: \.\.        # Tag cannot contain '..'.
 900                        |   \@{         # Tag cannot contain '@{'.
 901                        | ^ -           # Tag cannot begin with '-'.
 902                        |   \.lock $    # Tag cannot end with '.lock'.
 903                        | ^ \.          # Tag cannot begin...
 904                        |   \. $        # ...or end with '.'
 905                        )//xg;
 906                # Tag cannot be empty.
 907                if ($xtag eq '') {
 908                        warn("warning: ignoring tag '$tag'",
 909                        " with invalid tagname\n");
 910                        return;
 911                }
 912
 913                if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
 914                        # We did our best to sanitize the tag, but still failed
 915                        # for whatever reason. Bail out, and give the user
 916                        # enough information to understand if/how we should
 917                        # improve the translation in the future.
 918                        if ($tag ne $xtag) {
 919                                print "Translated '$tag' tag to '$xtag'\n";
 920                        }
 921                        die "Cannot create tag $xtag: $!\n";
 922                }
 923
 924                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 925        }
 926};
 927
 928my $commitcount = 1;
 929while (<CVS>) {
 930        chomp;
 931        if ($state == 0 and /^-+$/) {
 932                $state = 1;
 933        } elsif ($state == 0) {
 934                $state = 1;
 935                redo;
 936        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 937                $patchset = 0+$_;
 938                $state=2;
 939        } elsif ($state == 2 and s/^Date:\s+//) {
 940                $date = pdate($_);
 941                unless ($date) {
 942                        print STDERR "Could not parse date: $_\n";
 943                        $state=0;
 944                        next;
 945                }
 946                $state=3;
 947        } elsif ($state == 3 and s/^Author:\s+//) {
 948                s/\s+$//;
 949                if (/^(.*?)\s+<(.*)>/) {
 950                    ($author_name, $author_email) = ($1, $2);
 951                } elsif ($conv_author_name{$_}) {
 952                        $author_name = $conv_author_name{$_};
 953                        $author_email = $conv_author_email{$_};
 954                } else {
 955                    $author_name = $author_email = $_;
 956                }
 957                $state = 4;
 958        } elsif ($state == 4 and s/^Branch:\s+//) {
 959                s/\s+$//;
 960                tr/_/\./ if ( $opt_u );
 961                s/[\/]/$opt_s/g;
 962                $branch = $_;
 963                $state = 5;
 964        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 965                s/\s+$//;
 966                $ancestor = $_;
 967                $ancestor = $opt_o if $ancestor eq "HEAD";
 968                $state = 6;
 969        } elsif ($state == 5) {
 970                $ancestor = undef;
 971                $state = 6;
 972                redo;
 973        } elsif ($state == 6 and s/^Tag:\s+//) {
 974                s/\s+$//;
 975                if ($_ eq "(none)") {
 976                        $tag = undef;
 977                } else {
 978                        $tag = $_;
 979                }
 980                $state = 7;
 981        } elsif ($state == 7 and /^Log:/) {
 982                $logmsg = "";
 983                $state = 8;
 984        } elsif ($state == 8 and /^Members:/) {
 985                $branch = $opt_o if $branch eq "HEAD";
 986                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 987                        # skip
 988                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 989                        $state = 11;
 990                        next;
 991                }
 992                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 993                        # skip if the commit is too recent
 994                        # given that the cvsps default fuzz is 300s, we give ourselves another
 995                        # 300s just in case -- this also prevents skipping commits
 996                        # due to server clock drift
 997                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 998                        $state = 11;
 999                        next;
1000                }
1001                if (exists $ignorebranch{$branch}) {
1002                        print STDERR "Skipping $branch\n";
1003                        $state = 11;
1004                        next;
1005                }
1006                if ($ancestor) {
1007                        if ($ancestor eq $branch) {
1008                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1009                                $ancestor = $opt_o;
1010                        }
1011                        if (defined get_headref("$remote/$branch")) {
1012                                print STDERR "Branch $branch already exists!\n";
1013                                $state=11;
1014                                next;
1015                        }
1016                        my $id = get_headref("$remote/$ancestor");
1017                        if (!$id) {
1018                                print STDERR "Branch $ancestor does not exist!\n";
1019                                $ignorebranch{$branch} = 1;
1020                                $state=11;
1021                                next;
1022                        }
1023
1024                        system(qw(git update-ref -m cvsimport),
1025                                "$remote/$branch", $id);
1026                        if($? != 0) {
1027                                print STDERR "Could not create branch $branch\n";
1028                                $ignorebranch{$branch} = 1;
1029                                $state=11;
1030                                next;
1031                        }
1032                }
1033                $last_branch = $branch if $branch ne $last_branch;
1034                $state = 9;
1035        } elsif ($state == 8) {
1036                $logmsg .= "$_\n";
1037        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1038#       VERSION:1.96->1.96.2.1
1039                my $init = ($2 eq "INITIAL");
1040                my $fn = $1;
1041                my $rev = $3;
1042                $fn =~ s#^/+##;
1043                if ($opt_S && $fn =~ m/$opt_S/) {
1044                    print "SKIPPING $fn v $rev\n";
1045                    push(@skipped, $fn);
1046                    next;
1047                }
1048                push @commit_revisions, [$fn, $rev];
1049                print "Fetching $fn   v $rev\n" if $opt_v;
1050                my ($tmpname, $size) = $cvs->file($fn,$rev);
1051                if ($size == -1) {
1052                        push(@old,$fn);
1053                        print "Drop $fn\n" if $opt_v;
1054                } else {
1055                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1056                        my $pid = open(my $F, '-|');
1057                        die $! unless defined $pid;
1058                        if (!$pid) {
1059                            exec("git", "hash-object", "-w", $tmpname)
1060                                or die "Cannot create object: $!\n";
1061                        }
1062                        my $sha = <$F>;
1063                        chomp $sha;
1064                        close $F;
1065                        my $mode = pmode($cvs->{'mode'});
1066                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1067                }
1068                unlink($tmpname);
1069        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1070                my $fn = $1;
1071                my $rev = $2;
1072                $fn =~ s#^/+##;
1073                push @commit_revisions, [$fn, $rev];
1074                push(@old,$fn);
1075                print "Delete $fn\n" if $opt_v;
1076        } elsif ($state == 9 and /^\s*$/) {
1077                $state = 10;
1078        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1079                $commitcount++;
1080                if ($opt_L && $commitcount > $opt_L) {
1081                        last;
1082                }
1083                commit();
1084                if (($commitcount & 1023) == 0) {
1085                        system(qw(git repack -a -d));
1086                }
1087                $state = 1;
1088        } elsif ($state == 11 and /^-+$/) {
1089                $state = 1;
1090        } elsif (/^-+$/) { # end of unknown-line processing
1091                $state = 1;
1092        } elsif ($state != 11) { # ignore stuff when skipping
1093                print STDERR "* UNKNOWN LINE * $_\n";
1094        }
1095}
1096commit() if $branch and $state != 11;
1097
1098unless ($opt_P) {
1099        unlink($cvspsfile);
1100}
1101
1102# The heuristic of repacking every 1024 commits can leave a
1103# lot of unpacked data.  If there is more than 1MB worth of
1104# not-packed objects, repack once more.
1105my $line = `git count-objects`;
1106if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1107  my ($n_objects, $kb) = ($1, $2);
1108  1024 < $kb
1109    and system(qw(git repack -a -d));
1110}
1111
1112foreach my $git_index (values %index) {
1113    if ($git_index ne "$git_dir/index") {
1114        unlink($git_index);
1115    }
1116}
1117
1118if (defined $orig_git_index) {
1119        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1120} else {
1121        delete $ENV{GIT_INDEX_FILE};
1122}
1123
1124# Now switch back to the branch we were in before all of this happened
1125if ($orig_branch) {
1126        print "DONE.\n" if $opt_v;
1127        if ($opt_i) {
1128                exit 0;
1129        }
1130        my $tip_at_end = `git rev-parse --verify HEAD`;
1131        if ($tip_at_start ne $tip_at_end) {
1132                for ($tip_at_start, $tip_at_end) { chomp; }
1133                print "Fetched into the current branch.\n" if $opt_v;
1134                system(qw(git read-tree -u -m),
1135                       $tip_at_start, $tip_at_end);
1136                die "Fast-forward update failed: $?\n" if $?;
1137        }
1138        else {
1139                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1140                die "Could not merge $opt_o into the current branch.\n" if $?;
1141        }
1142} else {
1143        $orig_branch = "master";
1144        print "DONE; creating $orig_branch branch\n" if $opt_v;
1145        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1146                unless defined get_headref('refs/heads/master');
1147        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1148                if ($opt_r && $opt_o ne 'HEAD');
1149        system('git', 'update-ref', 'HEAD', "$orig_branch");
1150        unless ($opt_i) {
1151                system(qw(git checkout -f));
1152                die "checkout failed: $?\n" if $?;
1153        }
1154}