0a31ebd82020f3aca0020d357c2028d5e7b5e37b
   1#!/usr/bin/perl
   2# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime tzset dup2 ENOENT);
  28use IPC::Open2;
  29$SIG{'PIPE'}="IGNORE";
  31set_timezone('UTC');
  32our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email, %conv_author_tz);
  35sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # or with an optional timezone:
  63                #   spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
  64                elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
  65                        $user = $1;
  66                        $conv_author_name{$user} = $2;
  67                        $conv_author_email{$user} = $3;
  68                        $conv_author_tz{$user} = $4;
  69                }
  70                # However, we also read from CVSROOT/users format
  71                # to ease migration.
  72                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  73                        my $mapped;
  74                        ($user, $mapped) = ($1, $3);
  75                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  76                                $conv_author_name{$user} = $1;
  77                                $conv_author_email{$user} = $2;
  78                        }
  79                        elsif ($mapped =~ /^<?(.*)>?$/) {
  80                                $conv_author_name{$user} = $user;
  81                                $conv_author_email{$user} = $1;
  82                        }
  83                }
  84                # NEEDSWORK: Maybe warn on unrecognized lines?
  85        }
  86        close ($f);
  87}
  88sub write_author_info($) {
  90        my ($file) = @_;
  91        open my $f, '>', $file or
  92          die("Failed to open $file for writing: $!");
  93        foreach (keys %conv_author_name) {
  95                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
  96                print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
  97                print $f "\n";
  98        }
  99        close ($f);
 100}
 101# Versions of perl before 5.10.0 may not automatically check $TZ each
 103# time localtime is run (most platforms will do so only the first time).
 104# We can work around this by using tzset() to update the internal
 105# variable whenever we change the environment.
 106sub set_timezone {
 107        $ENV{TZ} = shift;
 108        tzset();
 109}
 110# convert getopts specs for use by git config
 112my %longmap = (
 113        'A:' => 'authors-file',
 114        'M:' => 'merge-regex',
 115        'P:' => undef,
 116        'R' => 'track-revisions',
 117        'S:' => 'ignore-paths',
 118);
 119sub read_repo_config {
 121        # Split the string between characters, unless there is a ':'
 122        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 123        my @opts = split(/ *(?!:)/, shift);
 124        foreach my $o (@opts) {
 125                my $key = $o;
 126                $key =~ s/://g;
 127                my $arg = 'git config';
 128                $arg .= ' --bool' if ($o !~ /:$/);
 129                my $ckey = $key;
 130                if (exists $longmap{$o}) {
 132                        # An uppercase option like -R cannot be
 133                        # expressed in the configuration, as the
 134                        # variable names are downcased.
 135                        $ckey = $longmap{$o};
 136                        next if (! defined $ckey);
 137                        $ckey =~ s/-//g;
 138                }
 139                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 140                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 141                        no strict 'refs';
 142                        my $opt_name = "opt_" . $key;
 143                        if (!$$opt_name) {
 144                                $$opt_name = $tmp;
 145                        }
 146                }
 147        }
 148}
 149my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 151read_repo_config($opts);
 152Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 153# turn the Getopt::Std specification in a Getopt::Long one,
 155# with support for multiple -M options
 156GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 157    or usage();
 158usage if $opt_h;
 159if (@ARGV == 0) {
 161                chomp(my $module = `git config --get cvsimport.module`);
 162                push(@ARGV, $module) if $? == 0;
 163}
 164@ARGV <= 1 or usage("You can't specify more than one CVS module");
 165if ($opt_d) {
 167        $ENV{"CVSROOT"} = $opt_d;
 168} elsif (-f 'CVS/Root') {
 169        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 170        $opt_d = <$f>;
 171        chomp $opt_d;
 172        close $f;
 173        $ENV{"CVSROOT"} = $opt_d;
 174} elsif ($ENV{"CVSROOT"}) {
 175        $opt_d = $ENV{"CVSROOT"};
 176} else {
 177        usage("CVSROOT needs to be set");
 178}
 179$opt_s ||= "-";
 180$opt_a ||= 0;
 181my $git_tree = $opt_C;
 183$git_tree ||= ".";
 184my $remote;
 186if (defined $opt_r) {
 187        $remote = 'refs/remotes/' . $opt_r;
 188        $opt_o ||= "master";
 189} else {
 190        $opt_o ||= "origin";
 191        $remote = 'refs/heads';
 192}
 193my $cvs_tree;
 195if ($#ARGV == 0) {
 196        $cvs_tree = $ARGV[0];
 197} elsif (-f 'CVS/Repository') {
 198        open my $f, '<', 'CVS/Repository' or
 199            die 'Failed to open CVS/Repository';
 200        $cvs_tree = <$f>;
 201        chomp $cvs_tree;
 202        close $f;
 203} else {
 204        usage("CVS module has to be specified");
 205}
 206our @mergerx = ();
 208if ($opt_m) {
 209        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 210}
 211if (@opt_M) {
 212        push (@mergerx, map { qr/$_/ } @opt_M);
 213}
 214# Remember UTC of our starting time
 216# we'll want to avoid importing commits
 217# that are too recent
 218our $starttime = time();
 219select(STDERR); $|=1; select(STDOUT);
 221package CVSconn;
 224# Basic CVS dialog.
 225# We're only interested in connecting and downloading, so ...
 226use File::Spec;
 228use File::Temp qw(tempfile);
 229use POSIX qw(strftime dup2);
 230sub new {
 232        my ($what,$repo,$subdir) = @_;
 233        $what=ref($what) if ref($what);
 234        my $self = {};
 236        $self->{'buffer'} = "";
 237        bless($self,$what);
 238        $repo =~ s#/+$##;
 240        $self->{'fullrep'} = $repo;
 241        $self->conn();
 242        $self->{'subdir'} = $subdir;
 244        $self->{'lines'} = undef;
 245        return $self;
 247}
 248sub find_password_entry {
 250        my ($cvspass, @cvsroot) = @_;
 251        my ($file, $delim) = @$cvspass;
 252        my $pass;
 253        local ($_);
 254        if (open(my $fh, $file)) {
 256                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 257                CVSPASSFILE:
 258                while (<$fh>) {
 259                        chomp;
 260                        s/^\/\d+\s+//;
 261                        my ($w, $p) = split($delim,$_,2);
 262                        for my $cvsroot (@cvsroot) {
 263                                if ($w eq $cvsroot) {
 264                                        $pass = $p;
 265                                        last CVSPASSFILE;
 266                                }
 267                        }
 268                }
 269                close($fh);
 270        }
 271        return $pass;
 272}
 273sub conn {
 275        my $self = shift;
 276        my $repo = $self->{'fullrep'};
 277        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 278                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 279                my ($proxyhost,$proxyport);
 281                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 282                        $proxyhost = $1;
 283                        # Default proxyport, if not specified, is 8080.
 284                        $proxyport = 8080;
 285                        if ($ENV{"CVS_PROXY_PORT"}) {
 286                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 287                        }
 288                        if ($param =~ m/proxyport=([^;]+)/) {
 289                                $proxyport = $1;
 290                        }
 291                }
 292                $repo ||= '/';
 293                # if username is not explicit in CVSROOT, then use current user, as cvs would
 295                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 296                my $rr2 = "-";
 297                unless ($port) {
 298                        $rr2 = ":pserver:$user\@$serv:$repo";
 299                        $port=2401;
 300                }
 301                my $rr = ":pserver:$user\@$serv:$port$repo";
 302                if ($pass) {
 304                        $pass = $self->_scramble($pass);
 305                } else {
 306                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 307                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 308                        my @loc = ();
 309                        foreach my $cvspass (@cvspass) {
 310                                my $p = find_password_entry($cvspass, $rr, $rr2);
 311                                if ($p) {
 312                                        push @loc, $cvspass->[0];
 313                                        $pass = $p;
 314                                }
 315                        }
 316                        if (1 < @loc) {
 318                                die("Multiple cvs password files have ".
 319                                    "entries for CVSROOT $opt_d: @loc");
 320                        } elsif (!$pass) {
 321                                $pass = "A";
 322                        }
 323                }
 324                my ($s, $rep);
 326                if ($proxyhost) {
 327                        # Use a HTTP Proxy. Only works for HTTP proxies that
 329                        # don't require user authentication
 330                        #
 331                        # See: http://www.ietf.org/rfc/rfc2817.txt
 332                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 334                        die "Socket to $proxyhost: $!\n" unless defined $s;
 335                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 336                                or die "Write to $proxyhost: $!\n";
 337                        $s->flush();
 338                        $rep = <$s>;
 340                        # The answer should look like 'HTTP/1.x 2yy ....'
 342                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 343                                die "Proxy connect: $rep\n";
 344                        }
 345                        # Skip up to the empty line of the proxy server output
 346                        # including the response headers.
 347                        while ($rep = <$s>) {
 348                                last if (!defined $rep ||
 349                                         $rep eq "\n" ||
 350                                         $rep eq "\r\n");
 351                        }
 352                } else {
 353                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 354                        die "Socket to $serv: $!\n" unless defined $s;
 355                }
 356                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 358                        or die "Write to $serv: $!\n";
 359                $s->flush();
 360                $rep = <$s>;
 362                if ($rep ne "I LOVE YOU\n") {
 364                        $rep="<unknown>" unless $rep;
 365                        die "AuthReply: $rep\n";
 366                }
 367                $self->{'socketo'} = $s;
 368                $self->{'socketi'} = $s;
 369        } else { # local or ext: Fork off our own cvs server.
 370                my $pr = IO::Pipe->new();
 371                my $pw = IO::Pipe->new();
 372                my $pid = fork();
 373                die "Fork: $!\n" unless defined $pid;
 374                my $cvs = 'cvs';
 375                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 376                my $rsh = 'rsh';
 377                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 378                my @cvs = ($cvs, 'server');
 380                my ($local, $user, $host);
 381                $local = $repo =~ s/:local://;
 382                if (!$local) {
 383                    $repo =~ s/:ext://;
 384                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 385                    ($user, $host) = ($1, $2);
 386                }
 387                if (!$local) {
 388                    if ($user) {
 389                        unshift @cvs, $rsh, '-l', $user, $host;
 390                    } else {
 391                        unshift @cvs, $rsh, $host;
 392                    }
 393                }
 394                unless ($pid) {
 396                        $pr->writer();
 397                        $pw->reader();
 398                        dup2($pw->fileno(),0);
 399                        dup2($pr->fileno(),1);
 400                        $pr->close();
 401                        $pw->close();
 402                        exec(@cvs);
 403                }
 404                $pw->writer();
 405                $pr->reader();
 406                $self->{'socketo'} = $pw;
 407                $self->{'socketi'} = $pr;
 408        }
 409        $self->{'socketo'}->write("Root $repo\n");
 410        # Trial and error says that this probably is the minimum set
 412        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 413        $self->{'socketo'}->write("valid-requests\n");
 415        $self->{'socketo'}->flush();
 416        my $rep=$self->readline();
 418        die "Failed to read from server" unless defined $rep;
 419        chomp($rep);
 420        if ($rep !~ s/^Valid-requests\s*//) {
 421                $rep="<unknown>" unless $rep;
 422                die "Expected Valid-requests from server, but got: $rep\n";
 423        }
 424        chomp(my $res=$self->readline());
 425        die "validReply: $res\n" if $res ne "ok";
 426        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 428        $self->{'repo'} = $repo;
 429}
 430sub readline {
 432        my ($self) = @_;
 433        return $self->{'socketi'}->getline();
 434}
 435sub _file {
 437        # Request a file with a given revision.
 438        # Trial and error says this is a good way to do it. :-/
 439        my ($self,$fn,$rev) = @_;
 440        $self->{'socketo'}->write("Argument -N\n") or return undef;
 441        $self->{'socketo'}->write("Argument -P\n") or return undef;
 442        # -kk: Linus' version doesn't use it - defaults to off
 443        if ($opt_k) {
 444            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 445        }
 446        $self->{'socketo'}->write("Argument -r\n") or return undef;
 447        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 448        $self->{'socketo'}->write("Argument --\n") or return undef;
 449        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 450        $self->{'socketo'}->write("Directory .\n") or return undef;
 451        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 452        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 453        $self->{'socketo'}->write("co\n") or return undef;
 454        $self->{'socketo'}->flush() or return undef;
 455        $self->{'lines'} = 0;
 456        return 1;
 457}
 458sub _line {
 459        # Read a line from the server.
 460        # ... except that 'line' may be an entire file. ;-)
 461        my ($self, $fh) = @_;
 462        die "Not in lines" unless defined $self->{'lines'};
 463        my $line;
 465        my $res=0;
 466        while (defined($line = $self->readline())) {
 467                # M U gnupg-cvs-rep/AUTHORS
 468                # Updated gnupg-cvs-rep/
 469                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 470                # /AUTHORS/1.1///T1.1
 471                # u=rw,g=rw,o=rw
 472                # 0
 473                # ok
 474                if ($line =~ s/^(?:Created|Updated) //) {
 476                        $line = $self->readline(); # path
 477                        $line = $self->readline(); # Entries line
 478                        my $mode = $self->readline(); chomp $mode;
 479                        $self->{'mode'} = $mode;
 480                        defined (my $cnt = $self->readline())
 481                                or die "EOF from server after 'Changed'\n";
 482                        chomp $cnt;
 483                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 484                        $line="";
 485                        $res = $self->_fetchfile($fh, $cnt);
 486                } elsif ($line =~ s/^ //) {
 487                        print $fh $line;
 488                        $res += length($line);
 489                } elsif ($line =~ /^M\b/) {
 490                        # output, do nothing
 491                } elsif ($line =~ /^Mbinary\b/) {
 492                        my $cnt;
 493                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 494                        chomp $cnt;
 495                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 496                        $line="";
 497                        $res += $self->_fetchfile($fh, $cnt);
 498                } else {
 499                        chomp $line;
 500                        if ($line eq "ok") {
 501                                # print STDERR "S: ok (".length($res).")\n";
 502                                return $res;
 503                        } elsif ($line =~ s/^E //) {
 504                                # print STDERR "S: $line\n";
 505                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 506                                $line = $self->readline(); # filename
 507                                $line = $self->readline(); # OK
 508                                chomp $line;
 509                                die "Unknown: $line" if $line ne "ok";
 510                                return -1;
 511                        } else {
 512                                die "Unknown: $line\n";
 513                        }
 514                }
 515        }
 516        return undef;
 517}
 518sub file {
 519        my ($self,$fn,$rev) = @_;
 520        my $res;
 521        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 523                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 524        $self->_file($fn,$rev) and $res = $self->_line($fh);
 526        if (!defined $res) {
 528            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 529            truncate $fh, 0;
 530            $self->conn();
 531            $self->_file($fn,$rev) or die "No file command send";
 532            $res = $self->_line($fh);
 533            die "Retry failed" unless defined $res;
 534        }
 535        close ($fh);
 536        return ($name, $res);
 538}
 539sub _fetchfile {
 540        my ($self, $fh, $cnt) = @_;
 541        my $res = 0;
 542        my $bufsize = 1024 * 1024;
 543        while ($cnt) {
 544            if ($bufsize > $cnt) {
 545                $bufsize = $cnt;
 546            }
 547            my $buf;
 548            my $num = $self->{'socketi'}->read($buf,$bufsize);
 549            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 550            print $fh $buf;
 551            $res += $num;
 552            $cnt -= $num;
 553        }
 554        return $res;
 555}
 556sub _scramble {
 558        my ($self, $pass) = @_;
 559        my $scrambled = "A";
 560        return $scrambled unless $pass;
 562        my $pass_len = length($pass);
 564        my @pass_arr = split("", $pass);
 565        my $i;
 566        # from cvs/src/scramble.c
 568        my @shifts = (
 569                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 570                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 571                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 572                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 573                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 574                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 575                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 576                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 577                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 578                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 579                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 580                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 581                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 582                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 583                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 584                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 585        );
 586        for ($i = 0; $i < $pass_len; $i++) {
 588                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 589        }
 590        return $scrambled;
 592}
 593package main;
 595my $cvs = CVSconn->new($opt_d, $cvs_tree);
 597sub pdate($) {
 600        my ($d) = @_;
 601        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 602                or die "Unparseable date: $d\n";
 603        my $y=$1; $y-=1900 if $y>1900;
 604        return timegm($6||0,$5,$4,$3,$2-1,$y);
 605}
 606sub pmode($) {
 608        my ($mode) = @_;
 609        my $m = 0;
 610        my $mm = 0;
 611        my $um = 0;
 612        for my $x(split(//,$mode)) {
 613                if ($x eq ",") {
 614                        $m |= $mm&$um;
 615                        $mm = 0;
 616                        $um = 0;
 617                } elsif ($x eq "u") { $um |= 0700;
 618                } elsif ($x eq "g") { $um |= 0070;
 619                } elsif ($x eq "o") { $um |= 0007;
 620                } elsif ($x eq "r") { $mm |= 0444;
 621                } elsif ($x eq "w") { $mm |= 0222;
 622                } elsif ($x eq "x") { $mm |= 0111;
 623                } elsif ($x eq "=") { # do nothing
 624                } else { die "Unknown mode: $mode\n";
 625                }
 626        }
 627        $m |= $mm&$um;
 628        return $m;
 629}
 630sub getwd() {
 632        my $pwd = `pwd`;
 633        chomp $pwd;
 634        return $pwd;
 635}
 636sub is_sha1 {
 638        my $s = shift;
 639        return $s =~ /^[a-f0-9]{40}$/;
 640}
 641sub get_headref ($) {
 643        my $name = shift;
 644        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 645        return undef unless $? == 0;
 646        chomp $r;
 647        return $r;
 648}
 649my $user_filename_prepend = '';
 651sub munge_user_filename {
 652        my $name = shift;
 653        return File::Spec->file_name_is_absolute($name) ?
 654                $name :
 655                $user_filename_prepend . $name;
 656}
 657-d $git_tree
 659        or mkdir($git_tree,0777)
 660        or die "Could not create $git_tree: $!";
 661if ($git_tree ne '.') {
 662        $user_filename_prepend = getwd() . '/';
 663        chdir($git_tree);
 664}
 665my $last_branch = "";
 667my $orig_branch = "";
 668my %branch_date;
 669my $tip_at_start = undef;
 670my $git_dir = $ENV{"GIT_DIR"} || ".git";
 672$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 673$ENV{"GIT_DIR"} = $git_dir;
 674my $orig_git_index;
 675$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 676my %index; # holds filenames of one index per branch
 678unless (-d $git_dir) {
 680        system(qw(git init));
 681        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 682        system(qw(git read-tree --empty));
 683        die "Cannot init an empty tree: $?\n" if $?;
 684        $last_branch = $opt_o;
 686        $orig_branch = "";
 687} else {
 688        open(F, "-|", qw(git symbolic-ref HEAD)) or
 689                die "Cannot run git symbolic-ref: $!\n";
 690        chomp ($last_branch = <F>);
 691        $last_branch = basename($last_branch);
 692        close(F);
 693        unless ($last_branch) {
 694                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 695                $last_branch = "master";
 696        }
 697        $orig_branch = $last_branch;
 698        $tip_at_start = `git rev-parse --verify HEAD`;
 699        # Get the last import timestamps
 701        my $fmt = '($ref, $author) = (%(refname), %(author));';
 702        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 703        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 704        while (defined(my $entry = <H>)) {
 705                my ($ref, $author);
 706                eval($entry) || die "cannot eval refs list: $@";
 707                my ($head) = ($ref =~ m|^$remote/(.*)|);
 708                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 709                $branch_date{$head} = $1;
 710        }
 711        close(H);
 712        if (!exists $branch_date{$opt_o}) {
 713                die "Branch '$opt_o' does not exist.\n".
 714                       "Either use the correct '-o branch' option,\n".
 715                       "or import to a new repository.\n";
 716        }
 717}
 718-d $git_dir
 720        or die "Could not create git subdir ($git_dir).\n";
 721# now we read (and possibly save) author-info as well
 723-f "$git_dir/cvs-authors" and
 724  read_author_info("$git_dir/cvs-authors");
 725if ($opt_A) {
 726        read_author_info(munge_user_filename($opt_A));
 727        write_author_info("$git_dir/cvs-authors");
 728}
 729# open .git/cvs-revisions, if requested
 731open my $revision_map, '>>', "$git_dir/cvs-revisions"
 732    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 733        if defined $opt_R;
 734#
 737# run cvsps into a file unless we are getting
 738# it passed as a file via $opt_P
 739#
 740my $cvspsfile;
 741unless ($opt_P) {
 742        print "Running cvsps...\n" if $opt_v;
 743        my $pid = open(CVSPS,"-|");
 744        my $cvspsfh;
 745        die "Cannot fork: $!\n" unless defined $pid;
 746        unless ($pid) {
 747                my @opt;
 748                @opt = split(/,/,$opt_p) if defined $opt_p;
 749                unshift @opt, '-z', $opt_z if defined $opt_z;
 750                unshift @opt, '-q'         unless defined $opt_v;
 751                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 752                        push @opt, '--cvs-direct';
 753                }
 754                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 755                die "Could not start cvsps: $!\n";
 756        }
 757        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 758                                          DIR => File::Spec->tmpdir());
 759        while (<CVSPS>) {
 760            print $cvspsfh $_;
 761        }
 762        close CVSPS;
 763        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 764        close $cvspsfh;
 765} else {
 766        $cvspsfile = munge_user_filename($opt_P);
 767}
 768open(CVS, "<$cvspsfile") or die $!;
 770## cvsps output:
 772#---------------------
 773#PatchSet 314
 774#Date: 1999/09/18 13:03:59
 775#Author: wkoch
 776#Branch: STABLE-BRANCH-1-0
 777#Ancestor branch: HEAD
 778#Tag: (none)
 779#Log:
 780#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 781#Members:
 782#       README:1.57->1.57.2.1
 783#       VERSION:1.96->1.96.2.1
 784#
 785#---------------------
 786my $state = 0;
 788sub update_index (\@\@) {
 790        my $old = shift;
 791        my $new = shift;
 792        open(my $fh, '|-', qw(git update-index -z --index-info))
 793                or die "unable to open git update-index: $!";
 794        print $fh
 795                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 796                        @$old),
 797                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 798                        @$new)
 799                or die "unable to write to git update-index: $!";
 800        close $fh
 801                or die "unable to write to git update-index: $!";
 802        $? and die "git update-index reported error: $?";
 803}
 804sub write_tree () {
 806        open(my $fh, '-|', qw(git write-tree))
 807                or die "unable to open git write-tree: $!";
 808        chomp(my $tree = <$fh>);
 809        is_sha1($tree)
 810                or die "Cannot get tree id ($tree): $!";
 811        close($fh)
 812                or die "Error running git write-tree: $?\n";
 813        print "Tree ID $tree\n" if $opt_v;
 814        return $tree;
 815}
 816my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
 818my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 819# commits that cvsps cannot place anywhere...
 821$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 822sub commit {
 824        if ($branch eq $opt_o && !$index{branch} &&
 825                !get_headref("$remote/$branch")) {
 826            # looks like an initial commit
 827            # use the index primed by git init
 828            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 829            $index{$branch} = "$git_dir/index";
 830        } else {
 831            # use an index per branch to speed up
 832            # imports of projects with many branches
 833            unless ($index{$branch}) {
 834                $index{$branch} = tmpnam();
 835                $ENV{GIT_INDEX_FILE} = $index{$branch};
 836                if ($ancestor) {
 837                    system("git", "read-tree", "$remote/$ancestor");
 838                } else {
 839                    system("git", "read-tree", "$remote/$branch");
 840                }
 841                die "read-tree failed: $?\n" if $?;
 842            }
 843        }
 844        $ENV{GIT_INDEX_FILE} = $index{$branch};
 845        update_index(@old, @new);
 847        @old = @new = ();
 848        my $tree = write_tree();
 849        my $parent = get_headref("$remote/$last_branch");
 850        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 851        my @commit_args;
 853        push @commit_args, ("-p", $parent) if $parent;
 854        # loose detection of merges
 856        # based on the commit msg
 857        foreach my $rx (@mergerx) {
 858                next unless $logmsg =~ $rx && $1;
 859                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 860                if (my $sha1 = get_headref("$remote/$mparent")) {
 861                        push @commit_args, '-p', "$remote/$mparent";
 862                        print "Merge parent branch: $mparent\n" if $opt_v;
 863                }
 864        }
 865        set_timezone($author_tz);
 867        my $commit_date = strftime("%s %z", localtime($date));
 868        set_timezone('UTC');
 869        $ENV{GIT_AUTHOR_NAME} = $author_name;
 870        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 871        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 872        $ENV{GIT_COMMITTER_NAME} = $author_name;
 873        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 874        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 875        my $pid = open2(my $commit_read, my $commit_write,
 876                'git', 'commit-tree', $tree, @commit_args);
 877        # compatibility with git2cvs
 879        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 880        $logmsg =~ s/[\s\n]+\z//;
 881        if (@skipped) {
 883            $logmsg .= "\n\n\nSKIPPED:\n\t";
 884            $logmsg .= join("\n\t", @skipped) . "\n";
 885            @skipped = ();
 886        }
 887        print($commit_write "$logmsg\n") && close($commit_write)
 889                or die "Error writing to git commit-tree: $!\n";
 890        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 892        chomp(my $cid = <$commit_read>);
 893        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 894        print "Commit ID $cid\n" if $opt_v;
 895        close($commit_read);
 896        waitpid($pid,0);
 898        die "Error running git commit-tree: $?\n" if $?;
 899        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 901                or die "Cannot write branch $branch for update: $!\n";
 902        if ($revision_map) {
 904                print $revision_map "@$_ $cid\n" for @commit_revisions;
 905        }
 906        @commit_revisions = ();
 907        if ($tag) {
 909                my ($xtag) = $tag;
 910                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 911                $xtag =~ tr/_/\./ if ( $opt_u );
 912                $xtag =~ s/[\/]/$opt_s/g;
 913                # See refs.c for these rules.
 915                # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
 916                $xtag =~ s/[ ~\^:\\\*\?\[]//g;
 917                # Other bad strings for tags:
 918                # (See check_refname_component in refs.c.)
 919                1 while $xtag =~ s/
 920                        (?: \.\.        # Tag cannot contain '..'.
 921                        |   \@{         # Tag cannot contain '@{'.
 922                        | ^ -           # Tag cannot begin with '-'.
 923                        |   \.lock $    # Tag cannot end with '.lock'.
 924                        | ^ \.          # Tag cannot begin...
 925                        |   \. $        # ...or end with '.'
 926                        )//xg;
 927                # Tag cannot be empty.
 928                if ($xtag eq '') {
 929                        warn("warning: ignoring tag '$tag'",
 930                        " with invalid tagname\n");
 931                        return;
 932                }
 933                if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
 935                        # We did our best to sanitize the tag, but still failed
 936                        # for whatever reason. Bail out, and give the user
 937                        # enough information to understand if/how we should
 938                        # improve the translation in the future.
 939                        if ($tag ne $xtag) {
 940                                print "Translated '$tag' tag to '$xtag'\n";
 941                        }
 942                        die "Cannot create tag $xtag: $!\n";
 943                }
 944                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 946        }
 947};
 948my $commitcount = 1;
 950while (<CVS>) {
 951        chomp;
 952        if ($state == 0 and /^-+$/) {
 953                $state = 1;
 954        } elsif ($state == 0) {
 955                $state = 1;
 956                redo;
 957        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 958                $patchset = 0+$_;
 959                $state=2;
 960        } elsif ($state == 2 and s/^Date:\s+//) {
 961                $date = pdate($_);
 962                unless ($date) {
 963                        print STDERR "Could not parse date: $_\n";
 964                        $state=0;
 965                        next;
 966                }
 967                $state=3;
 968        } elsif ($state == 3 and s/^Author:\s+//) {
 969                $author_tz = "UTC";
 970                s/\s+$//;
 971                if (/^(.*?)\s+<(.*)>/) {
 972                    ($author_name, $author_email) = ($1, $2);
 973                } elsif ($conv_author_name{$_}) {
 974                        $author_name = $conv_author_name{$_};
 975                        $author_email = $conv_author_email{$_};
 976                        $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
 977                } else {
 978                    $author_name = $author_email = $_;
 979                }
 980                $state = 4;
 981        } elsif ($state == 4 and s/^Branch:\s+//) {
 982                s/\s+$//;
 983                tr/_/\./ if ( $opt_u );
 984                s/[\/]/$opt_s/g;
 985                $branch = $_;
 986                $state = 5;
 987        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 988                s/\s+$//;
 989                $ancestor = $_;
 990                $ancestor = $opt_o if $ancestor eq "HEAD";
 991                $state = 6;
 992        } elsif ($state == 5) {
 993                $ancestor = undef;
 994                $state = 6;
 995                redo;
 996        } elsif ($state == 6 and s/^Tag:\s+//) {
 997                s/\s+$//;
 998                if ($_ eq "(none)") {
 999                        $tag = undef;
1000                } else {
1001                        $tag = $_;
1002                }
1003                $state = 7;
1004        } elsif ($state == 7 and /^Log:/) {
1005                $logmsg = "";
1006                $state = 8;
1007        } elsif ($state == 8 and /^Members:/) {
1008                $branch = $opt_o if $branch eq "HEAD";
1009                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1010                        # skip
1011                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1012                        $state = 11;
1013                        next;
1014                }
1015                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1016                        # skip if the commit is too recent
1017                        # given that the cvsps default fuzz is 300s, we give ourselves another
1018                        # 300s just in case -- this also prevents skipping commits
1019                        # due to server clock drift
1020                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
1021                        $state = 11;
1022                        next;
1023                }
1024                if (exists $ignorebranch{$branch}) {
1025                        print STDERR "Skipping $branch\n";
1026                        $state = 11;
1027                        next;
1028                }
1029                if ($ancestor) {
1030                        if ($ancestor eq $branch) {
1031                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1032                                $ancestor = $opt_o;
1033                        }
1034                        if (defined get_headref("$remote/$branch")) {
1035                                print STDERR "Branch $branch already exists!\n";
1036                                $state=11;
1037                                next;
1038                        }
1039                        my $id = get_headref("$remote/$ancestor");
1040                        if (!$id) {
1041                                print STDERR "Branch $ancestor does not exist!\n";
1042                                $ignorebranch{$branch} = 1;
1043                                $state=11;
1044                                next;
1045                        }
1046                        system(qw(git update-ref -m cvsimport),
1048                                "$remote/$branch", $id);
1049                        if($? != 0) {
1050                                print STDERR "Could not create branch $branch\n";
1051                                $ignorebranch{$branch} = 1;
1052                                $state=11;
1053                                next;
1054                        }
1055                }
1056                $last_branch = $branch if $branch ne $last_branch;
1057                $state = 9;
1058        } elsif ($state == 8) {
1059                $logmsg .= "$_\n";
1060        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1061#       VERSION:1.96->1.96.2.1
1062                my $init = ($2 eq "INITIAL");
1063                my $fn = $1;
1064                my $rev = $3;
1065                $fn =~ s#^/+##;
1066                if ($opt_S && $fn =~ m/$opt_S/) {
1067                    print "SKIPPING $fn v $rev\n";
1068                    push(@skipped, $fn);
1069                    next;
1070                }
1071                push @commit_revisions, [$fn, $rev];
1072                print "Fetching $fn   v $rev\n" if $opt_v;
1073                my ($tmpname, $size) = $cvs->file($fn,$rev);
1074                if ($size == -1) {
1075                        push(@old,$fn);
1076                        print "Drop $fn\n" if $opt_v;
1077                } else {
1078                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1079                        my $pid = open(my $F, '-|');
1080                        die $! unless defined $pid;
1081                        if (!$pid) {
1082                            exec("git", "hash-object", "-w", $tmpname)
1083                                or die "Cannot create object: $!\n";
1084                        }
1085                        my $sha = <$F>;
1086                        chomp $sha;
1087                        close $F;
1088                        my $mode = pmode($cvs->{'mode'});
1089                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1090                }
1091                unlink($tmpname);
1092        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1093                my $fn = $1;
1094                my $rev = $2;
1095                $fn =~ s#^/+##;
1096                push @commit_revisions, [$fn, $rev];
1097                push(@old,$fn);
1098                print "Delete $fn\n" if $opt_v;
1099        } elsif ($state == 9 and /^\s*$/) {
1100                $state = 10;
1101        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1102                $commitcount++;
1103                if ($opt_L && $commitcount > $opt_L) {
1104                        last;
1105                }
1106                commit();
1107                if (($commitcount & 1023) == 0) {
1108                        system(qw(git repack -a -d));
1109                }
1110                $state = 1;
1111        } elsif ($state == 11 and /^-+$/) {
1112                $state = 1;
1113        } elsif (/^-+$/) { # end of unknown-line processing
1114                $state = 1;
1115        } elsif ($state != 11) { # ignore stuff when skipping
1116                print STDERR "* UNKNOWN LINE * $_\n";
1117        }
1118}
1119commit() if $branch and $state != 11;
1120unless ($opt_P) {
1122        unlink($cvspsfile);
1123}
1124# The heuristic of repacking every 1024 commits can leave a
1126# lot of unpacked data.  If there is more than 1MB worth of
1127# not-packed objects, repack once more.
1128my $line = `git count-objects`;
1129if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1130  my ($n_objects, $kb) = ($1, $2);
1131  1024 < $kb
1132    and system(qw(git repack -a -d));
1133}
1134foreach my $git_index (values %index) {
1136    if ($git_index ne "$git_dir/index") {
1137        unlink($git_index);
1138    }
1139}
1140if (defined $orig_git_index) {
1142        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1143} else {
1144        delete $ENV{GIT_INDEX_FILE};
1145}
1146# Now switch back to the branch we were in before all of this happened
1148if ($orig_branch) {
1149        print "DONE.\n" if $opt_v;
1150        if ($opt_i) {
1151                exit 0;
1152        }
1153        my $tip_at_end = `git rev-parse --verify HEAD`;
1154        if ($tip_at_start ne $tip_at_end) {
1155                for ($tip_at_start, $tip_at_end) { chomp; }
1156                print "Fetched into the current branch.\n" if $opt_v;
1157                system(qw(git read-tree -u -m),
1158                       $tip_at_start, $tip_at_end);
1159                die "Fast-forward update failed: $?\n" if $?;
1160        }
1161        else {
1162                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1163                die "Could not merge $opt_o into the current branch.\n" if $?;
1164        }
1165} else {
1166        $orig_branch = "master";
1167        print "DONE; creating $orig_branch branch\n" if $opt_v;
1168        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1169                unless defined get_headref('refs/heads/master');
1170        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1171                if ($opt_r && $opt_o ne 'HEAD');
1172        system('git', 'update-ref', 'HEAD', "$orig_branch");
1173        unless ($opt_i) {
1174                system(qw(git checkout -f));
1175                die "checkout failed: $?\n" if $?;
1176        }
1177}