ceb119d89104648c37bf0bcd770acadf796cd9d1
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime dup2 ENOENT);
  28use IPC::Open2;
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email, %conv_author_tz);
  35
  36sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48
  49sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53
  54        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # or with an optional timezone:
  63                #   spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
  64                elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
  65                        $user = $1;
  66                        $conv_author_name{$user} = $2;
  67                        $conv_author_email{$user} = $3;
  68                        $conv_author_tz{$user} = $4;
  69                }
  70                # However, we also read from CVSROOT/users format
  71                # to ease migration.
  72                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  73                        my $mapped;
  74                        ($user, $mapped) = ($1, $3);
  75                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  76                                $conv_author_name{$user} = $1;
  77                                $conv_author_email{$user} = $2;
  78                        }
  79                        elsif ($mapped =~ /^<?(.*)>?$/) {
  80                                $conv_author_name{$user} = $user;
  81                                $conv_author_email{$user} = $1;
  82                        }
  83                }
  84                # NEEDSWORK: Maybe warn on unrecognized lines?
  85        }
  86        close ($f);
  87}
  88
  89sub write_author_info($) {
  90        my ($file) = @_;
  91        open my $f, '>', $file or
  92          die("Failed to open $file for writing: $!");
  93
  94        foreach (keys %conv_author_name) {
  95                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
  96                print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
  97                print $f "\n";
  98        }
  99        close ($f);
 100}
 101
 102# convert getopts specs for use by git config
 103my %longmap = (
 104        'A:' => 'authors-file',
 105        'M:' => 'merge-regex',
 106        'P:' => undef,
 107        'R' => 'track-revisions',
 108        'S:' => 'ignore-paths',
 109);
 110
 111sub read_repo_config {
 112        # Split the string between characters, unless there is a ':'
 113        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 114        my @opts = split(/ *(?!:)/, shift);
 115        foreach my $o (@opts) {
 116                my $key = $o;
 117                $key =~ s/://g;
 118                my $arg = 'git config';
 119                $arg .= ' --bool' if ($o !~ /:$/);
 120                my $ckey = $key;
 121
 122                if (exists $longmap{$o}) {
 123                        # An uppercase option like -R cannot be
 124                        # expressed in the configuration, as the
 125                        # variable names are downcased.
 126                        $ckey = $longmap{$o};
 127                        next if (! defined $ckey);
 128                        $ckey =~ s/-//g;
 129                }
 130                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 131                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 132                        no strict 'refs';
 133                        my $opt_name = "opt_" . $key;
 134                        if (!$$opt_name) {
 135                                $$opt_name = $tmp;
 136                        }
 137                }
 138        }
 139}
 140
 141my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 142read_repo_config($opts);
 143Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 144
 145# turn the Getopt::Std specification in a Getopt::Long one,
 146# with support for multiple -M options
 147GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 148    or usage();
 149usage if $opt_h;
 150
 151if (@ARGV == 0) {
 152                chomp(my $module = `git config --get cvsimport.module`);
 153                push(@ARGV, $module) if $? == 0;
 154}
 155@ARGV <= 1 or usage("You can't specify more than one CVS module");
 156
 157if ($opt_d) {
 158        $ENV{"CVSROOT"} = $opt_d;
 159} elsif (-f 'CVS/Root') {
 160        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 161        $opt_d = <$f>;
 162        chomp $opt_d;
 163        close $f;
 164        $ENV{"CVSROOT"} = $opt_d;
 165} elsif ($ENV{"CVSROOT"}) {
 166        $opt_d = $ENV{"CVSROOT"};
 167} else {
 168        usage("CVSROOT needs to be set");
 169}
 170$opt_s ||= "-";
 171$opt_a ||= 0;
 172
 173my $git_tree = $opt_C;
 174$git_tree ||= ".";
 175
 176my $remote;
 177if (defined $opt_r) {
 178        $remote = 'refs/remotes/' . $opt_r;
 179        $opt_o ||= "master";
 180} else {
 181        $opt_o ||= "origin";
 182        $remote = 'refs/heads';
 183}
 184
 185my $cvs_tree;
 186if ($#ARGV == 0) {
 187        $cvs_tree = $ARGV[0];
 188} elsif (-f 'CVS/Repository') {
 189        open my $f, '<', 'CVS/Repository' or
 190            die 'Failed to open CVS/Repository';
 191        $cvs_tree = <$f>;
 192        chomp $cvs_tree;
 193        close $f;
 194} else {
 195        usage("CVS module has to be specified");
 196}
 197
 198our @mergerx = ();
 199if ($opt_m) {
 200        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 201}
 202if (@opt_M) {
 203        push (@mergerx, map { qr/$_/ } @opt_M);
 204}
 205
 206# Remember UTC of our starting time
 207# we'll want to avoid importing commits
 208# that are too recent
 209our $starttime = time();
 210
 211select(STDERR); $|=1; select(STDOUT);
 212
 213
 214package CVSconn;
 215# Basic CVS dialog.
 216# We're only interested in connecting and downloading, so ...
 217
 218use File::Spec;
 219use File::Temp qw(tempfile);
 220use POSIX qw(strftime dup2);
 221
 222sub new {
 223        my ($what,$repo,$subdir) = @_;
 224        $what=ref($what) if ref($what);
 225
 226        my $self = {};
 227        $self->{'buffer'} = "";
 228        bless($self,$what);
 229
 230        $repo =~ s#/+$##;
 231        $self->{'fullrep'} = $repo;
 232        $self->conn();
 233
 234        $self->{'subdir'} = $subdir;
 235        $self->{'lines'} = undef;
 236
 237        return $self;
 238}
 239
 240sub find_password_entry {
 241        my ($cvspass, @cvsroot) = @_;
 242        my ($file, $delim) = @$cvspass;
 243        my $pass;
 244        local ($_);
 245
 246        if (open(my $fh, $file)) {
 247                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 248                CVSPASSFILE:
 249                while (<$fh>) {
 250                        chomp;
 251                        s/^\/\d+\s+//;
 252                        my ($w, $p) = split($delim,$_,2);
 253                        for my $cvsroot (@cvsroot) {
 254                                if ($w eq $cvsroot) {
 255                                        $pass = $p;
 256                                        last CVSPASSFILE;
 257                                }
 258                        }
 259                }
 260                close($fh);
 261        }
 262        return $pass;
 263}
 264
 265sub conn {
 266        my $self = shift;
 267        my $repo = $self->{'fullrep'};
 268        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 269                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 270
 271                my ($proxyhost,$proxyport);
 272                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 273                        $proxyhost = $1;
 274                        # Default proxyport, if not specified, is 8080.
 275                        $proxyport = 8080;
 276                        if ($ENV{"CVS_PROXY_PORT"}) {
 277                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 278                        }
 279                        if ($param =~ m/proxyport=([^;]+)/) {
 280                                $proxyport = $1;
 281                        }
 282                }
 283                $repo ||= '/';
 284
 285                # if username is not explicit in CVSROOT, then use current user, as cvs would
 286                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 287                my $rr2 = "-";
 288                unless ($port) {
 289                        $rr2 = ":pserver:$user\@$serv:$repo";
 290                        $port=2401;
 291                }
 292                my $rr = ":pserver:$user\@$serv:$port$repo";
 293
 294                if ($pass) {
 295                        $pass = $self->_scramble($pass);
 296                } else {
 297                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 298                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 299                        my @loc = ();
 300                        foreach my $cvspass (@cvspass) {
 301                                my $p = find_password_entry($cvspass, $rr, $rr2);
 302                                if ($p) {
 303                                        push @loc, $cvspass->[0];
 304                                        $pass = $p;
 305                                }
 306                        }
 307
 308                        if (1 < @loc) {
 309                                die("Multiple cvs password files have ".
 310                                    "entries for CVSROOT $opt_d: @loc");
 311                        } elsif (!$pass) {
 312                                $pass = "A";
 313                        }
 314                }
 315
 316                my ($s, $rep);
 317                if ($proxyhost) {
 318
 319                        # Use a HTTP Proxy. Only works for HTTP proxies that
 320                        # don't require user authentication
 321                        #
 322                        # See: http://www.ietf.org/rfc/rfc2817.txt
 323
 324                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 325                        die "Socket to $proxyhost: $!\n" unless defined $s;
 326                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 327                                or die "Write to $proxyhost: $!\n";
 328                        $s->flush();
 329
 330                        $rep = <$s>;
 331
 332                        # The answer should look like 'HTTP/1.x 2yy ....'
 333                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 334                                die "Proxy connect: $rep\n";
 335                        }
 336                        # Skip up to the empty line of the proxy server output
 337                        # including the response headers.
 338                        while ($rep = <$s>) {
 339                                last if (!defined $rep ||
 340                                         $rep eq "\n" ||
 341                                         $rep eq "\r\n");
 342                        }
 343                } else {
 344                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 345                        die "Socket to $serv: $!\n" unless defined $s;
 346                }
 347
 348                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 349                        or die "Write to $serv: $!\n";
 350                $s->flush();
 351
 352                $rep = <$s>;
 353
 354                if ($rep ne "I LOVE YOU\n") {
 355                        $rep="<unknown>" unless $rep;
 356                        die "AuthReply: $rep\n";
 357                }
 358                $self->{'socketo'} = $s;
 359                $self->{'socketi'} = $s;
 360        } else { # local or ext: Fork off our own cvs server.
 361                my $pr = IO::Pipe->new();
 362                my $pw = IO::Pipe->new();
 363                my $pid = fork();
 364                die "Fork: $!\n" unless defined $pid;
 365                my $cvs = 'cvs';
 366                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 367                my $rsh = 'rsh';
 368                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 369
 370                my @cvs = ($cvs, 'server');
 371                my ($local, $user, $host);
 372                $local = $repo =~ s/:local://;
 373                if (!$local) {
 374                    $repo =~ s/:ext://;
 375                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 376                    ($user, $host) = ($1, $2);
 377                }
 378                if (!$local) {
 379                    if ($user) {
 380                        unshift @cvs, $rsh, '-l', $user, $host;
 381                    } else {
 382                        unshift @cvs, $rsh, $host;
 383                    }
 384                }
 385
 386                unless ($pid) {
 387                        $pr->writer();
 388                        $pw->reader();
 389                        dup2($pw->fileno(),0);
 390                        dup2($pr->fileno(),1);
 391                        $pr->close();
 392                        $pw->close();
 393                        exec(@cvs);
 394                }
 395                $pw->writer();
 396                $pr->reader();
 397                $self->{'socketo'} = $pw;
 398                $self->{'socketi'} = $pr;
 399        }
 400        $self->{'socketo'}->write("Root $repo\n");
 401
 402        # Trial and error says that this probably is the minimum set
 403        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 404
 405        $self->{'socketo'}->write("valid-requests\n");
 406        $self->{'socketo'}->flush();
 407
 408        my $rep=$self->readline();
 409        die "Failed to read from server" unless defined $rep;
 410        chomp($rep);
 411        if ($rep !~ s/^Valid-requests\s*//) {
 412                $rep="<unknown>" unless $rep;
 413                die "Expected Valid-requests from server, but got: $rep\n";
 414        }
 415        chomp(my $res=$self->readline());
 416        die "validReply: $res\n" if $res ne "ok";
 417
 418        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 419        $self->{'repo'} = $repo;
 420}
 421
 422sub readline {
 423        my ($self) = @_;
 424        return $self->{'socketi'}->getline();
 425}
 426
 427sub _file {
 428        # Request a file with a given revision.
 429        # Trial and error says this is a good way to do it. :-/
 430        my ($self,$fn,$rev) = @_;
 431        $self->{'socketo'}->write("Argument -N\n") or return undef;
 432        $self->{'socketo'}->write("Argument -P\n") or return undef;
 433        # -kk: Linus' version doesn't use it - defaults to off
 434        if ($opt_k) {
 435            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 436        }
 437        $self->{'socketo'}->write("Argument -r\n") or return undef;
 438        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 439        $self->{'socketo'}->write("Argument --\n") or return undef;
 440        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 441        $self->{'socketo'}->write("Directory .\n") or return undef;
 442        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 443        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 444        $self->{'socketo'}->write("co\n") or return undef;
 445        $self->{'socketo'}->flush() or return undef;
 446        $self->{'lines'} = 0;
 447        return 1;
 448}
 449sub _line {
 450        # Read a line from the server.
 451        # ... except that 'line' may be an entire file. ;-)
 452        my ($self, $fh) = @_;
 453        die "Not in lines" unless defined $self->{'lines'};
 454
 455        my $line;
 456        my $res=0;
 457        while (defined($line = $self->readline())) {
 458                # M U gnupg-cvs-rep/AUTHORS
 459                # Updated gnupg-cvs-rep/
 460                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 461                # /AUTHORS/1.1///T1.1
 462                # u=rw,g=rw,o=rw
 463                # 0
 464                # ok
 465
 466                if ($line =~ s/^(?:Created|Updated) //) {
 467                        $line = $self->readline(); # path
 468                        $line = $self->readline(); # Entries line
 469                        my $mode = $self->readline(); chomp $mode;
 470                        $self->{'mode'} = $mode;
 471                        defined (my $cnt = $self->readline())
 472                                or die "EOF from server after 'Changed'\n";
 473                        chomp $cnt;
 474                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 475                        $line="";
 476                        $res = $self->_fetchfile($fh, $cnt);
 477                } elsif ($line =~ s/^ //) {
 478                        print $fh $line;
 479                        $res += length($line);
 480                } elsif ($line =~ /^M\b/) {
 481                        # output, do nothing
 482                } elsif ($line =~ /^Mbinary\b/) {
 483                        my $cnt;
 484                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 485                        chomp $cnt;
 486                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 487                        $line="";
 488                        $res += $self->_fetchfile($fh, $cnt);
 489                } else {
 490                        chomp $line;
 491                        if ($line eq "ok") {
 492                                # print STDERR "S: ok (".length($res).")\n";
 493                                return $res;
 494                        } elsif ($line =~ s/^E //) {
 495                                # print STDERR "S: $line\n";
 496                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 497                                $line = $self->readline(); # filename
 498                                $line = $self->readline(); # OK
 499                                chomp $line;
 500                                die "Unknown: $line" if $line ne "ok";
 501                                return -1;
 502                        } else {
 503                                die "Unknown: $line\n";
 504                        }
 505                }
 506        }
 507        return undef;
 508}
 509sub file {
 510        my ($self,$fn,$rev) = @_;
 511        my $res;
 512
 513        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 514                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 515
 516        $self->_file($fn,$rev) and $res = $self->_line($fh);
 517
 518        if (!defined $res) {
 519            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 520            truncate $fh, 0;
 521            $self->conn();
 522            $self->_file($fn,$rev) or die "No file command send";
 523            $res = $self->_line($fh);
 524            die "Retry failed" unless defined $res;
 525        }
 526        close ($fh);
 527
 528        return ($name, $res);
 529}
 530sub _fetchfile {
 531        my ($self, $fh, $cnt) = @_;
 532        my $res = 0;
 533        my $bufsize = 1024 * 1024;
 534        while ($cnt) {
 535            if ($bufsize > $cnt) {
 536                $bufsize = $cnt;
 537            }
 538            my $buf;
 539            my $num = $self->{'socketi'}->read($buf,$bufsize);
 540            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 541            print $fh $buf;
 542            $res += $num;
 543            $cnt -= $num;
 544        }
 545        return $res;
 546}
 547
 548sub _scramble {
 549        my ($self, $pass) = @_;
 550        my $scrambled = "A";
 551
 552        return $scrambled unless $pass;
 553
 554        my $pass_len = length($pass);
 555        my @pass_arr = split("", $pass);
 556        my $i;
 557
 558        # from cvs/src/scramble.c
 559        my @shifts = (
 560                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 561                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 562                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 563                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 564                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 565                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 566                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 567                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 568                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 569                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 570                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 571                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 572                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 573                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 574                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 575                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 576        );
 577
 578        for ($i = 0; $i < $pass_len; $i++) {
 579                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 580        }
 581
 582        return $scrambled;
 583}
 584
 585package main;
 586
 587my $cvs = CVSconn->new($opt_d, $cvs_tree);
 588
 589
 590sub pdate($) {
 591        my ($d) = @_;
 592        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 593                or die "Unparseable date: $d\n";
 594        my $y=$1; $y-=1900 if $y>1900;
 595        return timegm($6||0,$5,$4,$3,$2-1,$y);
 596}
 597
 598sub pmode($) {
 599        my ($mode) = @_;
 600        my $m = 0;
 601        my $mm = 0;
 602        my $um = 0;
 603        for my $x(split(//,$mode)) {
 604                if ($x eq ",") {
 605                        $m |= $mm&$um;
 606                        $mm = 0;
 607                        $um = 0;
 608                } elsif ($x eq "u") { $um |= 0700;
 609                } elsif ($x eq "g") { $um |= 0070;
 610                } elsif ($x eq "o") { $um |= 0007;
 611                } elsif ($x eq "r") { $mm |= 0444;
 612                } elsif ($x eq "w") { $mm |= 0222;
 613                } elsif ($x eq "x") { $mm |= 0111;
 614                } elsif ($x eq "=") { # do nothing
 615                } else { die "Unknown mode: $mode\n";
 616                }
 617        }
 618        $m |= $mm&$um;
 619        return $m;
 620}
 621
 622sub getwd() {
 623        my $pwd = `pwd`;
 624        chomp $pwd;
 625        return $pwd;
 626}
 627
 628sub is_sha1 {
 629        my $s = shift;
 630        return $s =~ /^[a-f0-9]{40}$/;
 631}
 632
 633sub get_headref ($) {
 634        my $name = shift;
 635        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 636        return undef unless $? == 0;
 637        chomp $r;
 638        return $r;
 639}
 640
 641my $user_filename_prepend = '';
 642sub munge_user_filename {
 643        my $name = shift;
 644        return File::Spec->file_name_is_absolute($name) ?
 645                $name :
 646                $user_filename_prepend . $name;
 647}
 648
 649-d $git_tree
 650        or mkdir($git_tree,0777)
 651        or die "Could not create $git_tree: $!";
 652if ($git_tree ne '.') {
 653        $user_filename_prepend = getwd() . '/';
 654        chdir($git_tree);
 655}
 656
 657my $last_branch = "";
 658my $orig_branch = "";
 659my %branch_date;
 660my $tip_at_start = undef;
 661
 662my $git_dir = $ENV{"GIT_DIR"} || ".git";
 663$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 664$ENV{"GIT_DIR"} = $git_dir;
 665my $orig_git_index;
 666$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 667
 668my %index; # holds filenames of one index per branch
 669
 670unless (-d $git_dir) {
 671        system(qw(git init));
 672        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 673        system(qw(git read-tree --empty));
 674        die "Cannot init an empty tree: $?\n" if $?;
 675
 676        $last_branch = $opt_o;
 677        $orig_branch = "";
 678} else {
 679        open(F, "-|", qw(git symbolic-ref HEAD)) or
 680                die "Cannot run git symbolic-ref: $!\n";
 681        chomp ($last_branch = <F>);
 682        $last_branch = basename($last_branch);
 683        close(F);
 684        unless ($last_branch) {
 685                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 686                $last_branch = "master";
 687        }
 688        $orig_branch = $last_branch;
 689        $tip_at_start = `git rev-parse --verify HEAD`;
 690
 691        # Get the last import timestamps
 692        my $fmt = '($ref, $author) = (%(refname), %(author));';
 693        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 694        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 695        while (defined(my $entry = <H>)) {
 696                my ($ref, $author);
 697                eval($entry) || die "cannot eval refs list: $@";
 698                my ($head) = ($ref =~ m|^$remote/(.*)|);
 699                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 700                $branch_date{$head} = $1;
 701        }
 702        close(H);
 703        if (!exists $branch_date{$opt_o}) {
 704                die "Branch '$opt_o' does not exist.\n".
 705                       "Either use the correct '-o branch' option,\n".
 706                       "or import to a new repository.\n";
 707        }
 708}
 709
 710-d $git_dir
 711        or die "Could not create git subdir ($git_dir).\n";
 712
 713# now we read (and possibly save) author-info as well
 714-f "$git_dir/cvs-authors" and
 715  read_author_info("$git_dir/cvs-authors");
 716if ($opt_A) {
 717        read_author_info(munge_user_filename($opt_A));
 718        write_author_info("$git_dir/cvs-authors");
 719}
 720
 721# open .git/cvs-revisions, if requested
 722open my $revision_map, '>>', "$git_dir/cvs-revisions"
 723    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 724        if defined $opt_R;
 725
 726
 727#
 728# run cvsps into a file unless we are getting
 729# it passed as a file via $opt_P
 730#
 731my $cvspsfile;
 732unless ($opt_P) {
 733        print "Running cvsps...\n" if $opt_v;
 734        my $pid = open(CVSPS,"-|");
 735        my $cvspsfh;
 736        die "Cannot fork: $!\n" unless defined $pid;
 737        unless ($pid) {
 738                my @opt;
 739                @opt = split(/,/,$opt_p) if defined $opt_p;
 740                unshift @opt, '-z', $opt_z if defined $opt_z;
 741                unshift @opt, '-q'         unless defined $opt_v;
 742                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 743                        push @opt, '--cvs-direct';
 744                }
 745                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 746                die "Could not start cvsps: $!\n";
 747        }
 748        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 749                                          DIR => File::Spec->tmpdir());
 750        while (<CVSPS>) {
 751            print $cvspsfh $_;
 752        }
 753        close CVSPS;
 754        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 755        close $cvspsfh;
 756} else {
 757        $cvspsfile = munge_user_filename($opt_P);
 758}
 759
 760open(CVS, "<$cvspsfile") or die $!;
 761
 762## cvsps output:
 763#---------------------
 764#PatchSet 314
 765#Date: 1999/09/18 13:03:59
 766#Author: wkoch
 767#Branch: STABLE-BRANCH-1-0
 768#Ancestor branch: HEAD
 769#Tag: (none)
 770#Log:
 771#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 772#Members:
 773#       README:1.57->1.57.2.1
 774#       VERSION:1.96->1.96.2.1
 775#
 776#---------------------
 777
 778my $state = 0;
 779
 780sub update_index (\@\@) {
 781        my $old = shift;
 782        my $new = shift;
 783        open(my $fh, '|-', qw(git update-index -z --index-info))
 784                or die "unable to open git update-index: $!";
 785        print $fh
 786                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 787                        @$old),
 788                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 789                        @$new)
 790                or die "unable to write to git update-index: $!";
 791        close $fh
 792                or die "unable to write to git update-index: $!";
 793        $? and die "git update-index reported error: $?";
 794}
 795
 796sub write_tree () {
 797        open(my $fh, '-|', qw(git write-tree))
 798                or die "unable to open git write-tree: $!";
 799        chomp(my $tree = <$fh>);
 800        is_sha1($tree)
 801                or die "Cannot get tree id ($tree): $!";
 802        close($fh)
 803                or die "Error running git write-tree: $?\n";
 804        print "Tree ID $tree\n" if $opt_v;
 805        return $tree;
 806}
 807
 808my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
 809my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 810
 811# commits that cvsps cannot place anywhere...
 812$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 813
 814sub commit {
 815        if ($branch eq $opt_o && !$index{branch} &&
 816                !get_headref("$remote/$branch")) {
 817            # looks like an initial commit
 818            # use the index primed by git init
 819            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 820            $index{$branch} = "$git_dir/index";
 821        } else {
 822            # use an index per branch to speed up
 823            # imports of projects with many branches
 824            unless ($index{$branch}) {
 825                $index{$branch} = tmpnam();
 826                $ENV{GIT_INDEX_FILE} = $index{$branch};
 827                if ($ancestor) {
 828                    system("git", "read-tree", "$remote/$ancestor");
 829                } else {
 830                    system("git", "read-tree", "$remote/$branch");
 831                }
 832                die "read-tree failed: $?\n" if $?;
 833            }
 834        }
 835        $ENV{GIT_INDEX_FILE} = $index{$branch};
 836
 837        update_index(@old, @new);
 838        @old = @new = ();
 839        my $tree = write_tree();
 840        my $parent = get_headref("$remote/$last_branch");
 841        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 842
 843        my @commit_args;
 844        push @commit_args, ("-p", $parent) if $parent;
 845
 846        # loose detection of merges
 847        # based on the commit msg
 848        foreach my $rx (@mergerx) {
 849                next unless $logmsg =~ $rx && $1;
 850                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 851                if (my $sha1 = get_headref("$remote/$mparent")) {
 852                        push @commit_args, '-p', "$remote/$mparent";
 853                        print "Merge parent branch: $mparent\n" if $opt_v;
 854                }
 855        }
 856
 857        $ENV{'TZ'}=$author_tz;
 858        my $commit_date = strftime("%s %z", localtime($date));
 859        $ENV{'TZ'}="UTC";
 860        $ENV{GIT_AUTHOR_NAME} = $author_name;
 861        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 862        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 863        $ENV{GIT_COMMITTER_NAME} = $author_name;
 864        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 865        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 866        my $pid = open2(my $commit_read, my $commit_write,
 867                'git', 'commit-tree', $tree, @commit_args);
 868
 869        # compatibility with git2cvs
 870        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 871        $logmsg =~ s/[\s\n]+\z//;
 872
 873        if (@skipped) {
 874            $logmsg .= "\n\n\nSKIPPED:\n\t";
 875            $logmsg .= join("\n\t", @skipped) . "\n";
 876            @skipped = ();
 877        }
 878
 879        print($commit_write "$logmsg\n") && close($commit_write)
 880                or die "Error writing to git commit-tree: $!\n";
 881
 882        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 883        chomp(my $cid = <$commit_read>);
 884        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 885        print "Commit ID $cid\n" if $opt_v;
 886        close($commit_read);
 887
 888        waitpid($pid,0);
 889        die "Error running git commit-tree: $?\n" if $?;
 890
 891        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 892                or die "Cannot write branch $branch for update: $!\n";
 893
 894        if ($revision_map) {
 895                print $revision_map "@$_ $cid\n" for @commit_revisions;
 896        }
 897        @commit_revisions = ();
 898
 899        if ($tag) {
 900                my ($xtag) = $tag;
 901                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 902                $xtag =~ tr/_/\./ if ( $opt_u );
 903                $xtag =~ s/[\/]/$opt_s/g;
 904
 905                # See refs.c for these rules.
 906                # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
 907                $xtag =~ s/[ ~\^:\\\*\?\[]//g;
 908                # Other bad strings for tags:
 909                # (See check_refname_component in refs.c.)
 910                1 while $xtag =~ s/
 911                        (?: \.\.        # Tag cannot contain '..'.
 912                        |   \@{         # Tag cannot contain '@{'.
 913                        | ^ -           # Tag cannot begin with '-'.
 914                        |   \.lock $    # Tag cannot end with '.lock'.
 915                        | ^ \.          # Tag cannot begin...
 916                        |   \. $        # ...or end with '.'
 917                        )//xg;
 918                # Tag cannot be empty.
 919                if ($xtag eq '') {
 920                        warn("warning: ignoring tag '$tag'",
 921                        " with invalid tagname\n");
 922                        return;
 923                }
 924
 925                if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
 926                        # We did our best to sanitize the tag, but still failed
 927                        # for whatever reason. Bail out, and give the user
 928                        # enough information to understand if/how we should
 929                        # improve the translation in the future.
 930                        if ($tag ne $xtag) {
 931                                print "Translated '$tag' tag to '$xtag'\n";
 932                        }
 933                        die "Cannot create tag $xtag: $!\n";
 934                }
 935
 936                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 937        }
 938};
 939
 940my $commitcount = 1;
 941while (<CVS>) {
 942        chomp;
 943        if ($state == 0 and /^-+$/) {
 944                $state = 1;
 945        } elsif ($state == 0) {
 946                $state = 1;
 947                redo;
 948        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 949                $patchset = 0+$_;
 950                $state=2;
 951        } elsif ($state == 2 and s/^Date:\s+//) {
 952                $date = pdate($_);
 953                unless ($date) {
 954                        print STDERR "Could not parse date: $_\n";
 955                        $state=0;
 956                        next;
 957                }
 958                $state=3;
 959        } elsif ($state == 3 and s/^Author:\s+//) {
 960                $author_tz = "UTC";
 961                s/\s+$//;
 962                if (/^(.*?)\s+<(.*)>/) {
 963                    ($author_name, $author_email) = ($1, $2);
 964                } elsif ($conv_author_name{$_}) {
 965                        $author_name = $conv_author_name{$_};
 966                        $author_email = $conv_author_email{$_};
 967                        $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
 968                } else {
 969                    $author_name = $author_email = $_;
 970                }
 971                $state = 4;
 972        } elsif ($state == 4 and s/^Branch:\s+//) {
 973                s/\s+$//;
 974                tr/_/\./ if ( $opt_u );
 975                s/[\/]/$opt_s/g;
 976                $branch = $_;
 977                $state = 5;
 978        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 979                s/\s+$//;
 980                $ancestor = $_;
 981                $ancestor = $opt_o if $ancestor eq "HEAD";
 982                $state = 6;
 983        } elsif ($state == 5) {
 984                $ancestor = undef;
 985                $state = 6;
 986                redo;
 987        } elsif ($state == 6 and s/^Tag:\s+//) {
 988                s/\s+$//;
 989                if ($_ eq "(none)") {
 990                        $tag = undef;
 991                } else {
 992                        $tag = $_;
 993                }
 994                $state = 7;
 995        } elsif ($state == 7 and /^Log:/) {
 996                $logmsg = "";
 997                $state = 8;
 998        } elsif ($state == 8 and /^Members:/) {
 999                $branch = $opt_o if $branch eq "HEAD";
1000                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1001                        # skip
1002                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1003                        $state = 11;
1004                        next;
1005                }
1006                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1007                        # skip if the commit is too recent
1008                        # given that the cvsps default fuzz is 300s, we give ourselves another
1009                        # 300s just in case -- this also prevents skipping commits
1010                        # due to server clock drift
1011                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
1012                        $state = 11;
1013                        next;
1014                }
1015                if (exists $ignorebranch{$branch}) {
1016                        print STDERR "Skipping $branch\n";
1017                        $state = 11;
1018                        next;
1019                }
1020                if ($ancestor) {
1021                        if ($ancestor eq $branch) {
1022                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1023                                $ancestor = $opt_o;
1024                        }
1025                        if (defined get_headref("$remote/$branch")) {
1026                                print STDERR "Branch $branch already exists!\n";
1027                                $state=11;
1028                                next;
1029                        }
1030                        my $id = get_headref("$remote/$ancestor");
1031                        if (!$id) {
1032                                print STDERR "Branch $ancestor does not exist!\n";
1033                                $ignorebranch{$branch} = 1;
1034                                $state=11;
1035                                next;
1036                        }
1037
1038                        system(qw(git update-ref -m cvsimport),
1039                                "$remote/$branch", $id);
1040                        if($? != 0) {
1041                                print STDERR "Could not create branch $branch\n";
1042                                $ignorebranch{$branch} = 1;
1043                                $state=11;
1044                                next;
1045                        }
1046                }
1047                $last_branch = $branch if $branch ne $last_branch;
1048                $state = 9;
1049        } elsif ($state == 8) {
1050                $logmsg .= "$_\n";
1051        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1052#       VERSION:1.96->1.96.2.1
1053                my $init = ($2 eq "INITIAL");
1054                my $fn = $1;
1055                my $rev = $3;
1056                $fn =~ s#^/+##;
1057                if ($opt_S && $fn =~ m/$opt_S/) {
1058                    print "SKIPPING $fn v $rev\n";
1059                    push(@skipped, $fn);
1060                    next;
1061                }
1062                push @commit_revisions, [$fn, $rev];
1063                print "Fetching $fn   v $rev\n" if $opt_v;
1064                my ($tmpname, $size) = $cvs->file($fn,$rev);
1065                if ($size == -1) {
1066                        push(@old,$fn);
1067                        print "Drop $fn\n" if $opt_v;
1068                } else {
1069                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1070                        my $pid = open(my $F, '-|');
1071                        die $! unless defined $pid;
1072                        if (!$pid) {
1073                            exec("git", "hash-object", "-w", $tmpname)
1074                                or die "Cannot create object: $!\n";
1075                        }
1076                        my $sha = <$F>;
1077                        chomp $sha;
1078                        close $F;
1079                        my $mode = pmode($cvs->{'mode'});
1080                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1081                }
1082                unlink($tmpname);
1083        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1084                my $fn = $1;
1085                my $rev = $2;
1086                $fn =~ s#^/+##;
1087                push @commit_revisions, [$fn, $rev];
1088                push(@old,$fn);
1089                print "Delete $fn\n" if $opt_v;
1090        } elsif ($state == 9 and /^\s*$/) {
1091                $state = 10;
1092        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1093                $commitcount++;
1094                if ($opt_L && $commitcount > $opt_L) {
1095                        last;
1096                }
1097                commit();
1098                if (($commitcount & 1023) == 0) {
1099                        system(qw(git repack -a -d));
1100                }
1101                $state = 1;
1102        } elsif ($state == 11 and /^-+$/) {
1103                $state = 1;
1104        } elsif (/^-+$/) { # end of unknown-line processing
1105                $state = 1;
1106        } elsif ($state != 11) { # ignore stuff when skipping
1107                print STDERR "* UNKNOWN LINE * $_\n";
1108        }
1109}
1110commit() if $branch and $state != 11;
1111
1112unless ($opt_P) {
1113        unlink($cvspsfile);
1114}
1115
1116# The heuristic of repacking every 1024 commits can leave a
1117# lot of unpacked data.  If there is more than 1MB worth of
1118# not-packed objects, repack once more.
1119my $line = `git count-objects`;
1120if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1121  my ($n_objects, $kb) = ($1, $2);
1122  1024 < $kb
1123    and system(qw(git repack -a -d));
1124}
1125
1126foreach my $git_index (values %index) {
1127    if ($git_index ne "$git_dir/index") {
1128        unlink($git_index);
1129    }
1130}
1131
1132if (defined $orig_git_index) {
1133        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1134} else {
1135        delete $ENV{GIT_INDEX_FILE};
1136}
1137
1138# Now switch back to the branch we were in before all of this happened
1139if ($orig_branch) {
1140        print "DONE.\n" if $opt_v;
1141        if ($opt_i) {
1142                exit 0;
1143        }
1144        my $tip_at_end = `git rev-parse --verify HEAD`;
1145        if ($tip_at_start ne $tip_at_end) {
1146                for ($tip_at_start, $tip_at_end) { chomp; }
1147                print "Fetched into the current branch.\n" if $opt_v;
1148                system(qw(git read-tree -u -m),
1149                       $tip_at_start, $tip_at_end);
1150                die "Fast-forward update failed: $?\n" if $?;
1151        }
1152        else {
1153                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1154                die "Could not merge $opt_o into the current branch.\n" if $?;
1155        }
1156} else {
1157        $orig_branch = "master";
1158        print "DONE; creating $orig_branch branch\n" if $opt_v;
1159        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1160                unless defined get_headref('refs/heads/master');
1161        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1162                if ($opt_r && $opt_o ne 'HEAD');
1163        system('git', 'update-ref', 'HEAD', "$orig_branch");
1164        unless ($opt_i) {
1165                system(qw(git checkout -f));
1166                die "checkout failed: $?\n" if $?;
1167        }
1168}