git-cvsimport.perlon commit fast-import: make hash-size independent (28d055b)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime tzset dup2 ENOENT);
  28use IPC::Open2;
  29use Git qw(get_tz_offset);
  30
  31$SIG{'PIPE'}="IGNORE";
  32set_timezone('UTC');
  33
  34our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  35my (%conv_author_name, %conv_author_email, %conv_author_tz);
  36
  37sub usage(;$) {
  38        my $msg = shift;
  39        print(STDERR "Error: $msg\n") if $msg;
  40        print STDERR <<END;
  41usage: git cvsimport     # fetch/update GIT from CVS
  42       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  43       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  44       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  45       [-r remote] [-R] [CVS_module]
  46END
  47        exit(1);
  48}
  49
  50sub read_author_info($) {
  51        my ($file) = @_;
  52        my $user;
  53        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  54
  55        while (<$f>) {
  56                # Expected format is this:
  57                #   exon=Andreas Ericsson <ae@op5.se>
  58                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  59                        $user = $1;
  60                        $conv_author_name{$user} = $2;
  61                        $conv_author_email{$user} = $3;
  62                }
  63                # or with an optional timezone:
  64                #   spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
  65                elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
  66                        $user = $1;
  67                        $conv_author_name{$user} = $2;
  68                        $conv_author_email{$user} = $3;
  69                        $conv_author_tz{$user} = $4;
  70                }
  71                # However, we also read from CVSROOT/users format
  72                # to ease migration.
  73                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  74                        my $mapped;
  75                        ($user, $mapped) = ($1, $3);
  76                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  77                                $conv_author_name{$user} = $1;
  78                                $conv_author_email{$user} = $2;
  79                        }
  80                        elsif ($mapped =~ /^<?(.*)>?$/) {
  81                                $conv_author_name{$user} = $user;
  82                                $conv_author_email{$user} = $1;
  83                        }
  84                }
  85                # NEEDSWORK: Maybe warn on unrecognized lines?
  86        }
  87        close ($f);
  88}
  89
  90sub write_author_info($) {
  91        my ($file) = @_;
  92        open my $f, '>', $file or
  93          die("Failed to open $file for writing: $!");
  94
  95        foreach (keys %conv_author_name) {
  96                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
  97                print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
  98                print $f "\n";
  99        }
 100        close ($f);
 101}
 102
 103# Versions of perl before 5.10.0 may not automatically check $TZ each
 104# time localtime is run (most platforms will do so only the first time).
 105# We can work around this by using tzset() to update the internal
 106# variable whenever we change the environment.
 107sub set_timezone {
 108        $ENV{TZ} = shift;
 109        tzset();
 110}
 111
 112# convert getopts specs for use by git config
 113my %longmap = (
 114        'A:' => 'authors-file',
 115        'M:' => 'merge-regex',
 116        'P:' => undef,
 117        'R' => 'track-revisions',
 118        'S:' => 'ignore-paths',
 119);
 120
 121sub read_repo_config {
 122        # Split the string between characters, unless there is a ':'
 123        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 124        my @opts = split(/ *(?!:)/, shift);
 125        foreach my $o (@opts) {
 126                my $key = $o;
 127                $key =~ s/://g;
 128                my $arg = 'git config';
 129                $arg .= ' --bool' if ($o !~ /:$/);
 130                my $ckey = $key;
 131
 132                if (exists $longmap{$o}) {
 133                        # An uppercase option like -R cannot be
 134                        # expressed in the configuration, as the
 135                        # variable names are downcased.
 136                        $ckey = $longmap{$o};
 137                        next if (! defined $ckey);
 138                        $ckey =~ s/-//g;
 139                }
 140                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 141                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 142                        no strict 'refs';
 143                        my $opt_name = "opt_" . $key;
 144                        if (!$$opt_name) {
 145                                $$opt_name = $tmp;
 146                        }
 147                }
 148        }
 149}
 150
 151my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 152read_repo_config($opts);
 153Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 154
 155# turn the Getopt::Std specification in a Getopt::Long one,
 156# with support for multiple -M options
 157GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 158    or usage();
 159usage if $opt_h;
 160
 161if (@ARGV == 0) {
 162                chomp(my $module = `git config --get cvsimport.module`);
 163                push(@ARGV, $module) if $? == 0;
 164}
 165@ARGV <= 1 or usage("You can't specify more than one CVS module");
 166
 167if ($opt_d) {
 168        $ENV{"CVSROOT"} = $opt_d;
 169} elsif (-f 'CVS/Root') {
 170        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 171        $opt_d = <$f>;
 172        chomp $opt_d;
 173        close $f;
 174        $ENV{"CVSROOT"} = $opt_d;
 175} elsif ($ENV{"CVSROOT"}) {
 176        $opt_d = $ENV{"CVSROOT"};
 177} else {
 178        usage("CVSROOT needs to be set");
 179}
 180$opt_s ||= "-";
 181$opt_a ||= 0;
 182
 183my $git_tree = $opt_C;
 184$git_tree ||= ".";
 185
 186my $remote;
 187if (defined $opt_r) {
 188        $remote = 'refs/remotes/' . $opt_r;
 189        $opt_o ||= "master";
 190} else {
 191        $opt_o ||= "origin";
 192        $remote = 'refs/heads';
 193}
 194
 195my $cvs_tree;
 196if ($#ARGV == 0) {
 197        $cvs_tree = $ARGV[0];
 198} elsif (-f 'CVS/Repository') {
 199        open my $f, '<', 'CVS/Repository' or
 200            die 'Failed to open CVS/Repository';
 201        $cvs_tree = <$f>;
 202        chomp $cvs_tree;
 203        close $f;
 204} else {
 205        usage("CVS module has to be specified");
 206}
 207
 208our @mergerx = ();
 209if ($opt_m) {
 210        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 211}
 212if (@opt_M) {
 213        push (@mergerx, map { qr/$_/ } @opt_M);
 214}
 215
 216# Remember UTC of our starting time
 217# we'll want to avoid importing commits
 218# that are too recent
 219our $starttime = time();
 220
 221select(STDERR); $|=1; select(STDOUT);
 222
 223
 224package CVSconn;
 225# Basic CVS dialog.
 226# We're only interested in connecting and downloading, so ...
 227
 228use File::Spec;
 229use File::Temp qw(tempfile);
 230use POSIX qw(strftime dup2);
 231
 232sub new {
 233        my ($what,$repo,$subdir) = @_;
 234        $what=ref($what) if ref($what);
 235
 236        my $self = {};
 237        $self->{'buffer'} = "";
 238        bless($self,$what);
 239
 240        $repo =~ s#/+$##;
 241        $self->{'fullrep'} = $repo;
 242        $self->conn();
 243
 244        $self->{'subdir'} = $subdir;
 245        $self->{'lines'} = undef;
 246
 247        return $self;
 248}
 249
 250sub find_password_entry {
 251        my ($cvspass, @cvsroot) = @_;
 252        my ($file, $delim) = @$cvspass;
 253        my $pass;
 254        local ($_);
 255
 256        if (open(my $fh, $file)) {
 257                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 258                CVSPASSFILE:
 259                while (<$fh>) {
 260                        chomp;
 261                        s/^\/\d+\s+//;
 262                        my ($w, $p) = split($delim,$_,2);
 263                        for my $cvsroot (@cvsroot) {
 264                                if ($w eq $cvsroot) {
 265                                        $pass = $p;
 266                                        last CVSPASSFILE;
 267                                }
 268                        }
 269                }
 270                close($fh);
 271        }
 272        return $pass;
 273}
 274
 275sub conn {
 276        my $self = shift;
 277        my $repo = $self->{'fullrep'};
 278        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 279                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 280
 281                my ($proxyhost,$proxyport);
 282                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 283                        $proxyhost = $1;
 284                        # Default proxyport, if not specified, is 8080.
 285                        $proxyport = 8080;
 286                        if ($ENV{"CVS_PROXY_PORT"}) {
 287                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 288                        }
 289                        if ($param =~ m/proxyport=([^;]+)/) {
 290                                $proxyport = $1;
 291                        }
 292                }
 293                $repo ||= '/';
 294
 295                # if username is not explicit in CVSROOT, then use current user, as cvs would
 296                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 297                my $rr2 = "-";
 298                unless ($port) {
 299                        $rr2 = ":pserver:$user\@$serv:$repo";
 300                        $port=2401;
 301                }
 302                my $rr = ":pserver:$user\@$serv:$port$repo";
 303
 304                if ($pass) {
 305                        $pass = $self->_scramble($pass);
 306                } else {
 307                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 308                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 309                        my @loc = ();
 310                        foreach my $cvspass (@cvspass) {
 311                                my $p = find_password_entry($cvspass, $rr, $rr2);
 312                                if ($p) {
 313                                        push @loc, $cvspass->[0];
 314                                        $pass = $p;
 315                                }
 316                        }
 317
 318                        if (1 < @loc) {
 319                                die("Multiple cvs password files have ".
 320                                    "entries for CVSROOT $opt_d: @loc");
 321                        } elsif (!$pass) {
 322                                $pass = "A";
 323                        }
 324                }
 325
 326                my ($s, $rep);
 327                if ($proxyhost) {
 328
 329                        # Use a HTTP Proxy. Only works for HTTP proxies that
 330                        # don't require user authentication
 331                        #
 332                        # See: http://www.ietf.org/rfc/rfc2817.txt
 333
 334                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 335                        die "Socket to $proxyhost: $!\n" unless defined $s;
 336                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 337                                or die "Write to $proxyhost: $!\n";
 338                        $s->flush();
 339
 340                        $rep = <$s>;
 341
 342                        # The answer should look like 'HTTP/1.x 2yy ....'
 343                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 344                                die "Proxy connect: $rep\n";
 345                        }
 346                        # Skip up to the empty line of the proxy server output
 347                        # including the response headers.
 348                        while ($rep = <$s>) {
 349                                last if (!defined $rep ||
 350                                         $rep eq "\n" ||
 351                                         $rep eq "\r\n");
 352                        }
 353                } else {
 354                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 355                        die "Socket to $serv: $!\n" unless defined $s;
 356                }
 357
 358                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 359                        or die "Write to $serv: $!\n";
 360                $s->flush();
 361
 362                $rep = <$s>;
 363
 364                if ($rep ne "I LOVE YOU\n") {
 365                        $rep="<unknown>" unless $rep;
 366                        die "AuthReply: $rep\n";
 367                }
 368                $self->{'socketo'} = $s;
 369                $self->{'socketi'} = $s;
 370        } else { # local or ext: Fork off our own cvs server.
 371                my $pr = IO::Pipe->new();
 372                my $pw = IO::Pipe->new();
 373                my $pid = fork();
 374                die "Fork: $!\n" unless defined $pid;
 375                my $cvs = 'cvs';
 376                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 377                my $rsh = 'rsh';
 378                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 379
 380                my @cvs = ($cvs, 'server');
 381                my ($local, $user, $host);
 382                $local = $repo =~ s/:local://;
 383                if (!$local) {
 384                    $repo =~ s/:ext://;
 385                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 386                    ($user, $host) = ($1, $2);
 387                }
 388                if (!$local) {
 389                    if ($user) {
 390                        unshift @cvs, $rsh, '-l', $user, $host;
 391                    } else {
 392                        unshift @cvs, $rsh, $host;
 393                    }
 394                }
 395
 396                unless ($pid) {
 397                        $pr->writer();
 398                        $pw->reader();
 399                        dup2($pw->fileno(),0);
 400                        dup2($pr->fileno(),1);
 401                        $pr->close();
 402                        $pw->close();
 403                        exec(@cvs);
 404                }
 405                $pw->writer();
 406                $pr->reader();
 407                $self->{'socketo'} = $pw;
 408                $self->{'socketi'} = $pr;
 409        }
 410        $self->{'socketo'}->write("Root $repo\n");
 411
 412        # Trial and error says that this probably is the minimum set
 413        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 414
 415        $self->{'socketo'}->write("valid-requests\n");
 416        $self->{'socketo'}->flush();
 417
 418        my $rep=$self->readline();
 419        die "Failed to read from server" unless defined $rep;
 420        chomp($rep);
 421        if ($rep !~ s/^Valid-requests\s*//) {
 422                $rep="<unknown>" unless $rep;
 423                die "Expected Valid-requests from server, but got: $rep\n";
 424        }
 425        chomp(my $res=$self->readline());
 426        die "validReply: $res\n" if $res ne "ok";
 427
 428        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 429        $self->{'repo'} = $repo;
 430}
 431
 432sub readline {
 433        my ($self) = @_;
 434        return $self->{'socketi'}->getline();
 435}
 436
 437sub _file {
 438        # Request a file with a given revision.
 439        # Trial and error says this is a good way to do it. :-/
 440        my ($self,$fn,$rev) = @_;
 441        $self->{'socketo'}->write("Argument -N\n") or return undef;
 442        $self->{'socketo'}->write("Argument -P\n") or return undef;
 443        # -kk: Linus' version doesn't use it - defaults to off
 444        if ($opt_k) {
 445            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 446        }
 447        $self->{'socketo'}->write("Argument -r\n") or return undef;
 448        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 449        $self->{'socketo'}->write("Argument --\n") or return undef;
 450        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 451        $self->{'socketo'}->write("Directory .\n") or return undef;
 452        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 453        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 454        $self->{'socketo'}->write("co\n") or return undef;
 455        $self->{'socketo'}->flush() or return undef;
 456        $self->{'lines'} = 0;
 457        return 1;
 458}
 459sub _line {
 460        # Read a line from the server.
 461        # ... except that 'line' may be an entire file. ;-)
 462        my ($self, $fh) = @_;
 463        die "Not in lines" unless defined $self->{'lines'};
 464
 465        my $line;
 466        my $res=0;
 467        while (defined($line = $self->readline())) {
 468                # M U gnupg-cvs-rep/AUTHORS
 469                # Updated gnupg-cvs-rep/
 470                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 471                # /AUTHORS/1.1///T1.1
 472                # u=rw,g=rw,o=rw
 473                # 0
 474                # ok
 475
 476                if ($line =~ s/^(?:Created|Updated) //) {
 477                        $line = $self->readline(); # path
 478                        $line = $self->readline(); # Entries line
 479                        my $mode = $self->readline(); chomp $mode;
 480                        $self->{'mode'} = $mode;
 481                        defined (my $cnt = $self->readline())
 482                                or die "EOF from server after 'Changed'\n";
 483                        chomp $cnt;
 484                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 485                        $line="";
 486                        $res = $self->_fetchfile($fh, $cnt);
 487                } elsif ($line =~ s/^ //) {
 488                        print $fh $line;
 489                        $res += length($line);
 490                } elsif ($line =~ /^M\b/) {
 491                        # output, do nothing
 492                } elsif ($line =~ /^Mbinary\b/) {
 493                        my $cnt;
 494                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 495                        chomp $cnt;
 496                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 497                        $line="";
 498                        $res += $self->_fetchfile($fh, $cnt);
 499                } else {
 500                        chomp $line;
 501                        if ($line eq "ok") {
 502                                # print STDERR "S: ok (".length($res).")\n";
 503                                return $res;
 504                        } elsif ($line =~ s/^E //) {
 505                                # print STDERR "S: $line\n";
 506                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 507                                $line = $self->readline(); # filename
 508                                $line = $self->readline(); # OK
 509                                chomp $line;
 510                                die "Unknown: $line" if $line ne "ok";
 511                                return -1;
 512                        } else {
 513                                die "Unknown: $line\n";
 514                        }
 515                }
 516        }
 517        return undef;
 518}
 519sub file {
 520        my ($self,$fn,$rev) = @_;
 521        my $res;
 522
 523        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 524                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 525
 526        $self->_file($fn,$rev) and $res = $self->_line($fh);
 527
 528        if (!defined $res) {
 529            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 530            truncate $fh, 0;
 531            $self->conn();
 532            $self->_file($fn,$rev) or die "No file command send";
 533            $res = $self->_line($fh);
 534            die "Retry failed" unless defined $res;
 535        }
 536        close ($fh);
 537
 538        return ($name, $res);
 539}
 540sub _fetchfile {
 541        my ($self, $fh, $cnt) = @_;
 542        my $res = 0;
 543        my $bufsize = 1024 * 1024;
 544        while ($cnt) {
 545            if ($bufsize > $cnt) {
 546                $bufsize = $cnt;
 547            }
 548            my $buf;
 549            my $num = $self->{'socketi'}->read($buf,$bufsize);
 550            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 551            print $fh $buf;
 552            $res += $num;
 553            $cnt -= $num;
 554        }
 555        return $res;
 556}
 557
 558sub _scramble {
 559        my ($self, $pass) = @_;
 560        my $scrambled = "A";
 561
 562        return $scrambled unless $pass;
 563
 564        my $pass_len = length($pass);
 565        my @pass_arr = split("", $pass);
 566        my $i;
 567
 568        # from cvs/src/scramble.c
 569        my @shifts = (
 570                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 571                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 572                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 573                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 574                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 575                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 576                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 577                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 578                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 579                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 580                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 581                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 582                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 583                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 584                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 585                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 586        );
 587
 588        for ($i = 0; $i < $pass_len; $i++) {
 589                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 590        }
 591
 592        return $scrambled;
 593}
 594
 595package main;
 596
 597my $cvs = CVSconn->new($opt_d, $cvs_tree);
 598
 599
 600sub pdate($) {
 601        my ($d) = @_;
 602        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 603                or die "Unparseable date: $d\n";
 604        my $y=$1;
 605        $y+=100 if $y<70;
 606        $y+=1900 if $y<1000;
 607        return timegm($6||0,$5,$4,$3,$2-1,$y);
 608}
 609
 610sub pmode($) {
 611        my ($mode) = @_;
 612        my $m = 0;
 613        my $mm = 0;
 614        my $um = 0;
 615        for my $x(split(//,$mode)) {
 616                if ($x eq ",") {
 617                        $m |= $mm&$um;
 618                        $mm = 0;
 619                        $um = 0;
 620                } elsif ($x eq "u") { $um |= 0700;
 621                } elsif ($x eq "g") { $um |= 0070;
 622                } elsif ($x eq "o") { $um |= 0007;
 623                } elsif ($x eq "r") { $mm |= 0444;
 624                } elsif ($x eq "w") { $mm |= 0222;
 625                } elsif ($x eq "x") { $mm |= 0111;
 626                } elsif ($x eq "=") { # do nothing
 627                } else { die "Unknown mode: $mode\n";
 628                }
 629        }
 630        $m |= $mm&$um;
 631        return $m;
 632}
 633
 634sub getwd() {
 635        my $pwd = `pwd`;
 636        chomp $pwd;
 637        return $pwd;
 638}
 639
 640sub is_sha1 {
 641        my $s = shift;
 642        return $s =~ /^[a-f0-9]{40}$/;
 643}
 644
 645sub get_headref ($) {
 646        my $name = shift;
 647        $name =~ s/'/'\\''/g;
 648        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 649        return undef unless $? == 0;
 650        chomp $r;
 651        return $r;
 652}
 653
 654my $user_filename_prepend = '';
 655sub munge_user_filename {
 656        my $name = shift;
 657        return File::Spec->file_name_is_absolute($name) ?
 658                $name :
 659                $user_filename_prepend . $name;
 660}
 661
 662-d $git_tree
 663        or mkdir($git_tree,0777)
 664        or die "Could not create $git_tree: $!";
 665if ($git_tree ne '.') {
 666        $user_filename_prepend = getwd() . '/';
 667        chdir($git_tree);
 668}
 669
 670my $last_branch = "";
 671my $orig_branch = "";
 672my %branch_date;
 673my $tip_at_start = undef;
 674
 675my $git_dir = $ENV{"GIT_DIR"} || ".git";
 676$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 677$ENV{"GIT_DIR"} = $git_dir;
 678my $orig_git_index;
 679$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 680
 681my %index; # holds filenames of one index per branch
 682
 683unless (-d $git_dir) {
 684        system(qw(git init));
 685        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 686        system(qw(git read-tree --empty));
 687        die "Cannot init an empty tree: $?\n" if $?;
 688
 689        $last_branch = $opt_o;
 690        $orig_branch = "";
 691} else {
 692        open(F, "-|", qw(git symbolic-ref HEAD)) or
 693                die "Cannot run git symbolic-ref: $!\n";
 694        chomp ($last_branch = <F>);
 695        $last_branch = basename($last_branch);
 696        close(F);
 697        unless ($last_branch) {
 698                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 699                $last_branch = "master";
 700        }
 701        $orig_branch = $last_branch;
 702        $tip_at_start = `git rev-parse --verify HEAD`;
 703
 704        # Get the last import timestamps
 705        my $fmt = '($ref, $author) = (%(refname), %(author));';
 706        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 707        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 708        while (defined(my $entry = <H>)) {
 709                my ($ref, $author);
 710                eval($entry) || die "cannot eval refs list: $@";
 711                my ($head) = ($ref =~ m|^$remote/(.*)|);
 712                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 713                $branch_date{$head} = $1;
 714        }
 715        close(H);
 716        if (!exists $branch_date{$opt_o}) {
 717                die "Branch '$opt_o' does not exist.\n".
 718                       "Either use the correct '-o branch' option,\n".
 719                       "or import to a new repository.\n";
 720        }
 721}
 722
 723-d $git_dir
 724        or die "Could not create git subdir ($git_dir).\n";
 725
 726# now we read (and possibly save) author-info as well
 727-f "$git_dir/cvs-authors" and
 728  read_author_info("$git_dir/cvs-authors");
 729if ($opt_A) {
 730        read_author_info(munge_user_filename($opt_A));
 731        write_author_info("$git_dir/cvs-authors");
 732}
 733
 734# open .git/cvs-revisions, if requested
 735open my $revision_map, '>>', "$git_dir/cvs-revisions"
 736    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 737        if defined $opt_R;
 738
 739
 740#
 741# run cvsps into a file unless we are getting
 742# it passed as a file via $opt_P
 743#
 744my $cvspsfile;
 745unless ($opt_P) {
 746        print "Running cvsps...\n" if $opt_v;
 747        my $pid = open(CVSPS,"-|");
 748        my $cvspsfh;
 749        die "Cannot fork: $!\n" unless defined $pid;
 750        unless ($pid) {
 751                my @opt;
 752                @opt = split(/,/,$opt_p) if defined $opt_p;
 753                unshift @opt, '-z', $opt_z if defined $opt_z;
 754                unshift @opt, '-q'         unless defined $opt_v;
 755                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 756                        push @opt, '--cvs-direct';
 757                }
 758                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 759                die "Could not start cvsps: $!\n";
 760        }
 761        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 762                                          DIR => File::Spec->tmpdir());
 763        while (<CVSPS>) {
 764            print $cvspsfh $_;
 765        }
 766        close CVSPS;
 767        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 768        close $cvspsfh;
 769} else {
 770        $cvspsfile = munge_user_filename($opt_P);
 771}
 772
 773open(CVS, "<$cvspsfile") or die $!;
 774
 775## cvsps output:
 776#---------------------
 777#PatchSet 314
 778#Date: 1999/09/18 13:03:59
 779#Author: wkoch
 780#Branch: STABLE-BRANCH-1-0
 781#Ancestor branch: HEAD
 782#Tag: (none)
 783#Log:
 784#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 785#Members:
 786#       README:1.57->1.57.2.1
 787#       VERSION:1.96->1.96.2.1
 788#
 789#---------------------
 790
 791my $state = 0;
 792
 793sub update_index (\@\@) {
 794        my $old = shift;
 795        my $new = shift;
 796        open(my $fh, '|-', qw(git update-index -z --index-info))
 797                or die "unable to open git update-index: $!";
 798        print $fh
 799                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 800                        @$old),
 801                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 802                        @$new)
 803                or die "unable to write to git update-index: $!";
 804        close $fh
 805                or die "unable to write to git update-index: $!";
 806        $? and die "git update-index reported error: $?";
 807}
 808
 809sub write_tree () {
 810        open(my $fh, '-|', qw(git write-tree))
 811                or die "unable to open git write-tree: $!";
 812        chomp(my $tree = <$fh>);
 813        is_sha1($tree)
 814                or die "Cannot get tree id ($tree): $!";
 815        close($fh)
 816                or die "Error running git write-tree: $?\n";
 817        print "Tree ID $tree\n" if $opt_v;
 818        return $tree;
 819}
 820
 821my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
 822my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 823
 824# commits that cvsps cannot place anywhere...
 825$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 826
 827sub commit {
 828        if ($branch eq $opt_o && !$index{branch} &&
 829                !get_headref("$remote/$branch")) {
 830            # looks like an initial commit
 831            # use the index primed by git init
 832            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 833            $index{$branch} = "$git_dir/index";
 834        } else {
 835            # use an index per branch to speed up
 836            # imports of projects with many branches
 837            unless ($index{$branch}) {
 838                $index{$branch} = tmpnam();
 839                $ENV{GIT_INDEX_FILE} = $index{$branch};
 840                if ($ancestor) {
 841                    system("git", "read-tree", "$remote/$ancestor");
 842                } else {
 843                    system("git", "read-tree", "$remote/$branch");
 844                }
 845                die "read-tree failed: $?\n" if $?;
 846            }
 847        }
 848        $ENV{GIT_INDEX_FILE} = $index{$branch};
 849
 850        update_index(@old, @new);
 851        @old = @new = ();
 852        my $tree = write_tree();
 853        my $parent = get_headref("$remote/$last_branch");
 854        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 855
 856        my @commit_args;
 857        push @commit_args, ("-p", $parent) if $parent;
 858
 859        # loose detection of merges
 860        # based on the commit msg
 861        foreach my $rx (@mergerx) {
 862                next unless $logmsg =~ $rx && $1;
 863                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 864                if (my $sha1 = get_headref("$remote/$mparent")) {
 865                        push @commit_args, '-p', "$remote/$mparent";
 866                        print "Merge parent branch: $mparent\n" if $opt_v;
 867                }
 868        }
 869
 870        set_timezone($author_tz);
 871        # $date is in the seconds since epoch format
 872        my $tz_offset = get_tz_offset($date);
 873        my $commit_date = "$date $tz_offset";
 874        set_timezone('UTC');
 875        $ENV{GIT_AUTHOR_NAME} = $author_name;
 876        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 877        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 878        $ENV{GIT_COMMITTER_NAME} = $author_name;
 879        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 880        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 881        my $pid = open2(my $commit_read, my $commit_write,
 882                'git', 'commit-tree', $tree, @commit_args);
 883
 884        # compatibility with git2cvs
 885        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 886        $logmsg =~ s/[\s\n]+\z//;
 887
 888        if (@skipped) {
 889            $logmsg .= "\n\n\nSKIPPED:\n\t";
 890            $logmsg .= join("\n\t", @skipped) . "\n";
 891            @skipped = ();
 892        }
 893
 894        print($commit_write "$logmsg\n") && close($commit_write)
 895                or die "Error writing to git commit-tree: $!\n";
 896
 897        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 898        chomp(my $cid = <$commit_read>);
 899        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 900        print "Commit ID $cid\n" if $opt_v;
 901        close($commit_read);
 902
 903        waitpid($pid,0);
 904        die "Error running git commit-tree: $?\n" if $?;
 905
 906        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 907                or die "Cannot write branch $branch for update: $!\n";
 908
 909        if ($revision_map) {
 910                print $revision_map "@$_ $cid\n" for @commit_revisions;
 911        }
 912        @commit_revisions = ();
 913
 914        if ($tag) {
 915                my ($xtag) = $tag;
 916                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 917                $xtag =~ tr/_/\./ if ( $opt_u );
 918                $xtag =~ s/[\/]/$opt_s/g;
 919
 920                # See refs.c for these rules.
 921                # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
 922                $xtag =~ s/[ ~\^:\\\*\?\[]//g;
 923                # Other bad strings for tags:
 924                # (See check_refname_component in refs.c.)
 925                1 while $xtag =~ s/
 926                        (?: \.\.        # Tag cannot contain '..'.
 927                        |   \@\{        # Tag cannot contain '@{'.
 928                        | ^ -           # Tag cannot begin with '-'.
 929                        |   \.lock $    # Tag cannot end with '.lock'.
 930                        | ^ \.          # Tag cannot begin...
 931                        |   \. $        # ...or end with '.'
 932                        )//xg;
 933                # Tag cannot be empty.
 934                if ($xtag eq '') {
 935                        warn("warning: ignoring tag '$tag'",
 936                        " with invalid tagname\n");
 937                        return;
 938                }
 939
 940                if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
 941                        # We did our best to sanitize the tag, but still failed
 942                        # for whatever reason. Bail out, and give the user
 943                        # enough information to understand if/how we should
 944                        # improve the translation in the future.
 945                        if ($tag ne $xtag) {
 946                                print "Translated '$tag' tag to '$xtag'\n";
 947                        }
 948                        die "Cannot create tag $xtag: $!\n";
 949                }
 950
 951                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 952        }
 953};
 954
 955my $commitcount = 1;
 956while (<CVS>) {
 957        chomp;
 958        if ($state == 0 and /^-+$/) {
 959                $state = 1;
 960        } elsif ($state == 0) {
 961                $state = 1;
 962                redo;
 963        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 964                $patchset = 0+$_;
 965                $state=2;
 966        } elsif ($state == 2 and s/^Date:\s+//) {
 967                $date = pdate($_);
 968                unless ($date) {
 969                        print STDERR "Could not parse date: $_\n";
 970                        $state=0;
 971                        next;
 972                }
 973                $state=3;
 974        } elsif ($state == 3 and s/^Author:\s+//) {
 975                $author_tz = "UTC";
 976                s/\s+$//;
 977                if (/^(.*?)\s+<(.*)>/) {
 978                    ($author_name, $author_email) = ($1, $2);
 979                } elsif ($conv_author_name{$_}) {
 980                        $author_name = $conv_author_name{$_};
 981                        $author_email = $conv_author_email{$_};
 982                        $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
 983                } else {
 984                    $author_name = $author_email = $_;
 985                }
 986                $state = 4;
 987        } elsif ($state == 4 and s/^Branch:\s+//) {
 988                s/\s+$//;
 989                tr/_/\./ if ( $opt_u );
 990                s/[\/]/$opt_s/g;
 991                $branch = $_;
 992                $state = 5;
 993        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 994                s/\s+$//;
 995                $ancestor = $_;
 996                $ancestor = $opt_o if $ancestor eq "HEAD";
 997                $state = 6;
 998        } elsif ($state == 5) {
 999                $ancestor = undef;
1000                $state = 6;
1001                redo;
1002        } elsif ($state == 6 and s/^Tag:\s+//) {
1003                s/\s+$//;
1004                if ($_ eq "(none)") {
1005                        $tag = undef;
1006                } else {
1007                        $tag = $_;
1008                }
1009                $state = 7;
1010        } elsif ($state == 7 and /^Log:/) {
1011                $logmsg = "";
1012                $state = 8;
1013        } elsif ($state == 8 and /^Members:/) {
1014                $branch = $opt_o if $branch eq "HEAD";
1015                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1016                        # skip
1017                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1018                        $state = 11;
1019                        next;
1020                }
1021                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1022                        # skip if the commit is too recent
1023                        # given that the cvsps default fuzz is 300s, we give ourselves another
1024                        # 300s just in case -- this also prevents skipping commits
1025                        # due to server clock drift
1026                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
1027                        $state = 11;
1028                        next;
1029                }
1030                if (exists $ignorebranch{$branch}) {
1031                        print STDERR "Skipping $branch\n";
1032                        $state = 11;
1033                        next;
1034                }
1035                if ($ancestor) {
1036                        if ($ancestor eq $branch) {
1037                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1038                                $ancestor = $opt_o;
1039                        }
1040                        if (defined get_headref("$remote/$branch")) {
1041                                print STDERR "Branch $branch already exists!\n";
1042                                $state=11;
1043                                next;
1044                        }
1045                        my $id = get_headref("$remote/$ancestor");
1046                        if (!$id) {
1047                                print STDERR "Branch $ancestor does not exist!\n";
1048                                $ignorebranch{$branch} = 1;
1049                                $state=11;
1050                                next;
1051                        }
1052
1053                        system(qw(git update-ref -m cvsimport),
1054                                "$remote/$branch", $id);
1055                        if($? != 0) {
1056                                print STDERR "Could not create branch $branch\n";
1057                                $ignorebranch{$branch} = 1;
1058                                $state=11;
1059                                next;
1060                        }
1061                }
1062                $last_branch = $branch if $branch ne $last_branch;
1063                $state = 9;
1064        } elsif ($state == 8) {
1065                $logmsg .= "$_\n";
1066        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1067#       VERSION:1.96->1.96.2.1
1068                my $init = ($2 eq "INITIAL");
1069                my $fn = $1;
1070                my $rev = $3;
1071                $fn =~ s#^/+##;
1072                if ($opt_S && $fn =~ m/$opt_S/) {
1073                    print "SKIPPING $fn v $rev\n";
1074                    push(@skipped, $fn);
1075                    next;
1076                }
1077                push @commit_revisions, [$fn, $rev];
1078                print "Fetching $fn   v $rev\n" if $opt_v;
1079                my ($tmpname, $size) = $cvs->file($fn,$rev);
1080                if ($size == -1) {
1081                        push(@old,$fn);
1082                        print "Drop $fn\n" if $opt_v;
1083                } else {
1084                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1085                        my $pid = open(my $F, '-|');
1086                        die $! unless defined $pid;
1087                        if (!$pid) {
1088                            exec("git", "hash-object", "-w", $tmpname)
1089                                or die "Cannot create object: $!\n";
1090                        }
1091                        my $sha = <$F>;
1092                        chomp $sha;
1093                        close $F;
1094                        my $mode = pmode($cvs->{'mode'});
1095                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1096                }
1097                unlink($tmpname);
1098        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1099                my $fn = $1;
1100                my $rev = $2;
1101                $fn =~ s#^/+##;
1102                push @commit_revisions, [$fn, $rev];
1103                push(@old,$fn);
1104                print "Delete $fn\n" if $opt_v;
1105        } elsif ($state == 9 and /^\s*$/) {
1106                $state = 10;
1107        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1108                $commitcount++;
1109                if ($opt_L && $commitcount > $opt_L) {
1110                        last;
1111                }
1112                commit();
1113                if (($commitcount & 1023) == 0) {
1114                        system(qw(git repack -a -d));
1115                }
1116                $state = 1;
1117        } elsif ($state == 11 and /^-+$/) {
1118                $state = 1;
1119        } elsif (/^-+$/) { # end of unknown-line processing
1120                $state = 1;
1121        } elsif ($state != 11) { # ignore stuff when skipping
1122                print STDERR "* UNKNOWN LINE * $_\n";
1123        }
1124}
1125commit() if $branch and $state != 11;
1126
1127unless ($opt_P) {
1128        unlink($cvspsfile);
1129}
1130
1131# The heuristic of repacking every 1024 commits can leave a
1132# lot of unpacked data.  If there is more than 1MB worth of
1133# not-packed objects, repack once more.
1134my $line = `git count-objects`;
1135if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1136  my ($n_objects, $kb) = ($1, $2);
1137  1024 < $kb
1138    and system(qw(git repack -a -d));
1139}
1140
1141foreach my $git_index (values %index) {
1142    if ($git_index ne "$git_dir/index") {
1143        unlink($git_index);
1144    }
1145}
1146
1147if (defined $orig_git_index) {
1148        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1149} else {
1150        delete $ENV{GIT_INDEX_FILE};
1151}
1152
1153# Now switch back to the branch we were in before all of this happened
1154if ($orig_branch) {
1155        print "DONE.\n" if $opt_v;
1156        if ($opt_i) {
1157                exit 0;
1158        }
1159        my $tip_at_end = `git rev-parse --verify HEAD`;
1160        if ($tip_at_start ne $tip_at_end) {
1161                for ($tip_at_start, $tip_at_end) { chomp; }
1162                print "Fetched into the current branch.\n" if $opt_v;
1163                system(qw(git read-tree -u -m),
1164                       $tip_at_start, $tip_at_end);
1165                die "Fast-forward update failed: $?\n" if $?;
1166        }
1167        else {
1168                system(qw(git merge -m cvsimport), "$remote/$opt_o");
1169                die "Could not merge $opt_o into the current branch.\n" if $?;
1170        }
1171} else {
1172        $orig_branch = "master";
1173        print "DONE; creating $orig_branch branch\n" if $opt_v;
1174        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1175                unless defined get_headref('refs/heads/master');
1176        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1177                if ($opt_r && $opt_o ne 'HEAD');
1178        system('git', 'update-ref', 'HEAD', "$orig_branch");
1179        unless ($opt_i) {
1180                system(qw(git checkout -f));
1181                die "checkout failed: $?\n" if $?;
1182        }
1183}