git-cvsimport.perlon commit commit: convert commit->buffer to a slab (c1b3c71)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime tzset dup2 ENOENT);
  28use IPC::Open2;
  29use Git qw(get_tz_offset);
  30
  31$SIG{'PIPE'}="IGNORE";
  32set_timezone('UTC');
  33
  34our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  35my (%conv_author_name, %conv_author_email, %conv_author_tz);
  36
  37sub usage(;$) {
  38        my $msg = shift;
  39        print(STDERR "Error: $msg\n") if $msg;
  40        print STDERR <<END;
  41usage: git cvsimport     # fetch/update GIT from CVS
  42       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  43       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  44       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  45       [-r remote] [-R] [CVS_module]
  46END
  47        exit(1);
  48}
  49
  50sub read_author_info($) {
  51        my ($file) = @_;
  52        my $user;
  53        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  54
  55        while (<$f>) {
  56                # Expected format is this:
  57                #   exon=Andreas Ericsson <ae@op5.se>
  58                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  59                        $user = $1;
  60                        $conv_author_name{$user} = $2;
  61                        $conv_author_email{$user} = $3;
  62                }
  63                # or with an optional timezone:
  64                #   spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
  65                elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
  66                        $user = $1;
  67                        $conv_author_name{$user} = $2;
  68                        $conv_author_email{$user} = $3;
  69                        $conv_author_tz{$user} = $4;
  70                }
  71                # However, we also read from CVSROOT/users format
  72                # to ease migration.
  73                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  74                        my $mapped;
  75                        ($user, $mapped) = ($1, $3);
  76                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  77                                $conv_author_name{$user} = $1;
  78                                $conv_author_email{$user} = $2;
  79                        }
  80                        elsif ($mapped =~ /^<?(.*)>?$/) {
  81                                $conv_author_name{$user} = $user;
  82                                $conv_author_email{$user} = $1;
  83                        }
  84                }
  85                # NEEDSWORK: Maybe warn on unrecognized lines?
  86        }
  87        close ($f);
  88}
  89
  90sub write_author_info($) {
  91        my ($file) = @_;
  92        open my $f, '>', $file or
  93          die("Failed to open $file for writing: $!");
  94
  95        foreach (keys %conv_author_name) {
  96                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
  97                print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
  98                print $f "\n";
  99        }
 100        close ($f);
 101}
 102
 103# Versions of perl before 5.10.0 may not automatically check $TZ each
 104# time localtime is run (most platforms will do so only the first time).
 105# We can work around this by using tzset() to update the internal
 106# variable whenever we change the environment.
 107sub set_timezone {
 108        $ENV{TZ} = shift;
 109        tzset();
 110}
 111
 112# convert getopts specs for use by git config
 113my %longmap = (
 114        'A:' => 'authors-file',
 115        'M:' => 'merge-regex',
 116        'P:' => undef,
 117        'R' => 'track-revisions',
 118        'S:' => 'ignore-paths',
 119);
 120
 121sub read_repo_config {
 122        # Split the string between characters, unless there is a ':'
 123        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 124        my @opts = split(/ *(?!:)/, shift);
 125        foreach my $o (@opts) {
 126                my $key = $o;
 127                $key =~ s/://g;
 128                my $arg = 'git config';
 129                $arg .= ' --bool' if ($o !~ /:$/);
 130                my $ckey = $key;
 131
 132                if (exists $longmap{$o}) {
 133                        # An uppercase option like -R cannot be
 134                        # expressed in the configuration, as the
 135                        # variable names are downcased.
 136                        $ckey = $longmap{$o};
 137                        next if (! defined $ckey);
 138                        $ckey =~ s/-//g;
 139                }
 140                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 141                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 142                        no strict 'refs';
 143                        my $opt_name = "opt_" . $key;
 144                        if (!$$opt_name) {
 145                                $$opt_name = $tmp;
 146                        }
 147                }
 148        }
 149}
 150
 151my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 152read_repo_config($opts);
 153Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 154
 155# turn the Getopt::Std specification in a Getopt::Long one,
 156# with support for multiple -M options
 157GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 158    or usage();
 159usage if $opt_h;
 160
 161if (@ARGV == 0) {
 162                chomp(my $module = `git config --get cvsimport.module`);
 163                push(@ARGV, $module) if $? == 0;
 164}
 165@ARGV <= 1 or usage("You can't specify more than one CVS module");
 166
 167if ($opt_d) {
 168        $ENV{"CVSROOT"} = $opt_d;
 169} elsif (-f 'CVS/Root') {
 170        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 171        $opt_d = <$f>;
 172        chomp $opt_d;
 173        close $f;
 174        $ENV{"CVSROOT"} = $opt_d;
 175} elsif ($ENV{"CVSROOT"}) {
 176        $opt_d = $ENV{"CVSROOT"};
 177} else {
 178        usage("CVSROOT needs to be set");
 179}
 180$opt_s ||= "-";
 181$opt_a ||= 0;
 182
 183my $git_tree = $opt_C;
 184$git_tree ||= ".";
 185
 186my $remote;
 187if (defined $opt_r) {
 188        $remote = 'refs/remotes/' . $opt_r;
 189        $opt_o ||= "master";
 190} else {
 191        $opt_o ||= "origin";
 192        $remote = 'refs/heads';
 193}
 194
 195my $cvs_tree;
 196if ($#ARGV == 0) {
 197        $cvs_tree = $ARGV[0];
 198} elsif (-f 'CVS/Repository') {
 199        open my $f, '<', 'CVS/Repository' or
 200            die 'Failed to open CVS/Repository';
 201        $cvs_tree = <$f>;
 202        chomp $cvs_tree;
 203        close $f;
 204} else {
 205        usage("CVS module has to be specified");
 206}
 207
 208our @mergerx = ();
 209if ($opt_m) {
 210        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 211}
 212if (@opt_M) {
 213        push (@mergerx, map { qr/$_/ } @opt_M);
 214}
 215
 216# Remember UTC of our starting time
 217# we'll want to avoid importing commits
 218# that are too recent
 219our $starttime = time();
 220
 221select(STDERR); $|=1; select(STDOUT);
 222
 223
 224package CVSconn;
 225# Basic CVS dialog.
 226# We're only interested in connecting and downloading, so ...
 227
 228use File::Spec;
 229use File::Temp qw(tempfile);
 230use POSIX qw(strftime dup2);
 231
 232sub new {
 233        my ($what,$repo,$subdir) = @_;
 234        $what=ref($what) if ref($what);
 235
 236        my $self = {};
 237        $self->{'buffer'} = "";
 238        bless($self,$what);
 239
 240        $repo =~ s#/+$##;
 241        $self->{'fullrep'} = $repo;
 242        $self->conn();
 243
 244        $self->{'subdir'} = $subdir;
 245        $self->{'lines'} = undef;
 246
 247        return $self;
 248}
 249
 250sub find_password_entry {
 251        my ($cvspass, @cvsroot) = @_;
 252        my ($file, $delim) = @$cvspass;
 253        my $pass;
 254        local ($_);
 255
 256        if (open(my $fh, $file)) {
 257                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 258                CVSPASSFILE:
 259                while (<$fh>) {
 260                        chomp;
 261                        s/^\/\d+\s+//;
 262                        my ($w, $p) = split($delim,$_,2);
 263                        for my $cvsroot (@cvsroot) {
 264                                if ($w eq $cvsroot) {
 265                                        $pass = $p;
 266                                        last CVSPASSFILE;
 267                                }
 268                        }
 269                }
 270                close($fh);
 271        }
 272        return $pass;
 273}
 274
 275sub conn {
 276        my $self = shift;
 277        my $repo = $self->{'fullrep'};
 278        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 279                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 280
 281                my ($proxyhost,$proxyport);
 282                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 283                        $proxyhost = $1;
 284                        # Default proxyport, if not specified, is 8080.
 285                        $proxyport = 8080;
 286                        if ($ENV{"CVS_PROXY_PORT"}) {
 287                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 288                        }
 289                        if ($param =~ m/proxyport=([^;]+)/) {
 290                                $proxyport = $1;
 291                        }
 292                }
 293                $repo ||= '/';
 294
 295                # if username is not explicit in CVSROOT, then use current user, as cvs would
 296                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 297                my $rr2 = "-";
 298                unless ($port) {
 299                        $rr2 = ":pserver:$user\@$serv:$repo";
 300                        $port=2401;
 301                }
 302                my $rr = ":pserver:$user\@$serv:$port$repo";
 303
 304                if ($pass) {
 305                        $pass = $self->_scramble($pass);
 306                } else {
 307                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 308                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 309                        my @loc = ();
 310                        foreach my $cvspass (@cvspass) {
 311                                my $p = find_password_entry($cvspass, $rr, $rr2);
 312                                if ($p) {
 313                                        push @loc, $cvspass->[0];
 314                                        $pass = $p;
 315                                }
 316                        }
 317
 318                        if (1 < @loc) {
 319                                die("Multiple cvs password files have ".
 320                                    "entries for CVSROOT $opt_d: @loc");
 321                        } elsif (!$pass) {
 322                                $pass = "A";
 323                        }
 324                }
 325
 326                my ($s, $rep);
 327                if ($proxyhost) {
 328
 329                        # Use a HTTP Proxy. Only works for HTTP proxies that
 330                        # don't require user authentication
 331                        #
 332                        # See: http://www.ietf.org/rfc/rfc2817.txt
 333
 334                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 335                        die "Socket to $proxyhost: $!\n" unless defined $s;
 336                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 337                                or die "Write to $proxyhost: $!\n";
 338                        $s->flush();
 339
 340                        $rep = <$s>;
 341
 342                        # The answer should look like 'HTTP/1.x 2yy ....'
 343                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 344                                die "Proxy connect: $rep\n";
 345                        }
 346                        # Skip up to the empty line of the proxy server output
 347                        # including the response headers.
 348                        while ($rep = <$s>) {
 349                                last if (!defined $rep ||
 350                                         $rep eq "\n" ||
 351                                         $rep eq "\r\n");
 352                        }
 353                } else {
 354                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 355                        die "Socket to $serv: $!\n" unless defined $s;
 356                }
 357
 358                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 359                        or die "Write to $serv: $!\n";
 360                $s->flush();
 361
 362                $rep = <$s>;
 363
 364                if ($rep ne "I LOVE YOU\n") {
 365                        $rep="<unknown>" unless $rep;
 366                        die "AuthReply: $rep\n";
 367                }
 368                $self->{'socketo'} = $s;
 369                $self->{'socketi'} = $s;
 370        } else { # local or ext: Fork off our own cvs server.
 371                my $pr = IO::Pipe->new();
 372                my $pw = IO::Pipe->new();
 373                my $pid = fork();
 374                die "Fork: $!\n" unless defined $pid;
 375                my $cvs = 'cvs';
 376                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 377                my $rsh = 'rsh';
 378                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 379
 380                my @cvs = ($cvs, 'server');
 381                my ($local, $user, $host);
 382                $local = $repo =~ s/:local://;
 383                if (!$local) {
 384                    $repo =~ s/:ext://;
 385                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 386                    ($user, $host) = ($1, $2);
 387                }
 388                if (!$local) {
 389                    if ($user) {
 390                        unshift @cvs, $rsh, '-l', $user, $host;
 391                    } else {
 392                        unshift @cvs, $rsh, $host;
 393                    }
 394                }
 395
 396                unless ($pid) {
 397                        $pr->writer();
 398                        $pw->reader();
 399                        dup2($pw->fileno(),0);
 400                        dup2($pr->fileno(),1);
 401                        $pr->close();
 402                        $pw->close();
 403                        exec(@cvs);
 404                }
 405                $pw->writer();
 406                $pr->reader();
 407                $self->{'socketo'} = $pw;
 408                $self->{'socketi'} = $pr;
 409        }
 410        $self->{'socketo'}->write("Root $repo\n");
 411
 412        # Trial and error says that this probably is the minimum set
 413        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 414
 415        $self->{'socketo'}->write("valid-requests\n");
 416        $self->{'socketo'}->flush();
 417
 418        my $rep=$self->readline();
 419        die "Failed to read from server" unless defined $rep;
 420        chomp($rep);
 421        if ($rep !~ s/^Valid-requests\s*//) {
 422                $rep="<unknown>" unless $rep;
 423                die "Expected Valid-requests from server, but got: $rep\n";
 424        }
 425        chomp(my $res=$self->readline());
 426        die "validReply: $res\n" if $res ne "ok";
 427
 428        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 429        $self->{'repo'} = $repo;
 430}
 431
 432sub readline {
 433        my ($self) = @_;
 434        return $self->{'socketi'}->getline();
 435}
 436
 437sub _file {
 438        # Request a file with a given revision.
 439        # Trial and error says this is a good way to do it. :-/
 440        my ($self,$fn,$rev) = @_;
 441        $self->{'socketo'}->write("Argument -N\n") or return undef;
 442        $self->{'socketo'}->write("Argument -P\n") or return undef;
 443        # -kk: Linus' version doesn't use it - defaults to off
 444        if ($opt_k) {
 445            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 446        }
 447        $self->{'socketo'}->write("Argument -r\n") or return undef;
 448        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 449        $self->{'socketo'}->write("Argument --\n") or return undef;
 450        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 451        $self->{'socketo'}->write("Directory .\n") or return undef;
 452        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 453        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 454        $self->{'socketo'}->write("co\n") or return undef;
 455        $self->{'socketo'}->flush() or return undef;
 456        $self->{'lines'} = 0;
 457        return 1;
 458}
 459sub _line {
 460        # Read a line from the server.
 461        # ... except that 'line' may be an entire file. ;-)
 462        my ($self, $fh) = @_;
 463        die "Not in lines" unless defined $self->{'lines'};
 464
 465        my $line;
 466        my $res=0;
 467        while (defined($line = $self->readline())) {
 468                # M U gnupg-cvs-rep/AUTHORS
 469                # Updated gnupg-cvs-rep/
 470                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 471                # /AUTHORS/1.1///T1.1
 472                # u=rw,g=rw,o=rw
 473                # 0
 474                # ok
 475
 476                if ($line =~ s/^(?:Created|Updated) //) {
 477                        $line = $self->readline(); # path
 478                        $line = $self->readline(); # Entries line
 479                        my $mode = $self->readline(); chomp $mode;
 480                        $self->{'mode'} = $mode;
 481                        defined (my $cnt = $self->readline())
 482                                or die "EOF from server after 'Changed'\n";
 483                        chomp $cnt;
 484                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 485                        $line="";
 486                        $res = $self->_fetchfile($fh, $cnt);
 487                } elsif ($line =~ s/^ //) {
 488                        print $fh $line;
 489                        $res += length($line);
 490                } elsif ($line =~ /^M\b/) {
 491                        # output, do nothing
 492                } elsif ($line =~ /^Mbinary\b/) {
 493                        my $cnt;
 494                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 495                        chomp $cnt;
 496                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 497                        $line="";
 498                        $res += $self->_fetchfile($fh, $cnt);
 499                } else {
 500                        chomp $line;
 501                        if ($line eq "ok") {
 502                                # print STDERR "S: ok (".length($res).")\n";
 503                                return $res;
 504                        } elsif ($line =~ s/^E //) {
 505                                # print STDERR "S: $line\n";
 506                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 507                                $line = $self->readline(); # filename
 508                                $line = $self->readline(); # OK
 509                                chomp $line;
 510                                die "Unknown: $line" if $line ne "ok";
 511                                return -1;
 512                        } else {
 513                                die "Unknown: $line\n";
 514                        }
 515                }
 516        }
 517        return undef;
 518}
 519sub file {
 520        my ($self,$fn,$rev) = @_;
 521        my $res;
 522
 523        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 524                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 525
 526        $self->_file($fn,$rev) and $res = $self->_line($fh);
 527
 528        if (!defined $res) {
 529            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 530            truncate $fh, 0;
 531            $self->conn();
 532            $self->_file($fn,$rev) or die "No file command send";
 533            $res = $self->_line($fh);
 534            die "Retry failed" unless defined $res;
 535        }
 536        close ($fh);
 537
 538        return ($name, $res);
 539}
 540sub _fetchfile {
 541        my ($self, $fh, $cnt) = @_;
 542        my $res = 0;
 543        my $bufsize = 1024 * 1024;
 544        while ($cnt) {
 545            if ($bufsize > $cnt) {
 546                $bufsize = $cnt;
 547            }
 548            my $buf;
 549            my $num = $self->{'socketi'}->read($buf,$bufsize);
 550            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 551            print $fh $buf;
 552            $res += $num;
 553            $cnt -= $num;
 554        }
 555        return $res;
 556}
 557
 558sub _scramble {
 559        my ($self, $pass) = @_;
 560        my $scrambled = "A";
 561
 562        return $scrambled unless $pass;
 563
 564        my $pass_len = length($pass);
 565        my @pass_arr = split("", $pass);
 566        my $i;
 567
 568        # from cvs/src/scramble.c
 569        my @shifts = (
 570                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 571                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 572                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 573                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 574                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 575                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 576                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 577                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 578                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 579                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 580                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 581                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 582                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 583                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 584                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 585                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 586        );
 587
 588        for ($i = 0; $i < $pass_len; $i++) {
 589                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 590        }
 591
 592        return $scrambled;
 593}
 594
 595package main;
 596
 597my $cvs = CVSconn->new($opt_d, $cvs_tree);
 598
 599
 600sub pdate($) {
 601        my ($d) = @_;
 602        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 603                or die "Unparseable date: $d\n";
 604        my $y=$1; $y-=1900 if $y>1900;
 605        return timegm($6||0,$5,$4,$3,$2-1,$y);
 606}
 607
 608sub pmode($) {
 609        my ($mode) = @_;
 610        my $m = 0;
 611        my $mm = 0;
 612        my $um = 0;
 613        for my $x(split(//,$mode)) {
 614                if ($x eq ",") {
 615                        $m |= $mm&$um;
 616                        $mm = 0;
 617                        $um = 0;
 618                } elsif ($x eq "u") { $um |= 0700;
 619                } elsif ($x eq "g") { $um |= 0070;
 620                } elsif ($x eq "o") { $um |= 0007;
 621                } elsif ($x eq "r") { $mm |= 0444;
 622                } elsif ($x eq "w") { $mm |= 0222;
 623                } elsif ($x eq "x") { $mm |= 0111;
 624                } elsif ($x eq "=") { # do nothing
 625                } else { die "Unknown mode: $mode\n";
 626                }
 627        }
 628        $m |= $mm&$um;
 629        return $m;
 630}
 631
 632sub getwd() {
 633        my $pwd = `pwd`;
 634        chomp $pwd;
 635        return $pwd;
 636}
 637
 638sub is_sha1 {
 639        my $s = shift;
 640        return $s =~ /^[a-f0-9]{40}$/;
 641}
 642
 643sub get_headref ($) {
 644        my $name = shift;
 645        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 646        return undef unless $? == 0;
 647        chomp $r;
 648        return $r;
 649}
 650
 651my $user_filename_prepend = '';
 652sub munge_user_filename {
 653        my $name = shift;
 654        return File::Spec->file_name_is_absolute($name) ?
 655                $name :
 656                $user_filename_prepend . $name;
 657}
 658
 659-d $git_tree
 660        or mkdir($git_tree,0777)
 661        or die "Could not create $git_tree: $!";
 662if ($git_tree ne '.') {
 663        $user_filename_prepend = getwd() . '/';
 664        chdir($git_tree);
 665}
 666
 667my $last_branch = "";
 668my $orig_branch = "";
 669my %branch_date;
 670my $tip_at_start = undef;
 671
 672my $git_dir = $ENV{"GIT_DIR"} || ".git";
 673$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 674$ENV{"GIT_DIR"} = $git_dir;
 675my $orig_git_index;
 676$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 677
 678my %index; # holds filenames of one index per branch
 679
 680unless (-d $git_dir) {
 681        system(qw(git init));
 682        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 683        system(qw(git read-tree --empty));
 684        die "Cannot init an empty tree: $?\n" if $?;
 685
 686        $last_branch = $opt_o;
 687        $orig_branch = "";
 688} else {
 689        open(F, "-|", qw(git symbolic-ref HEAD)) or
 690                die "Cannot run git symbolic-ref: $!\n";
 691        chomp ($last_branch = <F>);
 692        $last_branch = basename($last_branch);
 693        close(F);
 694        unless ($last_branch) {
 695                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 696                $last_branch = "master";
 697        }
 698        $orig_branch = $last_branch;
 699        $tip_at_start = `git rev-parse --verify HEAD`;
 700
 701        # Get the last import timestamps
 702        my $fmt = '($ref, $author) = (%(refname), %(author));';
 703        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 704        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 705        while (defined(my $entry = <H>)) {
 706                my ($ref, $author);
 707                eval($entry) || die "cannot eval refs list: $@";
 708                my ($head) = ($ref =~ m|^$remote/(.*)|);
 709                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 710                $branch_date{$head} = $1;
 711        }
 712        close(H);
 713        if (!exists $branch_date{$opt_o}) {
 714                die "Branch '$opt_o' does not exist.\n".
 715                       "Either use the correct '-o branch' option,\n".
 716                       "or import to a new repository.\n";
 717        }
 718}
 719
 720-d $git_dir
 721        or die "Could not create git subdir ($git_dir).\n";
 722
 723# now we read (and possibly save) author-info as well
 724-f "$git_dir/cvs-authors" and
 725  read_author_info("$git_dir/cvs-authors");
 726if ($opt_A) {
 727        read_author_info(munge_user_filename($opt_A));
 728        write_author_info("$git_dir/cvs-authors");
 729}
 730
 731# open .git/cvs-revisions, if requested
 732open my $revision_map, '>>', "$git_dir/cvs-revisions"
 733    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 734        if defined $opt_R;
 735
 736
 737#
 738# run cvsps into a file unless we are getting
 739# it passed as a file via $opt_P
 740#
 741my $cvspsfile;
 742unless ($opt_P) {
 743        print "Running cvsps...\n" if $opt_v;
 744        my $pid = open(CVSPS,"-|");
 745        my $cvspsfh;
 746        die "Cannot fork: $!\n" unless defined $pid;
 747        unless ($pid) {
 748                my @opt;
 749                @opt = split(/,/,$opt_p) if defined $opt_p;
 750                unshift @opt, '-z', $opt_z if defined $opt_z;
 751                unshift @opt, '-q'         unless defined $opt_v;
 752                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 753                        push @opt, '--cvs-direct';
 754                }
 755                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 756                die "Could not start cvsps: $!\n";
 757        }
 758        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 759                                          DIR => File::Spec->tmpdir());
 760        while (<CVSPS>) {
 761            print $cvspsfh $_;
 762        }
 763        close CVSPS;
 764        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 765        close $cvspsfh;
 766} else {
 767        $cvspsfile = munge_user_filename($opt_P);
 768}
 769
 770open(CVS, "<$cvspsfile") or die $!;
 771
 772## cvsps output:
 773#---------------------
 774#PatchSet 314
 775#Date: 1999/09/18 13:03:59
 776#Author: wkoch
 777#Branch: STABLE-BRANCH-1-0
 778#Ancestor branch: HEAD
 779#Tag: (none)
 780#Log:
 781#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 782#Members:
 783#       README:1.57->1.57.2.1
 784#       VERSION:1.96->1.96.2.1
 785#
 786#---------------------
 787
 788my $state = 0;
 789
 790sub update_index (\@\@) {
 791        my $old = shift;
 792        my $new = shift;
 793        open(my $fh, '|-', qw(git update-index -z --index-info))
 794                or die "unable to open git update-index: $!";
 795        print $fh
 796                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 797                        @$old),
 798                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 799                        @$new)
 800                or die "unable to write to git update-index: $!";
 801        close $fh
 802                or die "unable to write to git update-index: $!";
 803        $? and die "git update-index reported error: $?";
 804}
 805
 806sub write_tree () {
 807        open(my $fh, '-|', qw(git write-tree))
 808                or die "unable to open git write-tree: $!";
 809        chomp(my $tree = <$fh>);
 810        is_sha1($tree)
 811                or die "Cannot get tree id ($tree): $!";
 812        close($fh)
 813                or die "Error running git write-tree: $?\n";
 814        print "Tree ID $tree\n" if $opt_v;
 815        return $tree;
 816}
 817
 818my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
 819my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 820
 821# commits that cvsps cannot place anywhere...
 822$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 823
 824sub commit {
 825        if ($branch eq $opt_o && !$index{branch} &&
 826                !get_headref("$remote/$branch")) {
 827            # looks like an initial commit
 828            # use the index primed by git init
 829            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 830            $index{$branch} = "$git_dir/index";
 831        } else {
 832            # use an index per branch to speed up
 833            # imports of projects with many branches
 834            unless ($index{$branch}) {
 835                $index{$branch} = tmpnam();
 836                $ENV{GIT_INDEX_FILE} = $index{$branch};
 837                if ($ancestor) {
 838                    system("git", "read-tree", "$remote/$ancestor");
 839                } else {
 840                    system("git", "read-tree", "$remote/$branch");
 841                }
 842                die "read-tree failed: $?\n" if $?;
 843            }
 844        }
 845        $ENV{GIT_INDEX_FILE} = $index{$branch};
 846
 847        update_index(@old, @new);
 848        @old = @new = ();
 849        my $tree = write_tree();
 850        my $parent = get_headref("$remote/$last_branch");
 851        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 852
 853        my @commit_args;
 854        push @commit_args, ("-p", $parent) if $parent;
 855
 856        # loose detection of merges
 857        # based on the commit msg
 858        foreach my $rx (@mergerx) {
 859                next unless $logmsg =~ $rx && $1;
 860                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 861                if (my $sha1 = get_headref("$remote/$mparent")) {
 862                        push @commit_args, '-p', "$remote/$mparent";
 863                        print "Merge parent branch: $mparent\n" if $opt_v;
 864                }
 865        }
 866
 867        set_timezone($author_tz);
 868        # $date is in the seconds since epoch format
 869        my $tz_offset = get_tz_offset($date);
 870        my $commit_date = "$date $tz_offset";
 871        set_timezone('UTC');
 872        $ENV{GIT_AUTHOR_NAME} = $author_name;
 873        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 874        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 875        $ENV{GIT_COMMITTER_NAME} = $author_name;
 876        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 877        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 878        my $pid = open2(my $commit_read, my $commit_write,
 879                'git', 'commit-tree', $tree, @commit_args);
 880
 881        # compatibility with git2cvs
 882        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 883        $logmsg =~ s/[\s\n]+\z//;
 884
 885        if (@skipped) {
 886            $logmsg .= "\n\n\nSKIPPED:\n\t";
 887            $logmsg .= join("\n\t", @skipped) . "\n";
 888            @skipped = ();
 889        }
 890
 891        print($commit_write "$logmsg\n") && close($commit_write)
 892                or die "Error writing to git commit-tree: $!\n";
 893
 894        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 895        chomp(my $cid = <$commit_read>);
 896        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 897        print "Commit ID $cid\n" if $opt_v;
 898        close($commit_read);
 899
 900        waitpid($pid,0);
 901        die "Error running git commit-tree: $?\n" if $?;
 902
 903        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 904                or die "Cannot write branch $branch for update: $!\n";
 905
 906        if ($revision_map) {
 907                print $revision_map "@$_ $cid\n" for @commit_revisions;
 908        }
 909        @commit_revisions = ();
 910
 911        if ($tag) {
 912                my ($xtag) = $tag;
 913                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 914                $xtag =~ tr/_/\./ if ( $opt_u );
 915                $xtag =~ s/[\/]/$opt_s/g;
 916
 917                # See refs.c for these rules.
 918                # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
 919                $xtag =~ s/[ ~\^:\\\*\?\[]//g;
 920                # Other bad strings for tags:
 921                # (See check_refname_component in refs.c.)
 922                1 while $xtag =~ s/
 923                        (?: \.\.        # Tag cannot contain '..'.
 924                        |   \@{         # Tag cannot contain '@{'.
 925                        | ^ -           # Tag cannot begin with '-'.
 926                        |   \.lock $    # Tag cannot end with '.lock'.
 927                        | ^ \.          # Tag cannot begin...
 928                        |   \. $        # ...or end with '.'
 929                        )//xg;
 930                # Tag cannot be empty.
 931                if ($xtag eq '') {
 932                        warn("warning: ignoring tag '$tag'",
 933                        " with invalid tagname\n");
 934                        return;
 935                }
 936
 937                if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
 938                        # We did our best to sanitize the tag, but still failed
 939                        # for whatever reason. Bail out, and give the user
 940                        # enough information to understand if/how we should
 941                        # improve the translation in the future.
 942                        if ($tag ne $xtag) {
 943                                print "Translated '$tag' tag to '$xtag'\n";
 944                        }
 945                        die "Cannot create tag $xtag: $!\n";
 946                }
 947
 948                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 949        }
 950};
 951
 952my $commitcount = 1;
 953while (<CVS>) {
 954        chomp;
 955        if ($state == 0 and /^-+$/) {
 956                $state = 1;
 957        } elsif ($state == 0) {
 958                $state = 1;
 959                redo;
 960        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 961                $patchset = 0+$_;
 962                $state=2;
 963        } elsif ($state == 2 and s/^Date:\s+//) {
 964                $date = pdate($_);
 965                unless ($date) {
 966                        print STDERR "Could not parse date: $_\n";
 967                        $state=0;
 968                        next;
 969                }
 970                $state=3;
 971        } elsif ($state == 3 and s/^Author:\s+//) {
 972                $author_tz = "UTC";
 973                s/\s+$//;
 974                if (/^(.*?)\s+<(.*)>/) {
 975                    ($author_name, $author_email) = ($1, $2);
 976                } elsif ($conv_author_name{$_}) {
 977                        $author_name = $conv_author_name{$_};
 978                        $author_email = $conv_author_email{$_};
 979                        $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
 980                } else {
 981                    $author_name = $author_email = $_;
 982                }
 983                $state = 4;
 984        } elsif ($state == 4 and s/^Branch:\s+//) {
 985                s/\s+$//;
 986                tr/_/\./ if ( $opt_u );
 987                s/[\/]/$opt_s/g;
 988                $branch = $_;
 989                $state = 5;
 990        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 991                s/\s+$//;
 992                $ancestor = $_;
 993                $ancestor = $opt_o if $ancestor eq "HEAD";
 994                $state = 6;
 995        } elsif ($state == 5) {
 996                $ancestor = undef;
 997                $state = 6;
 998                redo;
 999        } elsif ($state == 6 and s/^Tag:\s+//) {
1000                s/\s+$//;
1001                if ($_ eq "(none)") {
1002                        $tag = undef;
1003                } else {
1004                        $tag = $_;
1005                }
1006                $state = 7;
1007        } elsif ($state == 7 and /^Log:/) {
1008                $logmsg = "";
1009                $state = 8;
1010        } elsif ($state == 8 and /^Members:/) {
1011                $branch = $opt_o if $branch eq "HEAD";
1012                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1013                        # skip
1014                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1015                        $state = 11;
1016                        next;
1017                }
1018                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1019                        # skip if the commit is too recent
1020                        # given that the cvsps default fuzz is 300s, we give ourselves another
1021                        # 300s just in case -- this also prevents skipping commits
1022                        # due to server clock drift
1023                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
1024                        $state = 11;
1025                        next;
1026                }
1027                if (exists $ignorebranch{$branch}) {
1028                        print STDERR "Skipping $branch\n";
1029                        $state = 11;
1030                        next;
1031                }
1032                if ($ancestor) {
1033                        if ($ancestor eq $branch) {
1034                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1035                                $ancestor = $opt_o;
1036                        }
1037                        if (defined get_headref("$remote/$branch")) {
1038                                print STDERR "Branch $branch already exists!\n";
1039                                $state=11;
1040                                next;
1041                        }
1042                        my $id = get_headref("$remote/$ancestor");
1043                        if (!$id) {
1044                                print STDERR "Branch $ancestor does not exist!\n";
1045                                $ignorebranch{$branch} = 1;
1046                                $state=11;
1047                                next;
1048                        }
1049
1050                        system(qw(git update-ref -m cvsimport),
1051                                "$remote/$branch", $id);
1052                        if($? != 0) {
1053                                print STDERR "Could not create branch $branch\n";
1054                                $ignorebranch{$branch} = 1;
1055                                $state=11;
1056                                next;
1057                        }
1058                }
1059                $last_branch = $branch if $branch ne $last_branch;
1060                $state = 9;
1061        } elsif ($state == 8) {
1062                $logmsg .= "$_\n";
1063        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1064#       VERSION:1.96->1.96.2.1
1065                my $init = ($2 eq "INITIAL");
1066                my $fn = $1;
1067                my $rev = $3;
1068                $fn =~ s#^/+##;
1069                if ($opt_S && $fn =~ m/$opt_S/) {
1070                    print "SKIPPING $fn v $rev\n";
1071                    push(@skipped, $fn);
1072                    next;
1073                }
1074                push @commit_revisions, [$fn, $rev];
1075                print "Fetching $fn   v $rev\n" if $opt_v;
1076                my ($tmpname, $size) = $cvs->file($fn,$rev);
1077                if ($size == -1) {
1078                        push(@old,$fn);
1079                        print "Drop $fn\n" if $opt_v;
1080                } else {
1081                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1082                        my $pid = open(my $F, '-|');
1083                        die $! unless defined $pid;
1084                        if (!$pid) {
1085                            exec("git", "hash-object", "-w", $tmpname)
1086                                or die "Cannot create object: $!\n";
1087                        }
1088                        my $sha = <$F>;
1089                        chomp $sha;
1090                        close $F;
1091                        my $mode = pmode($cvs->{'mode'});
1092                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1093                }
1094                unlink($tmpname);
1095        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1096                my $fn = $1;
1097                my $rev = $2;
1098                $fn =~ s#^/+##;
1099                push @commit_revisions, [$fn, $rev];
1100                push(@old,$fn);
1101                print "Delete $fn\n" if $opt_v;
1102        } elsif ($state == 9 and /^\s*$/) {
1103                $state = 10;
1104        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1105                $commitcount++;
1106                if ($opt_L && $commitcount > $opt_L) {
1107                        last;
1108                }
1109                commit();
1110                if (($commitcount & 1023) == 0) {
1111                        system(qw(git repack -a -d));
1112                }
1113                $state = 1;
1114        } elsif ($state == 11 and /^-+$/) {
1115                $state = 1;
1116        } elsif (/^-+$/) { # end of unknown-line processing
1117                $state = 1;
1118        } elsif ($state != 11) { # ignore stuff when skipping
1119                print STDERR "* UNKNOWN LINE * $_\n";
1120        }
1121}
1122commit() if $branch and $state != 11;
1123
1124unless ($opt_P) {
1125        unlink($cvspsfile);
1126}
1127
1128# The heuristic of repacking every 1024 commits can leave a
1129# lot of unpacked data.  If there is more than 1MB worth of
1130# not-packed objects, repack once more.
1131my $line = `git count-objects`;
1132if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1133  my ($n_objects, $kb) = ($1, $2);
1134  1024 < $kb
1135    and system(qw(git repack -a -d));
1136}
1137
1138foreach my $git_index (values %index) {
1139    if ($git_index ne "$git_dir/index") {
1140        unlink($git_index);
1141    }
1142}
1143
1144if (defined $orig_git_index) {
1145        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1146} else {
1147        delete $ENV{GIT_INDEX_FILE};
1148}
1149
1150# Now switch back to the branch we were in before all of this happened
1151if ($orig_branch) {
1152        print "DONE.\n" if $opt_v;
1153        if ($opt_i) {
1154                exit 0;
1155        }
1156        my $tip_at_end = `git rev-parse --verify HEAD`;
1157        if ($tip_at_start ne $tip_at_end) {
1158                for ($tip_at_start, $tip_at_end) { chomp; }
1159                print "Fetched into the current branch.\n" if $opt_v;
1160                system(qw(git read-tree -u -m),
1161                       $tip_at_start, $tip_at_end);
1162                die "Fast-forward update failed: $?\n" if $?;
1163        }
1164        else {
1165                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1166                die "Could not merge $opt_o into the current branch.\n" if $?;
1167        }
1168} else {
1169        $orig_branch = "master";
1170        print "DONE; creating $orig_branch branch\n" if $opt_v;
1171        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1172                unless defined get_headref('refs/heads/master');
1173        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1174                if ($opt_r && $opt_o ne 'HEAD');
1175        system('git', 'update-ref', 'HEAD', "$orig_branch");
1176        unless ($opt_i) {
1177                system(qw(git checkout -f));
1178                die "checkout failed: $?\n" if $?;
1179        }
1180}