git-cvsimport.perlon commit trace: measure where the time is spent in the index-heavy operations (ca54d9b)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime tzset dup2 ENOENT);
  28use IPC::Open2;
  29use Git qw(get_tz_offset);
  30
  31$SIG{'PIPE'}="IGNORE";
  32set_timezone('UTC');
  33
  34our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  35my (%conv_author_name, %conv_author_email, %conv_author_tz);
  36
  37sub usage(;$) {
  38        my $msg = shift;
  39        print(STDERR "Error: $msg\n") if $msg;
  40        print STDERR <<END;
  41usage: git cvsimport     # fetch/update GIT from CVS
  42       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  43       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  44       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  45       [-r remote] [-R] [CVS_module]
  46END
  47        exit(1);
  48}
  49
  50sub read_author_info($) {
  51        my ($file) = @_;
  52        my $user;
  53        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  54
  55        while (<$f>) {
  56                # Expected format is this:
  57                #   exon=Andreas Ericsson <ae@op5.se>
  58                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  59                        $user = $1;
  60                        $conv_author_name{$user} = $2;
  61                        $conv_author_email{$user} = $3;
  62                }
  63                # or with an optional timezone:
  64                #   spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
  65                elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
  66                        $user = $1;
  67                        $conv_author_name{$user} = $2;
  68                        $conv_author_email{$user} = $3;
  69                        $conv_author_tz{$user} = $4;
  70                }
  71                # However, we also read from CVSROOT/users format
  72                # to ease migration.
  73                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  74                        my $mapped;
  75                        ($user, $mapped) = ($1, $3);
  76                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  77                                $conv_author_name{$user} = $1;
  78                                $conv_author_email{$user} = $2;
  79                        }
  80                        elsif ($mapped =~ /^<?(.*)>?$/) {
  81                                $conv_author_name{$user} = $user;
  82                                $conv_author_email{$user} = $1;
  83                        }
  84                }
  85                # NEEDSWORK: Maybe warn on unrecognized lines?
  86        }
  87        close ($f);
  88}
  89
  90sub write_author_info($) {
  91        my ($file) = @_;
  92        open my $f, '>', $file or
  93          die("Failed to open $file for writing: $!");
  94
  95        foreach (keys %conv_author_name) {
  96                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
  97                print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
  98                print $f "\n";
  99        }
 100        close ($f);
 101}
 102
 103# Versions of perl before 5.10.0 may not automatically check $TZ each
 104# time localtime is run (most platforms will do so only the first time).
 105# We can work around this by using tzset() to update the internal
 106# variable whenever we change the environment.
 107sub set_timezone {
 108        $ENV{TZ} = shift;
 109        tzset();
 110}
 111
 112# convert getopts specs for use by git config
 113my %longmap = (
 114        'A:' => 'authors-file',
 115        'M:' => 'merge-regex',
 116        'P:' => undef,
 117        'R' => 'track-revisions',
 118        'S:' => 'ignore-paths',
 119);
 120
 121sub read_repo_config {
 122        # Split the string between characters, unless there is a ':'
 123        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 124        my @opts = split(/ *(?!:)/, shift);
 125        foreach my $o (@opts) {
 126                my $key = $o;
 127                $key =~ s/://g;
 128                my $arg = 'git config';
 129                $arg .= ' --bool' if ($o !~ /:$/);
 130                my $ckey = $key;
 131
 132                if (exists $longmap{$o}) {
 133                        # An uppercase option like -R cannot be
 134                        # expressed in the configuration, as the
 135                        # variable names are downcased.
 136                        $ckey = $longmap{$o};
 137                        next if (! defined $ckey);
 138                        $ckey =~ s/-//g;
 139                }
 140                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 141                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 142                        no strict 'refs';
 143                        my $opt_name = "opt_" . $key;
 144                        if (!$$opt_name) {
 145                                $$opt_name = $tmp;
 146                        }
 147                }
 148        }
 149}
 150
 151my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 152read_repo_config($opts);
 153Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 154
 155# turn the Getopt::Std specification in a Getopt::Long one,
 156# with support for multiple -M options
 157GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 158    or usage();
 159usage if $opt_h;
 160
 161if (@ARGV == 0) {
 162                chomp(my $module = `git config --get cvsimport.module`);
 163                push(@ARGV, $module) if $? == 0;
 164}
 165@ARGV <= 1 or usage("You can't specify more than one CVS module");
 166
 167if ($opt_d) {
 168        $ENV{"CVSROOT"} = $opt_d;
 169} elsif (-f 'CVS/Root') {
 170        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 171        $opt_d = <$f>;
 172        chomp $opt_d;
 173        close $f;
 174        $ENV{"CVSROOT"} = $opt_d;
 175} elsif ($ENV{"CVSROOT"}) {
 176        $opt_d = $ENV{"CVSROOT"};
 177} else {
 178        usage("CVSROOT needs to be set");
 179}
 180$opt_s ||= "-";
 181$opt_a ||= 0;
 182
 183my $git_tree = $opt_C;
 184$git_tree ||= ".";
 185
 186my $remote;
 187if (defined $opt_r) {
 188        $remote = 'refs/remotes/' . $opt_r;
 189        $opt_o ||= "master";
 190} else {
 191        $opt_o ||= "origin";
 192        $remote = 'refs/heads';
 193}
 194
 195my $cvs_tree;
 196if ($#ARGV == 0) {
 197        $cvs_tree = $ARGV[0];
 198} elsif (-f 'CVS/Repository') {
 199        open my $f, '<', 'CVS/Repository' or
 200            die 'Failed to open CVS/Repository';
 201        $cvs_tree = <$f>;
 202        chomp $cvs_tree;
 203        close $f;
 204} else {
 205        usage("CVS module has to be specified");
 206}
 207
 208our @mergerx = ();
 209if ($opt_m) {
 210        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 211}
 212if (@opt_M) {
 213        push (@mergerx, map { qr/$_/ } @opt_M);
 214}
 215
 216# Remember UTC of our starting time
 217# we'll want to avoid importing commits
 218# that are too recent
 219our $starttime = time();
 220
 221select(STDERR); $|=1; select(STDOUT);
 222
 223
 224package CVSconn;
 225# Basic CVS dialog.
 226# We're only interested in connecting and downloading, so ...
 227
 228use File::Spec;
 229use File::Temp qw(tempfile);
 230use POSIX qw(strftime dup2);
 231
 232sub new {
 233        my ($what,$repo,$subdir) = @_;
 234        $what=ref($what) if ref($what);
 235
 236        my $self = {};
 237        $self->{'buffer'} = "";
 238        bless($self,$what);
 239
 240        $repo =~ s#/+$##;
 241        $self->{'fullrep'} = $repo;
 242        $self->conn();
 243
 244        $self->{'subdir'} = $subdir;
 245        $self->{'lines'} = undef;
 246
 247        return $self;
 248}
 249
 250sub find_password_entry {
 251        my ($cvspass, @cvsroot) = @_;
 252        my ($file, $delim) = @$cvspass;
 253        my $pass;
 254        local ($_);
 255
 256        if (open(my $fh, $file)) {
 257                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 258                CVSPASSFILE:
 259                while (<$fh>) {
 260                        chomp;
 261                        s/^\/\d+\s+//;
 262                        my ($w, $p) = split($delim,$_,2);
 263                        for my $cvsroot (@cvsroot) {
 264                                if ($w eq $cvsroot) {
 265                                        $pass = $p;
 266                                        last CVSPASSFILE;
 267                                }
 268                        }
 269                }
 270                close($fh);
 271        }
 272        return $pass;
 273}
 274
 275sub conn {
 276        my $self = shift;
 277        my $repo = $self->{'fullrep'};
 278        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 279                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 280
 281                my ($proxyhost,$proxyport);
 282                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 283                        $proxyhost = $1;
 284                        # Default proxyport, if not specified, is 8080.
 285                        $proxyport = 8080;
 286                        if ($ENV{"CVS_PROXY_PORT"}) {
 287                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 288                        }
 289                        if ($param =~ m/proxyport=([^;]+)/) {
 290                                $proxyport = $1;
 291                        }
 292                }
 293                $repo ||= '/';
 294
 295                # if username is not explicit in CVSROOT, then use current user, as cvs would
 296                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 297                my $rr2 = "-";
 298                unless ($port) {
 299                        $rr2 = ":pserver:$user\@$serv:$repo";
 300                        $port=2401;
 301                }
 302                my $rr = ":pserver:$user\@$serv:$port$repo";
 303
 304                if ($pass) {
 305                        $pass = $self->_scramble($pass);
 306                } else {
 307                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 308                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 309                        my @loc = ();
 310                        foreach my $cvspass (@cvspass) {
 311                                my $p = find_password_entry($cvspass, $rr, $rr2);
 312                                if ($p) {
 313                                        push @loc, $cvspass->[0];
 314                                        $pass = $p;
 315                                }
 316                        }
 317
 318                        if (1 < @loc) {
 319                                die("Multiple cvs password files have ".
 320                                    "entries for CVSROOT $opt_d: @loc");
 321                        } elsif (!$pass) {
 322                                $pass = "A";
 323                        }
 324                }
 325
 326                my ($s, $rep);
 327                if ($proxyhost) {
 328
 329                        # Use a HTTP Proxy. Only works for HTTP proxies that
 330                        # don't require user authentication
 331                        #
 332                        # See: http://www.ietf.org/rfc/rfc2817.txt
 333
 334                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 335                        die "Socket to $proxyhost: $!\n" unless defined $s;
 336                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 337                                or die "Write to $proxyhost: $!\n";
 338                        $s->flush();
 339
 340                        $rep = <$s>;
 341
 342                        # The answer should look like 'HTTP/1.x 2yy ....'
 343                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 344                                die "Proxy connect: $rep\n";
 345                        }
 346                        # Skip up to the empty line of the proxy server output
 347                        # including the response headers.
 348                        while ($rep = <$s>) {
 349                                last if (!defined $rep ||
 350                                         $rep eq "\n" ||
 351                                         $rep eq "\r\n");
 352                        }
 353                } else {
 354                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 355                        die "Socket to $serv: $!\n" unless defined $s;
 356                }
 357
 358                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 359                        or die "Write to $serv: $!\n";
 360                $s->flush();
 361
 362                $rep = <$s>;
 363
 364                if ($rep ne "I LOVE YOU\n") {
 365                        $rep="<unknown>" unless $rep;
 366                        die "AuthReply: $rep\n";
 367                }
 368                $self->{'socketo'} = $s;
 369                $self->{'socketi'} = $s;
 370        } else { # local or ext: Fork off our own cvs server.
 371                my $pr = IO::Pipe->new();
 372                my $pw = IO::Pipe->new();
 373                my $pid = fork();
 374                die "Fork: $!\n" unless defined $pid;
 375                my $cvs = 'cvs';
 376                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 377                my $rsh = 'rsh';
 378                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 379
 380                my @cvs = ($cvs, 'server');
 381                my ($local, $user, $host);
 382                $local = $repo =~ s/:local://;
 383                if (!$local) {
 384                    $repo =~ s/:ext://;
 385                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 386                    ($user, $host) = ($1, $2);
 387                }
 388                if (!$local) {
 389                    if ($user) {
 390                        unshift @cvs, $rsh, '-l', $user, $host;
 391                    } else {
 392                        unshift @cvs, $rsh, $host;
 393                    }
 394                }
 395
 396                unless ($pid) {
 397                        $pr->writer();
 398                        $pw->reader();
 399                        dup2($pw->fileno(),0);
 400                        dup2($pr->fileno(),1);
 401                        $pr->close();
 402                        $pw->close();
 403                        exec(@cvs);
 404                }
 405                $pw->writer();
 406                $pr->reader();
 407                $self->{'socketo'} = $pw;
 408                $self->{'socketi'} = $pr;
 409        }
 410        $self->{'socketo'}->write("Root $repo\n");
 411
 412        # Trial and error says that this probably is the minimum set
 413        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 414
 415        $self->{'socketo'}->write("valid-requests\n");
 416        $self->{'socketo'}->flush();
 417
 418        my $rep=$self->readline();
 419        die "Failed to read from server" unless defined $rep;
 420        chomp($rep);
 421        if ($rep !~ s/^Valid-requests\s*//) {
 422                $rep="<unknown>" unless $rep;
 423                die "Expected Valid-requests from server, but got: $rep\n";
 424        }
 425        chomp(my $res=$self->readline());
 426        die "validReply: $res\n" if $res ne "ok";
 427
 428        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 429        $self->{'repo'} = $repo;
 430}
 431
 432sub readline {
 433        my ($self) = @_;
 434        return $self->{'socketi'}->getline();
 435}
 436
 437sub _file {
 438        # Request a file with a given revision.
 439        # Trial and error says this is a good way to do it. :-/
 440        my ($self,$fn,$rev) = @_;
 441        $self->{'socketo'}->write("Argument -N\n") or return undef;
 442        $self->{'socketo'}->write("Argument -P\n") or return undef;
 443        # -kk: Linus' version doesn't use it - defaults to off
 444        if ($opt_k) {
 445            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 446        }
 447        $self->{'socketo'}->write("Argument -r\n") or return undef;
 448        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 449        $self->{'socketo'}->write("Argument --\n") or return undef;
 450        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 451        $self->{'socketo'}->write("Directory .\n") or return undef;
 452        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 453        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 454        $self->{'socketo'}->write("co\n") or return undef;
 455        $self->{'socketo'}->flush() or return undef;
 456        $self->{'lines'} = 0;
 457        return 1;
 458}
 459sub _line {
 460        # Read a line from the server.
 461        # ... except that 'line' may be an entire file. ;-)
 462        my ($self, $fh) = @_;
 463        die "Not in lines" unless defined $self->{'lines'};
 464
 465        my $line;
 466        my $res=0;
 467        while (defined($line = $self->readline())) {
 468                # M U gnupg-cvs-rep/AUTHORS
 469                # Updated gnupg-cvs-rep/
 470                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 471                # /AUTHORS/1.1///T1.1
 472                # u=rw,g=rw,o=rw
 473                # 0
 474                # ok
 475
 476                if ($line =~ s/^(?:Created|Updated) //) {
 477                        $line = $self->readline(); # path
 478                        $line = $self->readline(); # Entries line
 479                        my $mode = $self->readline(); chomp $mode;
 480                        $self->{'mode'} = $mode;
 481                        defined (my $cnt = $self->readline())
 482                                or die "EOF from server after 'Changed'\n";
 483                        chomp $cnt;
 484                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 485                        $line="";
 486                        $res = $self->_fetchfile($fh, $cnt);
 487                } elsif ($line =~ s/^ //) {
 488                        print $fh $line;
 489                        $res += length($line);
 490                } elsif ($line =~ /^M\b/) {
 491                        # output, do nothing
 492                } elsif ($line =~ /^Mbinary\b/) {
 493                        my $cnt;
 494                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 495                        chomp $cnt;
 496                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 497                        $line="";
 498                        $res += $self->_fetchfile($fh, $cnt);
 499                } else {
 500                        chomp $line;
 501                        if ($line eq "ok") {
 502                                # print STDERR "S: ok (".length($res).")\n";
 503                                return $res;
 504                        } elsif ($line =~ s/^E //) {
 505                                # print STDERR "S: $line\n";
 506                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 507                                $line = $self->readline(); # filename
 508                                $line = $self->readline(); # OK
 509                                chomp $line;
 510                                die "Unknown: $line" if $line ne "ok";
 511                                return -1;
 512                        } else {
 513                                die "Unknown: $line\n";
 514                        }
 515                }
 516        }
 517        return undef;
 518}
 519sub file {
 520        my ($self,$fn,$rev) = @_;
 521        my $res;
 522
 523        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 524                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 525
 526        $self->_file($fn,$rev) and $res = $self->_line($fh);
 527
 528        if (!defined $res) {
 529            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 530            truncate $fh, 0;
 531            $self->conn();
 532            $self->_file($fn,$rev) or die "No file command send";
 533            $res = $self->_line($fh);
 534            die "Retry failed" unless defined $res;
 535        }
 536        close ($fh);
 537
 538        return ($name, $res);
 539}
 540sub _fetchfile {
 541        my ($self, $fh, $cnt) = @_;
 542        my $res = 0;
 543        my $bufsize = 1024 * 1024;
 544        while ($cnt) {
 545            if ($bufsize > $cnt) {
 546                $bufsize = $cnt;
 547            }
 548            my $buf;
 549            my $num = $self->{'socketi'}->read($buf,$bufsize);
 550            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 551            print $fh $buf;
 552            $res += $num;
 553            $cnt -= $num;
 554        }
 555        return $res;
 556}
 557
 558sub _scramble {
 559        my ($self, $pass) = @_;
 560        my $scrambled = "A";
 561
 562        return $scrambled unless $pass;
 563
 564        my $pass_len = length($pass);
 565        my @pass_arr = split("", $pass);
 566        my $i;
 567
 568        # from cvs/src/scramble.c
 569        my @shifts = (
 570                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 571                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 572                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 573                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 574                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 575                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 576                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 577                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 578                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 579                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 580                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 581                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 582                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 583                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 584                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 585                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 586        );
 587
 588        for ($i = 0; $i < $pass_len; $i++) {
 589                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 590        }
 591
 592        return $scrambled;
 593}
 594
 595package main;
 596
 597my $cvs = CVSconn->new($opt_d, $cvs_tree);
 598
 599
 600sub pdate($) {
 601        my ($d) = @_;
 602        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 603                or die "Unparseable date: $d\n";
 604        my $y=$1; $y-=1900 if $y>1900;
 605        return timegm($6||0,$5,$4,$3,$2-1,$y);
 606}
 607
 608sub pmode($) {
 609        my ($mode) = @_;
 610        my $m = 0;
 611        my $mm = 0;
 612        my $um = 0;
 613        for my $x(split(//,$mode)) {
 614                if ($x eq ",") {
 615                        $m |= $mm&$um;
 616                        $mm = 0;
 617                        $um = 0;
 618                } elsif ($x eq "u") { $um |= 0700;
 619                } elsif ($x eq "g") { $um |= 0070;
 620                } elsif ($x eq "o") { $um |= 0007;
 621                } elsif ($x eq "r") { $mm |= 0444;
 622                } elsif ($x eq "w") { $mm |= 0222;
 623                } elsif ($x eq "x") { $mm |= 0111;
 624                } elsif ($x eq "=") { # do nothing
 625                } else { die "Unknown mode: $mode\n";
 626                }
 627        }
 628        $m |= $mm&$um;
 629        return $m;
 630}
 631
 632sub getwd() {
 633        my $pwd = `pwd`;
 634        chomp $pwd;
 635        return $pwd;
 636}
 637
 638sub is_sha1 {
 639        my $s = shift;
 640        return $s =~ /^[a-f0-9]{40}$/;
 641}
 642
 643sub get_headref ($) {
 644        my $name = shift;
 645        $name =~ s/'/'\\''/g;
 646        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 647        return undef unless $? == 0;
 648        chomp $r;
 649        return $r;
 650}
 651
 652my $user_filename_prepend = '';
 653sub munge_user_filename {
 654        my $name = shift;
 655        return File::Spec->file_name_is_absolute($name) ?
 656                $name :
 657                $user_filename_prepend . $name;
 658}
 659
 660-d $git_tree
 661        or mkdir($git_tree,0777)
 662        or die "Could not create $git_tree: $!";
 663if ($git_tree ne '.') {
 664        $user_filename_prepend = getwd() . '/';
 665        chdir($git_tree);
 666}
 667
 668my $last_branch = "";
 669my $orig_branch = "";
 670my %branch_date;
 671my $tip_at_start = undef;
 672
 673my $git_dir = $ENV{"GIT_DIR"} || ".git";
 674$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 675$ENV{"GIT_DIR"} = $git_dir;
 676my $orig_git_index;
 677$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 678
 679my %index; # holds filenames of one index per branch
 680
 681unless (-d $git_dir) {
 682        system(qw(git init));
 683        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 684        system(qw(git read-tree --empty));
 685        die "Cannot init an empty tree: $?\n" if $?;
 686
 687        $last_branch = $opt_o;
 688        $orig_branch = "";
 689} else {
 690        open(F, "-|", qw(git symbolic-ref HEAD)) or
 691                die "Cannot run git symbolic-ref: $!\n";
 692        chomp ($last_branch = <F>);
 693        $last_branch = basename($last_branch);
 694        close(F);
 695        unless ($last_branch) {
 696                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 697                $last_branch = "master";
 698        }
 699        $orig_branch = $last_branch;
 700        $tip_at_start = `git rev-parse --verify HEAD`;
 701
 702        # Get the last import timestamps
 703        my $fmt = '($ref, $author) = (%(refname), %(author));';
 704        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 705        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 706        while (defined(my $entry = <H>)) {
 707                my ($ref, $author);
 708                eval($entry) || die "cannot eval refs list: $@";
 709                my ($head) = ($ref =~ m|^$remote/(.*)|);
 710                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 711                $branch_date{$head} = $1;
 712        }
 713        close(H);
 714        if (!exists $branch_date{$opt_o}) {
 715                die "Branch '$opt_o' does not exist.\n".
 716                       "Either use the correct '-o branch' option,\n".
 717                       "or import to a new repository.\n";
 718        }
 719}
 720
 721-d $git_dir
 722        or die "Could not create git subdir ($git_dir).\n";
 723
 724# now we read (and possibly save) author-info as well
 725-f "$git_dir/cvs-authors" and
 726  read_author_info("$git_dir/cvs-authors");
 727if ($opt_A) {
 728        read_author_info(munge_user_filename($opt_A));
 729        write_author_info("$git_dir/cvs-authors");
 730}
 731
 732# open .git/cvs-revisions, if requested
 733open my $revision_map, '>>', "$git_dir/cvs-revisions"
 734    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 735        if defined $opt_R;
 736
 737
 738#
 739# run cvsps into a file unless we are getting
 740# it passed as a file via $opt_P
 741#
 742my $cvspsfile;
 743unless ($opt_P) {
 744        print "Running cvsps...\n" if $opt_v;
 745        my $pid = open(CVSPS,"-|");
 746        my $cvspsfh;
 747        die "Cannot fork: $!\n" unless defined $pid;
 748        unless ($pid) {
 749                my @opt;
 750                @opt = split(/,/,$opt_p) if defined $opt_p;
 751                unshift @opt, '-z', $opt_z if defined $opt_z;
 752                unshift @opt, '-q'         unless defined $opt_v;
 753                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 754                        push @opt, '--cvs-direct';
 755                }
 756                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 757                die "Could not start cvsps: $!\n";
 758        }
 759        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 760                                          DIR => File::Spec->tmpdir());
 761        while (<CVSPS>) {
 762            print $cvspsfh $_;
 763        }
 764        close CVSPS;
 765        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 766        close $cvspsfh;
 767} else {
 768        $cvspsfile = munge_user_filename($opt_P);
 769}
 770
 771open(CVS, "<$cvspsfile") or die $!;
 772
 773## cvsps output:
 774#---------------------
 775#PatchSet 314
 776#Date: 1999/09/18 13:03:59
 777#Author: wkoch
 778#Branch: STABLE-BRANCH-1-0
 779#Ancestor branch: HEAD
 780#Tag: (none)
 781#Log:
 782#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 783#Members:
 784#       README:1.57->1.57.2.1
 785#       VERSION:1.96->1.96.2.1
 786#
 787#---------------------
 788
 789my $state = 0;
 790
 791sub update_index (\@\@) {
 792        my $old = shift;
 793        my $new = shift;
 794        open(my $fh, '|-', qw(git update-index -z --index-info))
 795                or die "unable to open git update-index: $!";
 796        print $fh
 797                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 798                        @$old),
 799                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 800                        @$new)
 801                or die "unable to write to git update-index: $!";
 802        close $fh
 803                or die "unable to write to git update-index: $!";
 804        $? and die "git update-index reported error: $?";
 805}
 806
 807sub write_tree () {
 808        open(my $fh, '-|', qw(git write-tree))
 809                or die "unable to open git write-tree: $!";
 810        chomp(my $tree = <$fh>);
 811        is_sha1($tree)
 812                or die "Cannot get tree id ($tree): $!";
 813        close($fh)
 814                or die "Error running git write-tree: $?\n";
 815        print "Tree ID $tree\n" if $opt_v;
 816        return $tree;
 817}
 818
 819my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
 820my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 821
 822# commits that cvsps cannot place anywhere...
 823$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 824
 825sub commit {
 826        if ($branch eq $opt_o && !$index{branch} &&
 827                !get_headref("$remote/$branch")) {
 828            # looks like an initial commit
 829            # use the index primed by git init
 830            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 831            $index{$branch} = "$git_dir/index";
 832        } else {
 833            # use an index per branch to speed up
 834            # imports of projects with many branches
 835            unless ($index{$branch}) {
 836                $index{$branch} = tmpnam();
 837                $ENV{GIT_INDEX_FILE} = $index{$branch};
 838                if ($ancestor) {
 839                    system("git", "read-tree", "$remote/$ancestor");
 840                } else {
 841                    system("git", "read-tree", "$remote/$branch");
 842                }
 843                die "read-tree failed: $?\n" if $?;
 844            }
 845        }
 846        $ENV{GIT_INDEX_FILE} = $index{$branch};
 847
 848        update_index(@old, @new);
 849        @old = @new = ();
 850        my $tree = write_tree();
 851        my $parent = get_headref("$remote/$last_branch");
 852        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 853
 854        my @commit_args;
 855        push @commit_args, ("-p", $parent) if $parent;
 856
 857        # loose detection of merges
 858        # based on the commit msg
 859        foreach my $rx (@mergerx) {
 860                next unless $logmsg =~ $rx && $1;
 861                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 862                if (my $sha1 = get_headref("$remote/$mparent")) {
 863                        push @commit_args, '-p', "$remote/$mparent";
 864                        print "Merge parent branch: $mparent\n" if $opt_v;
 865                }
 866        }
 867
 868        set_timezone($author_tz);
 869        # $date is in the seconds since epoch format
 870        my $tz_offset = get_tz_offset($date);
 871        my $commit_date = "$date $tz_offset";
 872        set_timezone('UTC');
 873        $ENV{GIT_AUTHOR_NAME} = $author_name;
 874        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 875        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 876        $ENV{GIT_COMMITTER_NAME} = $author_name;
 877        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 878        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 879        my $pid = open2(my $commit_read, my $commit_write,
 880                'git', 'commit-tree', $tree, @commit_args);
 881
 882        # compatibility with git2cvs
 883        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 884        $logmsg =~ s/[\s\n]+\z//;
 885
 886        if (@skipped) {
 887            $logmsg .= "\n\n\nSKIPPED:\n\t";
 888            $logmsg .= join("\n\t", @skipped) . "\n";
 889            @skipped = ();
 890        }
 891
 892        print($commit_write "$logmsg\n") && close($commit_write)
 893                or die "Error writing to git commit-tree: $!\n";
 894
 895        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 896        chomp(my $cid = <$commit_read>);
 897        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 898        print "Commit ID $cid\n" if $opt_v;
 899        close($commit_read);
 900
 901        waitpid($pid,0);
 902        die "Error running git commit-tree: $?\n" if $?;
 903
 904        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 905                or die "Cannot write branch $branch for update: $!\n";
 906
 907        if ($revision_map) {
 908                print $revision_map "@$_ $cid\n" for @commit_revisions;
 909        }
 910        @commit_revisions = ();
 911
 912        if ($tag) {
 913                my ($xtag) = $tag;
 914                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 915                $xtag =~ tr/_/\./ if ( $opt_u );
 916                $xtag =~ s/[\/]/$opt_s/g;
 917
 918                # See refs.c for these rules.
 919                # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
 920                $xtag =~ s/[ ~\^:\\\*\?\[]//g;
 921                # Other bad strings for tags:
 922                # (See check_refname_component in refs.c.)
 923                1 while $xtag =~ s/
 924                        (?: \.\.        # Tag cannot contain '..'.
 925                        |   \@\{        # Tag cannot contain '@{'.
 926                        | ^ -           # Tag cannot begin with '-'.
 927                        |   \.lock $    # Tag cannot end with '.lock'.
 928                        | ^ \.          # Tag cannot begin...
 929                        |   \. $        # ...or end with '.'
 930                        )//xg;
 931                # Tag cannot be empty.
 932                if ($xtag eq '') {
 933                        warn("warning: ignoring tag '$tag'",
 934                        " with invalid tagname\n");
 935                        return;
 936                }
 937
 938                if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
 939                        # We did our best to sanitize the tag, but still failed
 940                        # for whatever reason. Bail out, and give the user
 941                        # enough information to understand if/how we should
 942                        # improve the translation in the future.
 943                        if ($tag ne $xtag) {
 944                                print "Translated '$tag' tag to '$xtag'\n";
 945                        }
 946                        die "Cannot create tag $xtag: $!\n";
 947                }
 948
 949                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 950        }
 951};
 952
 953my $commitcount = 1;
 954while (<CVS>) {
 955        chomp;
 956        if ($state == 0 and /^-+$/) {
 957                $state = 1;
 958        } elsif ($state == 0) {
 959                $state = 1;
 960                redo;
 961        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 962                $patchset = 0+$_;
 963                $state=2;
 964        } elsif ($state == 2 and s/^Date:\s+//) {
 965                $date = pdate($_);
 966                unless ($date) {
 967                        print STDERR "Could not parse date: $_\n";
 968                        $state=0;
 969                        next;
 970                }
 971                $state=3;
 972        } elsif ($state == 3 and s/^Author:\s+//) {
 973                $author_tz = "UTC";
 974                s/\s+$//;
 975                if (/^(.*?)\s+<(.*)>/) {
 976                    ($author_name, $author_email) = ($1, $2);
 977                } elsif ($conv_author_name{$_}) {
 978                        $author_name = $conv_author_name{$_};
 979                        $author_email = $conv_author_email{$_};
 980                        $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
 981                } else {
 982                    $author_name = $author_email = $_;
 983                }
 984                $state = 4;
 985        } elsif ($state == 4 and s/^Branch:\s+//) {
 986                s/\s+$//;
 987                tr/_/\./ if ( $opt_u );
 988                s/[\/]/$opt_s/g;
 989                $branch = $_;
 990                $state = 5;
 991        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 992                s/\s+$//;
 993                $ancestor = $_;
 994                $ancestor = $opt_o if $ancestor eq "HEAD";
 995                $state = 6;
 996        } elsif ($state == 5) {
 997                $ancestor = undef;
 998                $state = 6;
 999                redo;
1000        } elsif ($state == 6 and s/^Tag:\s+//) {
1001                s/\s+$//;
1002                if ($_ eq "(none)") {
1003                        $tag = undef;
1004                } else {
1005                        $tag = $_;
1006                }
1007                $state = 7;
1008        } elsif ($state == 7 and /^Log:/) {
1009                $logmsg = "";
1010                $state = 8;
1011        } elsif ($state == 8 and /^Members:/) {
1012                $branch = $opt_o if $branch eq "HEAD";
1013                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1014                        # skip
1015                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1016                        $state = 11;
1017                        next;
1018                }
1019                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1020                        # skip if the commit is too recent
1021                        # given that the cvsps default fuzz is 300s, we give ourselves another
1022                        # 300s just in case -- this also prevents skipping commits
1023                        # due to server clock drift
1024                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
1025                        $state = 11;
1026                        next;
1027                }
1028                if (exists $ignorebranch{$branch}) {
1029                        print STDERR "Skipping $branch\n";
1030                        $state = 11;
1031                        next;
1032                }
1033                if ($ancestor) {
1034                        if ($ancestor eq $branch) {
1035                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1036                                $ancestor = $opt_o;
1037                        }
1038                        if (defined get_headref("$remote/$branch")) {
1039                                print STDERR "Branch $branch already exists!\n";
1040                                $state=11;
1041                                next;
1042                        }
1043                        my $id = get_headref("$remote/$ancestor");
1044                        if (!$id) {
1045                                print STDERR "Branch $ancestor does not exist!\n";
1046                                $ignorebranch{$branch} = 1;
1047                                $state=11;
1048                                next;
1049                        }
1050
1051                        system(qw(git update-ref -m cvsimport),
1052                                "$remote/$branch", $id);
1053                        if($? != 0) {
1054                                print STDERR "Could not create branch $branch\n";
1055                                $ignorebranch{$branch} = 1;
1056                                $state=11;
1057                                next;
1058                        }
1059                }
1060                $last_branch = $branch if $branch ne $last_branch;
1061                $state = 9;
1062        } elsif ($state == 8) {
1063                $logmsg .= "$_\n";
1064        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1065#       VERSION:1.96->1.96.2.1
1066                my $init = ($2 eq "INITIAL");
1067                my $fn = $1;
1068                my $rev = $3;
1069                $fn =~ s#^/+##;
1070                if ($opt_S && $fn =~ m/$opt_S/) {
1071                    print "SKIPPING $fn v $rev\n";
1072                    push(@skipped, $fn);
1073                    next;
1074                }
1075                push @commit_revisions, [$fn, $rev];
1076                print "Fetching $fn   v $rev\n" if $opt_v;
1077                my ($tmpname, $size) = $cvs->file($fn,$rev);
1078                if ($size == -1) {
1079                        push(@old,$fn);
1080                        print "Drop $fn\n" if $opt_v;
1081                } else {
1082                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1083                        my $pid = open(my $F, '-|');
1084                        die $! unless defined $pid;
1085                        if (!$pid) {
1086                            exec("git", "hash-object", "-w", $tmpname)
1087                                or die "Cannot create object: $!\n";
1088                        }
1089                        my $sha = <$F>;
1090                        chomp $sha;
1091                        close $F;
1092                        my $mode = pmode($cvs->{'mode'});
1093                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1094                }
1095                unlink($tmpname);
1096        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1097                my $fn = $1;
1098                my $rev = $2;
1099                $fn =~ s#^/+##;
1100                push @commit_revisions, [$fn, $rev];
1101                push(@old,$fn);
1102                print "Delete $fn\n" if $opt_v;
1103        } elsif ($state == 9 and /^\s*$/) {
1104                $state = 10;
1105        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1106                $commitcount++;
1107                if ($opt_L && $commitcount > $opt_L) {
1108                        last;
1109                }
1110                commit();
1111                if (($commitcount & 1023) == 0) {
1112                        system(qw(git repack -a -d));
1113                }
1114                $state = 1;
1115        } elsif ($state == 11 and /^-+$/) {
1116                $state = 1;
1117        } elsif (/^-+$/) { # end of unknown-line processing
1118                $state = 1;
1119        } elsif ($state != 11) { # ignore stuff when skipping
1120                print STDERR "* UNKNOWN LINE * $_\n";
1121        }
1122}
1123commit() if $branch and $state != 11;
1124
1125unless ($opt_P) {
1126        unlink($cvspsfile);
1127}
1128
1129# The heuristic of repacking every 1024 commits can leave a
1130# lot of unpacked data.  If there is more than 1MB worth of
1131# not-packed objects, repack once more.
1132my $line = `git count-objects`;
1133if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1134  my ($n_objects, $kb) = ($1, $2);
1135  1024 < $kb
1136    and system(qw(git repack -a -d));
1137}
1138
1139foreach my $git_index (values %index) {
1140    if ($git_index ne "$git_dir/index") {
1141        unlink($git_index);
1142    }
1143}
1144
1145if (defined $orig_git_index) {
1146        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1147} else {
1148        delete $ENV{GIT_INDEX_FILE};
1149}
1150
1151# Now switch back to the branch we were in before all of this happened
1152if ($orig_branch) {
1153        print "DONE.\n" if $opt_v;
1154        if ($opt_i) {
1155                exit 0;
1156        }
1157        my $tip_at_end = `git rev-parse --verify HEAD`;
1158        if ($tip_at_start ne $tip_at_end) {
1159                for ($tip_at_start, $tip_at_end) { chomp; }
1160                print "Fetched into the current branch.\n" if $opt_v;
1161                system(qw(git read-tree -u -m),
1162                       $tip_at_start, $tip_at_end);
1163                die "Fast-forward update failed: $?\n" if $?;
1164        }
1165        else {
1166                system(qw(git merge -m cvsimport), "$remote/$opt_o");
1167                die "Could not merge $opt_o into the current branch.\n" if $?;
1168        }
1169} else {
1170        $orig_branch = "master";
1171        print "DONE; creating $orig_branch branch\n" if $opt_v;
1172        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1173                unless defined get_headref('refs/heads/master');
1174        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1175                if ($opt_r && $opt_o ne 'HEAD');
1176        system('git', 'update-ref', 'HEAD', "$orig_branch");
1177        unless ($opt_i) {
1178                system(qw(git checkout -f));
1179                die "checkout failed: $?\n" if $?;
1180        }
1181}