git-cvsimport.perlon commit mailmap: refactor mailmap parsing for non-file sources (7c8ce30)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime tzset dup2 ENOENT);
  28use IPC::Open2;
  29
  30$SIG{'PIPE'}="IGNORE";
  31set_timezone('UTC');
  32
  33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email, %conv_author_tz);
  35
  36sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48
  49sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53
  54        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # or with an optional timezone:
  63                #   spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
  64                elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
  65                        $user = $1;
  66                        $conv_author_name{$user} = $2;
  67                        $conv_author_email{$user} = $3;
  68                        $conv_author_tz{$user} = $4;
  69                }
  70                # However, we also read from CVSROOT/users format
  71                # to ease migration.
  72                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  73                        my $mapped;
  74                        ($user, $mapped) = ($1, $3);
  75                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  76                                $conv_author_name{$user} = $1;
  77                                $conv_author_email{$user} = $2;
  78                        }
  79                        elsif ($mapped =~ /^<?(.*)>?$/) {
  80                                $conv_author_name{$user} = $user;
  81                                $conv_author_email{$user} = $1;
  82                        }
  83                }
  84                # NEEDSWORK: Maybe warn on unrecognized lines?
  85        }
  86        close ($f);
  87}
  88
  89sub write_author_info($) {
  90        my ($file) = @_;
  91        open my $f, '>', $file or
  92          die("Failed to open $file for writing: $!");
  93
  94        foreach (keys %conv_author_name) {
  95                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
  96                print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
  97                print $f "\n";
  98        }
  99        close ($f);
 100}
 101
 102# Versions of perl before 5.10.0 may not automatically check $TZ each
 103# time localtime is run (most platforms will do so only the first time).
 104# We can work around this by using tzset() to update the internal
 105# variable whenever we change the environment.
 106sub set_timezone {
 107        $ENV{TZ} = shift;
 108        tzset();
 109}
 110
 111# convert getopts specs for use by git config
 112my %longmap = (
 113        'A:' => 'authors-file',
 114        'M:' => 'merge-regex',
 115        'P:' => undef,
 116        'R' => 'track-revisions',
 117        'S:' => 'ignore-paths',
 118);
 119
 120sub read_repo_config {
 121        # Split the string between characters, unless there is a ':'
 122        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 123        my @opts = split(/ *(?!:)/, shift);
 124        foreach my $o (@opts) {
 125                my $key = $o;
 126                $key =~ s/://g;
 127                my $arg = 'git config';
 128                $arg .= ' --bool' if ($o !~ /:$/);
 129                my $ckey = $key;
 130
 131                if (exists $longmap{$o}) {
 132                        # An uppercase option like -R cannot be
 133                        # expressed in the configuration, as the
 134                        # variable names are downcased.
 135                        $ckey = $longmap{$o};
 136                        next if (! defined $ckey);
 137                        $ckey =~ s/-//g;
 138                }
 139                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 140                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 141                        no strict 'refs';
 142                        my $opt_name = "opt_" . $key;
 143                        if (!$$opt_name) {
 144                                $$opt_name = $tmp;
 145                        }
 146                }
 147        }
 148}
 149
 150my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 151read_repo_config($opts);
 152Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 153
 154# turn the Getopt::Std specification in a Getopt::Long one,
 155# with support for multiple -M options
 156GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 157    or usage();
 158usage if $opt_h;
 159
 160if (@ARGV == 0) {
 161                chomp(my $module = `git config --get cvsimport.module`);
 162                push(@ARGV, $module) if $? == 0;
 163}
 164@ARGV <= 1 or usage("You can't specify more than one CVS module");
 165
 166if ($opt_d) {
 167        $ENV{"CVSROOT"} = $opt_d;
 168} elsif (-f 'CVS/Root') {
 169        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 170        $opt_d = <$f>;
 171        chomp $opt_d;
 172        close $f;
 173        $ENV{"CVSROOT"} = $opt_d;
 174} elsif ($ENV{"CVSROOT"}) {
 175        $opt_d = $ENV{"CVSROOT"};
 176} else {
 177        usage("CVSROOT needs to be set");
 178}
 179$opt_s ||= "-";
 180$opt_a ||= 0;
 181
 182my $git_tree = $opt_C;
 183$git_tree ||= ".";
 184
 185my $remote;
 186if (defined $opt_r) {
 187        $remote = 'refs/remotes/' . $opt_r;
 188        $opt_o ||= "master";
 189} else {
 190        $opt_o ||= "origin";
 191        $remote = 'refs/heads';
 192}
 193
 194my $cvs_tree;
 195if ($#ARGV == 0) {
 196        $cvs_tree = $ARGV[0];
 197} elsif (-f 'CVS/Repository') {
 198        open my $f, '<', 'CVS/Repository' or
 199            die 'Failed to open CVS/Repository';
 200        $cvs_tree = <$f>;
 201        chomp $cvs_tree;
 202        close $f;
 203} else {
 204        usage("CVS module has to be specified");
 205}
 206
 207our @mergerx = ();
 208if ($opt_m) {
 209        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 210}
 211if (@opt_M) {
 212        push (@mergerx, map { qr/$_/ } @opt_M);
 213}
 214
 215# Remember UTC of our starting time
 216# we'll want to avoid importing commits
 217# that are too recent
 218our $starttime = time();
 219
 220select(STDERR); $|=1; select(STDOUT);
 221
 222
 223package CVSconn;
 224# Basic CVS dialog.
 225# We're only interested in connecting and downloading, so ...
 226
 227use File::Spec;
 228use File::Temp qw(tempfile);
 229use POSIX qw(strftime dup2);
 230
 231sub new {
 232        my ($what,$repo,$subdir) = @_;
 233        $what=ref($what) if ref($what);
 234
 235        my $self = {};
 236        $self->{'buffer'} = "";
 237        bless($self,$what);
 238
 239        $repo =~ s#/+$##;
 240        $self->{'fullrep'} = $repo;
 241        $self->conn();
 242
 243        $self->{'subdir'} = $subdir;
 244        $self->{'lines'} = undef;
 245
 246        return $self;
 247}
 248
 249sub find_password_entry {
 250        my ($cvspass, @cvsroot) = @_;
 251        my ($file, $delim) = @$cvspass;
 252        my $pass;
 253        local ($_);
 254
 255        if (open(my $fh, $file)) {
 256                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 257                CVSPASSFILE:
 258                while (<$fh>) {
 259                        chomp;
 260                        s/^\/\d+\s+//;
 261                        my ($w, $p) = split($delim,$_,2);
 262                        for my $cvsroot (@cvsroot) {
 263                                if ($w eq $cvsroot) {
 264                                        $pass = $p;
 265                                        last CVSPASSFILE;
 266                                }
 267                        }
 268                }
 269                close($fh);
 270        }
 271        return $pass;
 272}
 273
 274sub conn {
 275        my $self = shift;
 276        my $repo = $self->{'fullrep'};
 277        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 278                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 279
 280                my ($proxyhost,$proxyport);
 281                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 282                        $proxyhost = $1;
 283                        # Default proxyport, if not specified, is 8080.
 284                        $proxyport = 8080;
 285                        if ($ENV{"CVS_PROXY_PORT"}) {
 286                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 287                        }
 288                        if ($param =~ m/proxyport=([^;]+)/) {
 289                                $proxyport = $1;
 290                        }
 291                }
 292                $repo ||= '/';
 293
 294                # if username is not explicit in CVSROOT, then use current user, as cvs would
 295                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 296                my $rr2 = "-";
 297                unless ($port) {
 298                        $rr2 = ":pserver:$user\@$serv:$repo";
 299                        $port=2401;
 300                }
 301                my $rr = ":pserver:$user\@$serv:$port$repo";
 302
 303                if ($pass) {
 304                        $pass = $self->_scramble($pass);
 305                } else {
 306                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 307                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 308                        my @loc = ();
 309                        foreach my $cvspass (@cvspass) {
 310                                my $p = find_password_entry($cvspass, $rr, $rr2);
 311                                if ($p) {
 312                                        push @loc, $cvspass->[0];
 313                                        $pass = $p;
 314                                }
 315                        }
 316
 317                        if (1 < @loc) {
 318                                die("Multiple cvs password files have ".
 319                                    "entries for CVSROOT $opt_d: @loc");
 320                        } elsif (!$pass) {
 321                                $pass = "A";
 322                        }
 323                }
 324
 325                my ($s, $rep);
 326                if ($proxyhost) {
 327
 328                        # Use a HTTP Proxy. Only works for HTTP proxies that
 329                        # don't require user authentication
 330                        #
 331                        # See: http://www.ietf.org/rfc/rfc2817.txt
 332
 333                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 334                        die "Socket to $proxyhost: $!\n" unless defined $s;
 335                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 336                                or die "Write to $proxyhost: $!\n";
 337                        $s->flush();
 338
 339                        $rep = <$s>;
 340
 341                        # The answer should look like 'HTTP/1.x 2yy ....'
 342                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 343                                die "Proxy connect: $rep\n";
 344                        }
 345                        # Skip up to the empty line of the proxy server output
 346                        # including the response headers.
 347                        while ($rep = <$s>) {
 348                                last if (!defined $rep ||
 349                                         $rep eq "\n" ||
 350                                         $rep eq "\r\n");
 351                        }
 352                } else {
 353                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 354                        die "Socket to $serv: $!\n" unless defined $s;
 355                }
 356
 357                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 358                        or die "Write to $serv: $!\n";
 359                $s->flush();
 360
 361                $rep = <$s>;
 362
 363                if ($rep ne "I LOVE YOU\n") {
 364                        $rep="<unknown>" unless $rep;
 365                        die "AuthReply: $rep\n";
 366                }
 367                $self->{'socketo'} = $s;
 368                $self->{'socketi'} = $s;
 369        } else { # local or ext: Fork off our own cvs server.
 370                my $pr = IO::Pipe->new();
 371                my $pw = IO::Pipe->new();
 372                my $pid = fork();
 373                die "Fork: $!\n" unless defined $pid;
 374                my $cvs = 'cvs';
 375                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 376                my $rsh = 'rsh';
 377                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 378
 379                my @cvs = ($cvs, 'server');
 380                my ($local, $user, $host);
 381                $local = $repo =~ s/:local://;
 382                if (!$local) {
 383                    $repo =~ s/:ext://;
 384                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 385                    ($user, $host) = ($1, $2);
 386                }
 387                if (!$local) {
 388                    if ($user) {
 389                        unshift @cvs, $rsh, '-l', $user, $host;
 390                    } else {
 391                        unshift @cvs, $rsh, $host;
 392                    }
 393                }
 394
 395                unless ($pid) {
 396                        $pr->writer();
 397                        $pw->reader();
 398                        dup2($pw->fileno(),0);
 399                        dup2($pr->fileno(),1);
 400                        $pr->close();
 401                        $pw->close();
 402                        exec(@cvs);
 403                }
 404                $pw->writer();
 405                $pr->reader();
 406                $self->{'socketo'} = $pw;
 407                $self->{'socketi'} = $pr;
 408        }
 409        $self->{'socketo'}->write("Root $repo\n");
 410
 411        # Trial and error says that this probably is the minimum set
 412        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 413
 414        $self->{'socketo'}->write("valid-requests\n");
 415        $self->{'socketo'}->flush();
 416
 417        my $rep=$self->readline();
 418        die "Failed to read from server" unless defined $rep;
 419        chomp($rep);
 420        if ($rep !~ s/^Valid-requests\s*//) {
 421                $rep="<unknown>" unless $rep;
 422                die "Expected Valid-requests from server, but got: $rep\n";
 423        }
 424        chomp(my $res=$self->readline());
 425        die "validReply: $res\n" if $res ne "ok";
 426
 427        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 428        $self->{'repo'} = $repo;
 429}
 430
 431sub readline {
 432        my ($self) = @_;
 433        return $self->{'socketi'}->getline();
 434}
 435
 436sub _file {
 437        # Request a file with a given revision.
 438        # Trial and error says this is a good way to do it. :-/
 439        my ($self,$fn,$rev) = @_;
 440        $self->{'socketo'}->write("Argument -N\n") or return undef;
 441        $self->{'socketo'}->write("Argument -P\n") or return undef;
 442        # -kk: Linus' version doesn't use it - defaults to off
 443        if ($opt_k) {
 444            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 445        }
 446        $self->{'socketo'}->write("Argument -r\n") or return undef;
 447        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 448        $self->{'socketo'}->write("Argument --\n") or return undef;
 449        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 450        $self->{'socketo'}->write("Directory .\n") or return undef;
 451        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 452        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 453        $self->{'socketo'}->write("co\n") or return undef;
 454        $self->{'socketo'}->flush() or return undef;
 455        $self->{'lines'} = 0;
 456        return 1;
 457}
 458sub _line {
 459        # Read a line from the server.
 460        # ... except that 'line' may be an entire file. ;-)
 461        my ($self, $fh) = @_;
 462        die "Not in lines" unless defined $self->{'lines'};
 463
 464        my $line;
 465        my $res=0;
 466        while (defined($line = $self->readline())) {
 467                # M U gnupg-cvs-rep/AUTHORS
 468                # Updated gnupg-cvs-rep/
 469                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 470                # /AUTHORS/1.1///T1.1
 471                # u=rw,g=rw,o=rw
 472                # 0
 473                # ok
 474
 475                if ($line =~ s/^(?:Created|Updated) //) {
 476                        $line = $self->readline(); # path
 477                        $line = $self->readline(); # Entries line
 478                        my $mode = $self->readline(); chomp $mode;
 479                        $self->{'mode'} = $mode;
 480                        defined (my $cnt = $self->readline())
 481                                or die "EOF from server after 'Changed'\n";
 482                        chomp $cnt;
 483                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 484                        $line="";
 485                        $res = $self->_fetchfile($fh, $cnt);
 486                } elsif ($line =~ s/^ //) {
 487                        print $fh $line;
 488                        $res += length($line);
 489                } elsif ($line =~ /^M\b/) {
 490                        # output, do nothing
 491                } elsif ($line =~ /^Mbinary\b/) {
 492                        my $cnt;
 493                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 494                        chomp $cnt;
 495                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 496                        $line="";
 497                        $res += $self->_fetchfile($fh, $cnt);
 498                } else {
 499                        chomp $line;
 500                        if ($line eq "ok") {
 501                                # print STDERR "S: ok (".length($res).")\n";
 502                                return $res;
 503                        } elsif ($line =~ s/^E //) {
 504                                # print STDERR "S: $line\n";
 505                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 506                                $line = $self->readline(); # filename
 507                                $line = $self->readline(); # OK
 508                                chomp $line;
 509                                die "Unknown: $line" if $line ne "ok";
 510                                return -1;
 511                        } else {
 512                                die "Unknown: $line\n";
 513                        }
 514                }
 515        }
 516        return undef;
 517}
 518sub file {
 519        my ($self,$fn,$rev) = @_;
 520        my $res;
 521
 522        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 523                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 524
 525        $self->_file($fn,$rev) and $res = $self->_line($fh);
 526
 527        if (!defined $res) {
 528            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 529            truncate $fh, 0;
 530            $self->conn();
 531            $self->_file($fn,$rev) or die "No file command send";
 532            $res = $self->_line($fh);
 533            die "Retry failed" unless defined $res;
 534        }
 535        close ($fh);
 536
 537        return ($name, $res);
 538}
 539sub _fetchfile {
 540        my ($self, $fh, $cnt) = @_;
 541        my $res = 0;
 542        my $bufsize = 1024 * 1024;
 543        while ($cnt) {
 544            if ($bufsize > $cnt) {
 545                $bufsize = $cnt;
 546            }
 547            my $buf;
 548            my $num = $self->{'socketi'}->read($buf,$bufsize);
 549            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 550            print $fh $buf;
 551            $res += $num;
 552            $cnt -= $num;
 553        }
 554        return $res;
 555}
 556
 557sub _scramble {
 558        my ($self, $pass) = @_;
 559        my $scrambled = "A";
 560
 561        return $scrambled unless $pass;
 562
 563        my $pass_len = length($pass);
 564        my @pass_arr = split("", $pass);
 565        my $i;
 566
 567        # from cvs/src/scramble.c
 568        my @shifts = (
 569                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 570                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 571                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 572                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 573                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 574                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 575                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 576                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 577                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 578                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 579                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 580                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 581                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 582                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 583                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 584                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 585        );
 586
 587        for ($i = 0; $i < $pass_len; $i++) {
 588                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 589        }
 590
 591        return $scrambled;
 592}
 593
 594package main;
 595
 596my $cvs = CVSconn->new($opt_d, $cvs_tree);
 597
 598
 599sub pdate($) {
 600        my ($d) = @_;
 601        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 602                or die "Unparseable date: $d\n";
 603        my $y=$1; $y-=1900 if $y>1900;
 604        return timegm($6||0,$5,$4,$3,$2-1,$y);
 605}
 606
 607sub pmode($) {
 608        my ($mode) = @_;
 609        my $m = 0;
 610        my $mm = 0;
 611        my $um = 0;
 612        for my $x(split(//,$mode)) {
 613                if ($x eq ",") {
 614                        $m |= $mm&$um;
 615                        $mm = 0;
 616                        $um = 0;
 617                } elsif ($x eq "u") { $um |= 0700;
 618                } elsif ($x eq "g") { $um |= 0070;
 619                } elsif ($x eq "o") { $um |= 0007;
 620                } elsif ($x eq "r") { $mm |= 0444;
 621                } elsif ($x eq "w") { $mm |= 0222;
 622                } elsif ($x eq "x") { $mm |= 0111;
 623                } elsif ($x eq "=") { # do nothing
 624                } else { die "Unknown mode: $mode\n";
 625                }
 626        }
 627        $m |= $mm&$um;
 628        return $m;
 629}
 630
 631sub getwd() {
 632        my $pwd = `pwd`;
 633        chomp $pwd;
 634        return $pwd;
 635}
 636
 637sub is_sha1 {
 638        my $s = shift;
 639        return $s =~ /^[a-f0-9]{40}$/;
 640}
 641
 642sub get_headref ($) {
 643        my $name = shift;
 644        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 645        return undef unless $? == 0;
 646        chomp $r;
 647        return $r;
 648}
 649
 650my $user_filename_prepend = '';
 651sub munge_user_filename {
 652        my $name = shift;
 653        return File::Spec->file_name_is_absolute($name) ?
 654                $name :
 655                $user_filename_prepend . $name;
 656}
 657
 658-d $git_tree
 659        or mkdir($git_tree,0777)
 660        or die "Could not create $git_tree: $!";
 661if ($git_tree ne '.') {
 662        $user_filename_prepend = getwd() . '/';
 663        chdir($git_tree);
 664}
 665
 666my $last_branch = "";
 667my $orig_branch = "";
 668my %branch_date;
 669my $tip_at_start = undef;
 670
 671my $git_dir = $ENV{"GIT_DIR"} || ".git";
 672$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 673$ENV{"GIT_DIR"} = $git_dir;
 674my $orig_git_index;
 675$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 676
 677my %index; # holds filenames of one index per branch
 678
 679unless (-d $git_dir) {
 680        system(qw(git init));
 681        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 682        system(qw(git read-tree --empty));
 683        die "Cannot init an empty tree: $?\n" if $?;
 684
 685        $last_branch = $opt_o;
 686        $orig_branch = "";
 687} else {
 688        open(F, "-|", qw(git symbolic-ref HEAD)) or
 689                die "Cannot run git symbolic-ref: $!\n";
 690        chomp ($last_branch = <F>);
 691        $last_branch = basename($last_branch);
 692        close(F);
 693        unless ($last_branch) {
 694                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 695                $last_branch = "master";
 696        }
 697        $orig_branch = $last_branch;
 698        $tip_at_start = `git rev-parse --verify HEAD`;
 699
 700        # Get the last import timestamps
 701        my $fmt = '($ref, $author) = (%(refname), %(author));';
 702        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 703        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 704        while (defined(my $entry = <H>)) {
 705                my ($ref, $author);
 706                eval($entry) || die "cannot eval refs list: $@";
 707                my ($head) = ($ref =~ m|^$remote/(.*)|);
 708                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 709                $branch_date{$head} = $1;
 710        }
 711        close(H);
 712        if (!exists $branch_date{$opt_o}) {
 713                die "Branch '$opt_o' does not exist.\n".
 714                       "Either use the correct '-o branch' option,\n".
 715                       "or import to a new repository.\n";
 716        }
 717}
 718
 719-d $git_dir
 720        or die "Could not create git subdir ($git_dir).\n";
 721
 722# now we read (and possibly save) author-info as well
 723-f "$git_dir/cvs-authors" and
 724  read_author_info("$git_dir/cvs-authors");
 725if ($opt_A) {
 726        read_author_info(munge_user_filename($opt_A));
 727        write_author_info("$git_dir/cvs-authors");
 728}
 729
 730# open .git/cvs-revisions, if requested
 731open my $revision_map, '>>', "$git_dir/cvs-revisions"
 732    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 733        if defined $opt_R;
 734
 735
 736#
 737# run cvsps into a file unless we are getting
 738# it passed as a file via $opt_P
 739#
 740my $cvspsfile;
 741unless ($opt_P) {
 742        print "Running cvsps...\n" if $opt_v;
 743        my $pid = open(CVSPS,"-|");
 744        my $cvspsfh;
 745        die "Cannot fork: $!\n" unless defined $pid;
 746        unless ($pid) {
 747                my @opt;
 748                @opt = split(/,/,$opt_p) if defined $opt_p;
 749                unshift @opt, '-z', $opt_z if defined $opt_z;
 750                unshift @opt, '-q'         unless defined $opt_v;
 751                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 752                        push @opt, '--cvs-direct';
 753                }
 754                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 755                die "Could not start cvsps: $!\n";
 756        }
 757        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 758                                          DIR => File::Spec->tmpdir());
 759        while (<CVSPS>) {
 760            print $cvspsfh $_;
 761        }
 762        close CVSPS;
 763        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 764        close $cvspsfh;
 765} else {
 766        $cvspsfile = munge_user_filename($opt_P);
 767}
 768
 769open(CVS, "<$cvspsfile") or die $!;
 770
 771## cvsps output:
 772#---------------------
 773#PatchSet 314
 774#Date: 1999/09/18 13:03:59
 775#Author: wkoch
 776#Branch: STABLE-BRANCH-1-0
 777#Ancestor branch: HEAD
 778#Tag: (none)
 779#Log:
 780#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 781#Members:
 782#       README:1.57->1.57.2.1
 783#       VERSION:1.96->1.96.2.1
 784#
 785#---------------------
 786
 787my $state = 0;
 788
 789sub update_index (\@\@) {
 790        my $old = shift;
 791        my $new = shift;
 792        open(my $fh, '|-', qw(git update-index -z --index-info))
 793                or die "unable to open git update-index: $!";
 794        print $fh
 795                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 796                        @$old),
 797                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 798                        @$new)
 799                or die "unable to write to git update-index: $!";
 800        close $fh
 801                or die "unable to write to git update-index: $!";
 802        $? and die "git update-index reported error: $?";
 803}
 804
 805sub write_tree () {
 806        open(my $fh, '-|', qw(git write-tree))
 807                or die "unable to open git write-tree: $!";
 808        chomp(my $tree = <$fh>);
 809        is_sha1($tree)
 810                or die "Cannot get tree id ($tree): $!";
 811        close($fh)
 812                or die "Error running git write-tree: $?\n";
 813        print "Tree ID $tree\n" if $opt_v;
 814        return $tree;
 815}
 816
 817my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
 818my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 819
 820# commits that cvsps cannot place anywhere...
 821$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 822
 823sub commit {
 824        if ($branch eq $opt_o && !$index{branch} &&
 825                !get_headref("$remote/$branch")) {
 826            # looks like an initial commit
 827            # use the index primed by git init
 828            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 829            $index{$branch} = "$git_dir/index";
 830        } else {
 831            # use an index per branch to speed up
 832            # imports of projects with many branches
 833            unless ($index{$branch}) {
 834                $index{$branch} = tmpnam();
 835                $ENV{GIT_INDEX_FILE} = $index{$branch};
 836                if ($ancestor) {
 837                    system("git", "read-tree", "$remote/$ancestor");
 838                } else {
 839                    system("git", "read-tree", "$remote/$branch");
 840                }
 841                die "read-tree failed: $?\n" if $?;
 842            }
 843        }
 844        $ENV{GIT_INDEX_FILE} = $index{$branch};
 845
 846        update_index(@old, @new);
 847        @old = @new = ();
 848        my $tree = write_tree();
 849        my $parent = get_headref("$remote/$last_branch");
 850        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 851
 852        my @commit_args;
 853        push @commit_args, ("-p", $parent) if $parent;
 854
 855        # loose detection of merges
 856        # based on the commit msg
 857        foreach my $rx (@mergerx) {
 858                next unless $logmsg =~ $rx && $1;
 859                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 860                if (my $sha1 = get_headref("$remote/$mparent")) {
 861                        push @commit_args, '-p', "$remote/$mparent";
 862                        print "Merge parent branch: $mparent\n" if $opt_v;
 863                }
 864        }
 865
 866        set_timezone($author_tz);
 867        my $commit_date = strftime("%s %z", localtime($date));
 868        set_timezone('UTC');
 869        $ENV{GIT_AUTHOR_NAME} = $author_name;
 870        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 871        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 872        $ENV{GIT_COMMITTER_NAME} = $author_name;
 873        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 874        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 875        my $pid = open2(my $commit_read, my $commit_write,
 876                'git', 'commit-tree', $tree, @commit_args);
 877
 878        # compatibility with git2cvs
 879        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 880        $logmsg =~ s/[\s\n]+\z//;
 881
 882        if (@skipped) {
 883            $logmsg .= "\n\n\nSKIPPED:\n\t";
 884            $logmsg .= join("\n\t", @skipped) . "\n";
 885            @skipped = ();
 886        }
 887
 888        print($commit_write "$logmsg\n") && close($commit_write)
 889                or die "Error writing to git commit-tree: $!\n";
 890
 891        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 892        chomp(my $cid = <$commit_read>);
 893        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 894        print "Commit ID $cid\n" if $opt_v;
 895        close($commit_read);
 896
 897        waitpid($pid,0);
 898        die "Error running git commit-tree: $?\n" if $?;
 899
 900        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 901                or die "Cannot write branch $branch for update: $!\n";
 902
 903        if ($revision_map) {
 904                print $revision_map "@$_ $cid\n" for @commit_revisions;
 905        }
 906        @commit_revisions = ();
 907
 908        if ($tag) {
 909                my ($xtag) = $tag;
 910                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 911                $xtag =~ tr/_/\./ if ( $opt_u );
 912                $xtag =~ s/[\/]/$opt_s/g;
 913
 914                # See refs.c for these rules.
 915                # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
 916                $xtag =~ s/[ ~\^:\\\*\?\[]//g;
 917                # Other bad strings for tags:
 918                # (See check_refname_component in refs.c.)
 919                1 while $xtag =~ s/
 920                        (?: \.\.        # Tag cannot contain '..'.
 921                        |   \@{         # Tag cannot contain '@{'.
 922                        | ^ -           # Tag cannot begin with '-'.
 923                        |   \.lock $    # Tag cannot end with '.lock'.
 924                        | ^ \.          # Tag cannot begin...
 925                        |   \. $        # ...or end with '.'
 926                        )//xg;
 927                # Tag cannot be empty.
 928                if ($xtag eq '') {
 929                        warn("warning: ignoring tag '$tag'",
 930                        " with invalid tagname\n");
 931                        return;
 932                }
 933
 934                if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
 935                        # We did our best to sanitize the tag, but still failed
 936                        # for whatever reason. Bail out, and give the user
 937                        # enough information to understand if/how we should
 938                        # improve the translation in the future.
 939                        if ($tag ne $xtag) {
 940                                print "Translated '$tag' tag to '$xtag'\n";
 941                        }
 942                        die "Cannot create tag $xtag: $!\n";
 943                }
 944
 945                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 946        }
 947};
 948
 949my $commitcount = 1;
 950while (<CVS>) {
 951        chomp;
 952        if ($state == 0 and /^-+$/) {
 953                $state = 1;
 954        } elsif ($state == 0) {
 955                $state = 1;
 956                redo;
 957        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 958                $patchset = 0+$_;
 959                $state=2;
 960        } elsif ($state == 2 and s/^Date:\s+//) {
 961                $date = pdate($_);
 962                unless ($date) {
 963                        print STDERR "Could not parse date: $_\n";
 964                        $state=0;
 965                        next;
 966                }
 967                $state=3;
 968        } elsif ($state == 3 and s/^Author:\s+//) {
 969                $author_tz = "UTC";
 970                s/\s+$//;
 971                if (/^(.*?)\s+<(.*)>/) {
 972                    ($author_name, $author_email) = ($1, $2);
 973                } elsif ($conv_author_name{$_}) {
 974                        $author_name = $conv_author_name{$_};
 975                        $author_email = $conv_author_email{$_};
 976                        $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
 977                } else {
 978                    $author_name = $author_email = $_;
 979                }
 980                $state = 4;
 981        } elsif ($state == 4 and s/^Branch:\s+//) {
 982                s/\s+$//;
 983                tr/_/\./ if ( $opt_u );
 984                s/[\/]/$opt_s/g;
 985                $branch = $_;
 986                $state = 5;
 987        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 988                s/\s+$//;
 989                $ancestor = $_;
 990                $ancestor = $opt_o if $ancestor eq "HEAD";
 991                $state = 6;
 992        } elsif ($state == 5) {
 993                $ancestor = undef;
 994                $state = 6;
 995                redo;
 996        } elsif ($state == 6 and s/^Tag:\s+//) {
 997                s/\s+$//;
 998                if ($_ eq "(none)") {
 999                        $tag = undef;
1000                } else {
1001                        $tag = $_;
1002                }
1003                $state = 7;
1004        } elsif ($state == 7 and /^Log:/) {
1005                $logmsg = "";
1006                $state = 8;
1007        } elsif ($state == 8 and /^Members:/) {
1008                $branch = $opt_o if $branch eq "HEAD";
1009                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1010                        # skip
1011                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1012                        $state = 11;
1013                        next;
1014                }
1015                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1016                        # skip if the commit is too recent
1017                        # given that the cvsps default fuzz is 300s, we give ourselves another
1018                        # 300s just in case -- this also prevents skipping commits
1019                        # due to server clock drift
1020                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
1021                        $state = 11;
1022                        next;
1023                }
1024                if (exists $ignorebranch{$branch}) {
1025                        print STDERR "Skipping $branch\n";
1026                        $state = 11;
1027                        next;
1028                }
1029                if ($ancestor) {
1030                        if ($ancestor eq $branch) {
1031                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1032                                $ancestor = $opt_o;
1033                        }
1034                        if (defined get_headref("$remote/$branch")) {
1035                                print STDERR "Branch $branch already exists!\n";
1036                                $state=11;
1037                                next;
1038                        }
1039                        my $id = get_headref("$remote/$ancestor");
1040                        if (!$id) {
1041                                print STDERR "Branch $ancestor does not exist!\n";
1042                                $ignorebranch{$branch} = 1;
1043                                $state=11;
1044                                next;
1045                        }
1046
1047                        system(qw(git update-ref -m cvsimport),
1048                                "$remote/$branch", $id);
1049                        if($? != 0) {
1050                                print STDERR "Could not create branch $branch\n";
1051                                $ignorebranch{$branch} = 1;
1052                                $state=11;
1053                                next;
1054                        }
1055                }
1056                $last_branch = $branch if $branch ne $last_branch;
1057                $state = 9;
1058        } elsif ($state == 8) {
1059                $logmsg .= "$_\n";
1060        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1061#       VERSION:1.96->1.96.2.1
1062                my $init = ($2 eq "INITIAL");
1063                my $fn = $1;
1064                my $rev = $3;
1065                $fn =~ s#^/+##;
1066                if ($opt_S && $fn =~ m/$opt_S/) {
1067                    print "SKIPPING $fn v $rev\n";
1068                    push(@skipped, $fn);
1069                    next;
1070                }
1071                push @commit_revisions, [$fn, $rev];
1072                print "Fetching $fn   v $rev\n" if $opt_v;
1073                my ($tmpname, $size) = $cvs->file($fn,$rev);
1074                if ($size == -1) {
1075                        push(@old,$fn);
1076                        print "Drop $fn\n" if $opt_v;
1077                } else {
1078                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1079                        my $pid = open(my $F, '-|');
1080                        die $! unless defined $pid;
1081                        if (!$pid) {
1082                            exec("git", "hash-object", "-w", $tmpname)
1083                                or die "Cannot create object: $!\n";
1084                        }
1085                        my $sha = <$F>;
1086                        chomp $sha;
1087                        close $F;
1088                        my $mode = pmode($cvs->{'mode'});
1089                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1090                }
1091                unlink($tmpname);
1092        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1093                my $fn = $1;
1094                my $rev = $2;
1095                $fn =~ s#^/+##;
1096                push @commit_revisions, [$fn, $rev];
1097                push(@old,$fn);
1098                print "Delete $fn\n" if $opt_v;
1099        } elsif ($state == 9 and /^\s*$/) {
1100                $state = 10;
1101        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1102                $commitcount++;
1103                if ($opt_L && $commitcount > $opt_L) {
1104                        last;
1105                }
1106                commit();
1107                if (($commitcount & 1023) == 0) {
1108                        system(qw(git repack -a -d));
1109                }
1110                $state = 1;
1111        } elsif ($state == 11 and /^-+$/) {
1112                $state = 1;
1113        } elsif (/^-+$/) { # end of unknown-line processing
1114                $state = 1;
1115        } elsif ($state != 11) { # ignore stuff when skipping
1116                print STDERR "* UNKNOWN LINE * $_\n";
1117        }
1118}
1119commit() if $branch and $state != 11;
1120
1121unless ($opt_P) {
1122        unlink($cvspsfile);
1123}
1124
1125# The heuristic of repacking every 1024 commits can leave a
1126# lot of unpacked data.  If there is more than 1MB worth of
1127# not-packed objects, repack once more.
1128my $line = `git count-objects`;
1129if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1130  my ($n_objects, $kb) = ($1, $2);
1131  1024 < $kb
1132    and system(qw(git repack -a -d));
1133}
1134
1135foreach my $git_index (values %index) {
1136    if ($git_index ne "$git_dir/index") {
1137        unlink($git_index);
1138    }
1139}
1140
1141if (defined $orig_git_index) {
1142        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1143} else {
1144        delete $ENV{GIT_INDEX_FILE};
1145}
1146
1147# Now switch back to the branch we were in before all of this happened
1148if ($orig_branch) {
1149        print "DONE.\n" if $opt_v;
1150        if ($opt_i) {
1151                exit 0;
1152        }
1153        my $tip_at_end = `git rev-parse --verify HEAD`;
1154        if ($tip_at_start ne $tip_at_end) {
1155                for ($tip_at_start, $tip_at_end) { chomp; }
1156                print "Fetched into the current branch.\n" if $opt_v;
1157                system(qw(git read-tree -u -m),
1158                       $tip_at_start, $tip_at_end);
1159                die "Fast-forward update failed: $?\n" if $?;
1160        }
1161        else {
1162                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1163                die "Could not merge $opt_o into the current branch.\n" if $?;
1164        }
1165} else {
1166        $orig_branch = "master";
1167        print "DONE; creating $orig_branch branch\n" if $opt_v;
1168        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1169                unless defined get_headref('refs/heads/master');
1170        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1171                if ($opt_r && $opt_o ne 'HEAD');
1172        system('git', 'update-ref', 'HEAD', "$orig_branch");
1173        unless ($opt_i) {
1174                system(qw(git checkout -f));
1175                die "checkout failed: $?\n" if $?;
1176        }
1177}