git-cvsimport.perlon commit teach --histogram to diff (8c912ee)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime dup2 ENOENT);
  28use IPC::Open2;
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email);
  35
  36sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48
  49sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53
  54        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # However, we also read from CVSROOT/users format
  63                # to ease migration.
  64                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  65                        my $mapped;
  66                        ($user, $mapped) = ($1, $3);
  67                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  68                                $conv_author_name{$user} = $1;
  69                                $conv_author_email{$user} = $2;
  70                        }
  71                        elsif ($mapped =~ /^<?(.*)>?$/) {
  72                                $conv_author_name{$user} = $user;
  73                                $conv_author_email{$user} = $1;
  74                        }
  75                }
  76                # NEEDSWORK: Maybe warn on unrecognized lines?
  77        }
  78        close ($f);
  79}
  80
  81sub write_author_info($) {
  82        my ($file) = @_;
  83        open my $f, '>', $file or
  84          die("Failed to open $file for writing: $!");
  85
  86        foreach (keys %conv_author_name) {
  87                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  88        }
  89        close ($f);
  90}
  91
  92# convert getopts specs for use by git config
  93my %longmap = (
  94        'A:' => 'authors-file',
  95        'M:' => 'merge-regex',
  96        'P:' => undef,
  97        'R' => 'track-revisions',
  98        'S:' => 'ignore-paths',
  99);
 100
 101sub read_repo_config {
 102        # Split the string between characters, unless there is a ':'
 103        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 104        my @opts = split(/ *(?!:)/, shift);
 105        foreach my $o (@opts) {
 106                my $key = $o;
 107                $key =~ s/://g;
 108                my $arg = 'git config';
 109                $arg .= ' --bool' if ($o !~ /:$/);
 110                my $ckey = $key;
 111
 112                if (exists $longmap{$o}) {
 113                        # An uppercase option like -R cannot be
 114                        # expressed in the configuration, as the
 115                        # variable names are downcased.
 116                        $ckey = $longmap{$o};
 117                        next if (! defined $ckey);
 118                        $ckey =~ s/-//g;
 119                }
 120                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 121                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 122                        no strict 'refs';
 123                        my $opt_name = "opt_" . $key;
 124                        if (!$$opt_name) {
 125                                $$opt_name = $tmp;
 126                        }
 127                }
 128        }
 129}
 130
 131my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 132read_repo_config($opts);
 133Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 134
 135# turn the Getopt::Std specification in a Getopt::Long one,
 136# with support for multiple -M options
 137GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 138    or usage();
 139usage if $opt_h;
 140
 141if (@ARGV == 0) {
 142                chomp(my $module = `git config --get cvsimport.module`);
 143                push(@ARGV, $module) if $? == 0;
 144}
 145@ARGV <= 1 or usage("You can't specify more than one CVS module");
 146
 147if ($opt_d) {
 148        $ENV{"CVSROOT"} = $opt_d;
 149} elsif (-f 'CVS/Root') {
 150        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 151        $opt_d = <$f>;
 152        chomp $opt_d;
 153        close $f;
 154        $ENV{"CVSROOT"} = $opt_d;
 155} elsif ($ENV{"CVSROOT"}) {
 156        $opt_d = $ENV{"CVSROOT"};
 157} else {
 158        usage("CVSROOT needs to be set");
 159}
 160$opt_s ||= "-";
 161$opt_a ||= 0;
 162
 163my $git_tree = $opt_C;
 164$git_tree ||= ".";
 165
 166my $remote;
 167if (defined $opt_r) {
 168        $remote = 'refs/remotes/' . $opt_r;
 169        $opt_o ||= "master";
 170} else {
 171        $opt_o ||= "origin";
 172        $remote = 'refs/heads';
 173}
 174
 175my $cvs_tree;
 176if ($#ARGV == 0) {
 177        $cvs_tree = $ARGV[0];
 178} elsif (-f 'CVS/Repository') {
 179        open my $f, '<', 'CVS/Repository' or
 180            die 'Failed to open CVS/Repository';
 181        $cvs_tree = <$f>;
 182        chomp $cvs_tree;
 183        close $f;
 184} else {
 185        usage("CVS module has to be specified");
 186}
 187
 188our @mergerx = ();
 189if ($opt_m) {
 190        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 191}
 192if (@opt_M) {
 193        push (@mergerx, map { qr/$_/ } @opt_M);
 194}
 195
 196# Remember UTC of our starting time
 197# we'll want to avoid importing commits
 198# that are too recent
 199our $starttime = time();
 200
 201select(STDERR); $|=1; select(STDOUT);
 202
 203
 204package CVSconn;
 205# Basic CVS dialog.
 206# We're only interested in connecting and downloading, so ...
 207
 208use File::Spec;
 209use File::Temp qw(tempfile);
 210use POSIX qw(strftime dup2);
 211
 212sub new {
 213        my ($what,$repo,$subdir) = @_;
 214        $what=ref($what) if ref($what);
 215
 216        my $self = {};
 217        $self->{'buffer'} = "";
 218        bless($self,$what);
 219
 220        $repo =~ s#/+$##;
 221        $self->{'fullrep'} = $repo;
 222        $self->conn();
 223
 224        $self->{'subdir'} = $subdir;
 225        $self->{'lines'} = undef;
 226
 227        return $self;
 228}
 229
 230sub find_password_entry {
 231        my ($cvspass, @cvsroot) = @_;
 232        my ($file, $delim) = @$cvspass;
 233        my $pass;
 234        local ($_);
 235
 236        if (open(my $fh, $file)) {
 237                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 238                CVSPASSFILE:
 239                while (<$fh>) {
 240                        chomp;
 241                        s/^\/\d+\s+//;
 242                        my ($w, $p) = split($delim,$_,2);
 243                        for my $cvsroot (@cvsroot) {
 244                                if ($w eq $cvsroot) {
 245                                        $pass = $p;
 246                                        last CVSPASSFILE;
 247                                }
 248                        }
 249                }
 250                close($fh);
 251        }
 252        return $pass;
 253}
 254
 255sub conn {
 256        my $self = shift;
 257        my $repo = $self->{'fullrep'};
 258        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 259                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 260
 261                my ($proxyhost,$proxyport);
 262                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 263                        $proxyhost = $1;
 264                        # Default proxyport, if not specified, is 8080.
 265                        $proxyport = 8080;
 266                        if ($ENV{"CVS_PROXY_PORT"}) {
 267                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 268                        }
 269                        if ($param =~ m/proxyport=([^;]+)/) {
 270                                $proxyport = $1;
 271                        }
 272                }
 273                $repo ||= '/';
 274
 275                # if username is not explicit in CVSROOT, then use current user, as cvs would
 276                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 277                my $rr2 = "-";
 278                unless ($port) {
 279                        $rr2 = ":pserver:$user\@$serv:$repo";
 280                        $port=2401;
 281                }
 282                my $rr = ":pserver:$user\@$serv:$port$repo";
 283
 284                if ($pass) {
 285                        $pass = $self->_scramble($pass);
 286                } else {
 287                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 288                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 289                        my @loc = ();
 290                        foreach my $cvspass (@cvspass) {
 291                                my $p = find_password_entry($cvspass, $rr, $rr2);
 292                                if ($p) {
 293                                        push @loc, $cvspass->[0];
 294                                        $pass = $p;
 295                                }
 296                        }
 297
 298                        if (1 < @loc) {
 299                                die("Multiple cvs password files have ".
 300                                    "entries for CVSROOT $opt_d: @loc");
 301                        } elsif (!$pass) {
 302                                $pass = "A";
 303                        }
 304                }
 305
 306                my ($s, $rep);
 307                if ($proxyhost) {
 308
 309                        # Use a HTTP Proxy. Only works for HTTP proxies that
 310                        # don't require user authentication
 311                        #
 312                        # See: http://www.ietf.org/rfc/rfc2817.txt
 313
 314                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 315                        die "Socket to $proxyhost: $!\n" unless defined $s;
 316                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 317                                or die "Write to $proxyhost: $!\n";
 318                        $s->flush();
 319
 320                        $rep = <$s>;
 321
 322                        # The answer should look like 'HTTP/1.x 2yy ....'
 323                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 324                                die "Proxy connect: $rep\n";
 325                        }
 326                        # Skip up to the empty line of the proxy server output
 327                        # including the response headers.
 328                        while ($rep = <$s>) {
 329                                last if (!defined $rep ||
 330                                         $rep eq "\n" ||
 331                                         $rep eq "\r\n");
 332                        }
 333                } else {
 334                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 335                        die "Socket to $serv: $!\n" unless defined $s;
 336                }
 337
 338                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 339                        or die "Write to $serv: $!\n";
 340                $s->flush();
 341
 342                $rep = <$s>;
 343
 344                if ($rep ne "I LOVE YOU\n") {
 345                        $rep="<unknown>" unless $rep;
 346                        die "AuthReply: $rep\n";
 347                }
 348                $self->{'socketo'} = $s;
 349                $self->{'socketi'} = $s;
 350        } else { # local or ext: Fork off our own cvs server.
 351                my $pr = IO::Pipe->new();
 352                my $pw = IO::Pipe->new();
 353                my $pid = fork();
 354                die "Fork: $!\n" unless defined $pid;
 355                my $cvs = 'cvs';
 356                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 357                my $rsh = 'rsh';
 358                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 359
 360                my @cvs = ($cvs, 'server');
 361                my ($local, $user, $host);
 362                $local = $repo =~ s/:local://;
 363                if (!$local) {
 364                    $repo =~ s/:ext://;
 365                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 366                    ($user, $host) = ($1, $2);
 367                }
 368                if (!$local) {
 369                    if ($user) {
 370                        unshift @cvs, $rsh, '-l', $user, $host;
 371                    } else {
 372                        unshift @cvs, $rsh, $host;
 373                    }
 374                }
 375
 376                unless ($pid) {
 377                        $pr->writer();
 378                        $pw->reader();
 379                        dup2($pw->fileno(),0);
 380                        dup2($pr->fileno(),1);
 381                        $pr->close();
 382                        $pw->close();
 383                        exec(@cvs);
 384                }
 385                $pw->writer();
 386                $pr->reader();
 387                $self->{'socketo'} = $pw;
 388                $self->{'socketi'} = $pr;
 389        }
 390        $self->{'socketo'}->write("Root $repo\n");
 391
 392        # Trial and error says that this probably is the minimum set
 393        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 394
 395        $self->{'socketo'}->write("valid-requests\n");
 396        $self->{'socketo'}->flush();
 397
 398        my $rep=$self->readline();
 399        die "Failed to read from server" unless defined $rep;
 400        chomp($rep);
 401        if ($rep !~ s/^Valid-requests\s*//) {
 402                $rep="<unknown>" unless $rep;
 403                die "Expected Valid-requests from server, but got: $rep\n";
 404        }
 405        chomp(my $res=$self->readline());
 406        die "validReply: $res\n" if $res ne "ok";
 407
 408        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 409        $self->{'repo'} = $repo;
 410}
 411
 412sub readline {
 413        my ($self) = @_;
 414        return $self->{'socketi'}->getline();
 415}
 416
 417sub _file {
 418        # Request a file with a given revision.
 419        # Trial and error says this is a good way to do it. :-/
 420        my ($self,$fn,$rev) = @_;
 421        $self->{'socketo'}->write("Argument -N\n") or return undef;
 422        $self->{'socketo'}->write("Argument -P\n") or return undef;
 423        # -kk: Linus' version doesn't use it - defaults to off
 424        if ($opt_k) {
 425            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 426        }
 427        $self->{'socketo'}->write("Argument -r\n") or return undef;
 428        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 429        $self->{'socketo'}->write("Argument --\n") or return undef;
 430        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 431        $self->{'socketo'}->write("Directory .\n") or return undef;
 432        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 433        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 434        $self->{'socketo'}->write("co\n") or return undef;
 435        $self->{'socketo'}->flush() or return undef;
 436        $self->{'lines'} = 0;
 437        return 1;
 438}
 439sub _line {
 440        # Read a line from the server.
 441        # ... except that 'line' may be an entire file. ;-)
 442        my ($self, $fh) = @_;
 443        die "Not in lines" unless defined $self->{'lines'};
 444
 445        my $line;
 446        my $res=0;
 447        while (defined($line = $self->readline())) {
 448                # M U gnupg-cvs-rep/AUTHORS
 449                # Updated gnupg-cvs-rep/
 450                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 451                # /AUTHORS/1.1///T1.1
 452                # u=rw,g=rw,o=rw
 453                # 0
 454                # ok
 455
 456                if ($line =~ s/^(?:Created|Updated) //) {
 457                        $line = $self->readline(); # path
 458                        $line = $self->readline(); # Entries line
 459                        my $mode = $self->readline(); chomp $mode;
 460                        $self->{'mode'} = $mode;
 461                        defined (my $cnt = $self->readline())
 462                                or die "EOF from server after 'Changed'\n";
 463                        chomp $cnt;
 464                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 465                        $line="";
 466                        $res = $self->_fetchfile($fh, $cnt);
 467                } elsif ($line =~ s/^ //) {
 468                        print $fh $line;
 469                        $res += length($line);
 470                } elsif ($line =~ /^M\b/) {
 471                        # output, do nothing
 472                } elsif ($line =~ /^Mbinary\b/) {
 473                        my $cnt;
 474                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 475                        chomp $cnt;
 476                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 477                        $line="";
 478                        $res += $self->_fetchfile($fh, $cnt);
 479                } else {
 480                        chomp $line;
 481                        if ($line eq "ok") {
 482                                # print STDERR "S: ok (".length($res).")\n";
 483                                return $res;
 484                        } elsif ($line =~ s/^E //) {
 485                                # print STDERR "S: $line\n";
 486                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 487                                $line = $self->readline(); # filename
 488                                $line = $self->readline(); # OK
 489                                chomp $line;
 490                                die "Unknown: $line" if $line ne "ok";
 491                                return -1;
 492                        } else {
 493                                die "Unknown: $line\n";
 494                        }
 495                }
 496        }
 497        return undef;
 498}
 499sub file {
 500        my ($self,$fn,$rev) = @_;
 501        my $res;
 502
 503        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 504                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 505
 506        $self->_file($fn,$rev) and $res = $self->_line($fh);
 507
 508        if (!defined $res) {
 509            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 510            truncate $fh, 0;
 511            $self->conn();
 512            $self->_file($fn,$rev) or die "No file command send";
 513            $res = $self->_line($fh);
 514            die "Retry failed" unless defined $res;
 515        }
 516        close ($fh);
 517
 518        return ($name, $res);
 519}
 520sub _fetchfile {
 521        my ($self, $fh, $cnt) = @_;
 522        my $res = 0;
 523        my $bufsize = 1024 * 1024;
 524        while ($cnt) {
 525            if ($bufsize > $cnt) {
 526                $bufsize = $cnt;
 527            }
 528            my $buf;
 529            my $num = $self->{'socketi'}->read($buf,$bufsize);
 530            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 531            print $fh $buf;
 532            $res += $num;
 533            $cnt -= $num;
 534        }
 535        return $res;
 536}
 537
 538sub _scramble {
 539        my ($self, $pass) = @_;
 540        my $scrambled = "A";
 541
 542        return $scrambled unless $pass;
 543
 544        my $pass_len = length($pass);
 545        my @pass_arr = split("", $pass);
 546        my $i;
 547
 548        # from cvs/src/scramble.c
 549        my @shifts = (
 550                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 551                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 552                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 553                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 554                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 555                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 556                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 557                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 558                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 559                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 560                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 561                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 562                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 563                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 564                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 565                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 566        );
 567
 568        for ($i = 0; $i < $pass_len; $i++) {
 569                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 570        }
 571
 572        return $scrambled;
 573}
 574
 575package main;
 576
 577my $cvs = CVSconn->new($opt_d, $cvs_tree);
 578
 579
 580sub pdate($) {
 581        my ($d) = @_;
 582        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 583                or die "Unparseable date: $d\n";
 584        my $y=$1; $y-=1900 if $y>1900;
 585        return timegm($6||0,$5,$4,$3,$2-1,$y);
 586}
 587
 588sub pmode($) {
 589        my ($mode) = @_;
 590        my $m = 0;
 591        my $mm = 0;
 592        my $um = 0;
 593        for my $x(split(//,$mode)) {
 594                if ($x eq ",") {
 595                        $m |= $mm&$um;
 596                        $mm = 0;
 597                        $um = 0;
 598                } elsif ($x eq "u") { $um |= 0700;
 599                } elsif ($x eq "g") { $um |= 0070;
 600                } elsif ($x eq "o") { $um |= 0007;
 601                } elsif ($x eq "r") { $mm |= 0444;
 602                } elsif ($x eq "w") { $mm |= 0222;
 603                } elsif ($x eq "x") { $mm |= 0111;
 604                } elsif ($x eq "=") { # do nothing
 605                } else { die "Unknown mode: $mode\n";
 606                }
 607        }
 608        $m |= $mm&$um;
 609        return $m;
 610}
 611
 612sub getwd() {
 613        my $pwd = `pwd`;
 614        chomp $pwd;
 615        return $pwd;
 616}
 617
 618sub is_sha1 {
 619        my $s = shift;
 620        return $s =~ /^[a-f0-9]{40}$/;
 621}
 622
 623sub get_headref ($) {
 624        my $name = shift;
 625        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 626        return undef unless $? == 0;
 627        chomp $r;
 628        return $r;
 629}
 630
 631my $user_filename_prepend = '';
 632sub munge_user_filename {
 633        my $name = shift;
 634        return File::Spec->file_name_is_absolute($name) ?
 635                $name :
 636                $user_filename_prepend . $name;
 637}
 638
 639-d $git_tree
 640        or mkdir($git_tree,0777)
 641        or die "Could not create $git_tree: $!";
 642if ($git_tree ne '.') {
 643        $user_filename_prepend = getwd() . '/';
 644        chdir($git_tree);
 645}
 646
 647my $last_branch = "";
 648my $orig_branch = "";
 649my %branch_date;
 650my $tip_at_start = undef;
 651
 652my $git_dir = $ENV{"GIT_DIR"} || ".git";
 653$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 654$ENV{"GIT_DIR"} = $git_dir;
 655my $orig_git_index;
 656$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 657
 658my %index; # holds filenames of one index per branch
 659
 660unless (-d $git_dir) {
 661        system(qw(git init));
 662        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 663        system(qw(git read-tree --empty));
 664        die "Cannot init an empty tree: $?\n" if $?;
 665
 666        $last_branch = $opt_o;
 667        $orig_branch = "";
 668} else {
 669        open(F, "-|", qw(git symbolic-ref HEAD)) or
 670                die "Cannot run git symbolic-ref: $!\n";
 671        chomp ($last_branch = <F>);
 672        $last_branch = basename($last_branch);
 673        close(F);
 674        unless ($last_branch) {
 675                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 676                $last_branch = "master";
 677        }
 678        $orig_branch = $last_branch;
 679        $tip_at_start = `git rev-parse --verify HEAD`;
 680
 681        # Get the last import timestamps
 682        my $fmt = '($ref, $author) = (%(refname), %(author));';
 683        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 684        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 685        while (defined(my $entry = <H>)) {
 686                my ($ref, $author);
 687                eval($entry) || die "cannot eval refs list: $@";
 688                my ($head) = ($ref =~ m|^$remote/(.*)|);
 689                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 690                $branch_date{$head} = $1;
 691        }
 692        close(H);
 693        if (!exists $branch_date{$opt_o}) {
 694                die "Branch '$opt_o' does not exist.\n".
 695                       "Either use the correct '-o branch' option,\n".
 696                       "or import to a new repository.\n";
 697        }
 698}
 699
 700-d $git_dir
 701        or die "Could not create git subdir ($git_dir).\n";
 702
 703# now we read (and possibly save) author-info as well
 704-f "$git_dir/cvs-authors" and
 705  read_author_info("$git_dir/cvs-authors");
 706if ($opt_A) {
 707        read_author_info(munge_user_filename($opt_A));
 708        write_author_info("$git_dir/cvs-authors");
 709}
 710
 711# open .git/cvs-revisions, if requested
 712open my $revision_map, '>>', "$git_dir/cvs-revisions"
 713    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 714        if defined $opt_R;
 715
 716
 717#
 718# run cvsps into a file unless we are getting
 719# it passed as a file via $opt_P
 720#
 721my $cvspsfile;
 722unless ($opt_P) {
 723        print "Running cvsps...\n" if $opt_v;
 724        my $pid = open(CVSPS,"-|");
 725        my $cvspsfh;
 726        die "Cannot fork: $!\n" unless defined $pid;
 727        unless ($pid) {
 728                my @opt;
 729                @opt = split(/,/,$opt_p) if defined $opt_p;
 730                unshift @opt, '-z', $opt_z if defined $opt_z;
 731                unshift @opt, '-q'         unless defined $opt_v;
 732                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 733                        push @opt, '--cvs-direct';
 734                }
 735                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 736                die "Could not start cvsps: $!\n";
 737        }
 738        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 739                                          DIR => File::Spec->tmpdir());
 740        while (<CVSPS>) {
 741            print $cvspsfh $_;
 742        }
 743        close CVSPS;
 744        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 745        close $cvspsfh;
 746} else {
 747        $cvspsfile = munge_user_filename($opt_P);
 748}
 749
 750open(CVS, "<$cvspsfile") or die $!;
 751
 752## cvsps output:
 753#---------------------
 754#PatchSet 314
 755#Date: 1999/09/18 13:03:59
 756#Author: wkoch
 757#Branch: STABLE-BRANCH-1-0
 758#Ancestor branch: HEAD
 759#Tag: (none)
 760#Log:
 761#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 762#Members:
 763#       README:1.57->1.57.2.1
 764#       VERSION:1.96->1.96.2.1
 765#
 766#---------------------
 767
 768my $state = 0;
 769
 770sub update_index (\@\@) {
 771        my $old = shift;
 772        my $new = shift;
 773        open(my $fh, '|-', qw(git update-index -z --index-info))
 774                or die "unable to open git update-index: $!";
 775        print $fh
 776                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 777                        @$old),
 778                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 779                        @$new)
 780                or die "unable to write to git update-index: $!";
 781        close $fh
 782                or die "unable to write to git update-index: $!";
 783        $? and die "git update-index reported error: $?";
 784}
 785
 786sub write_tree () {
 787        open(my $fh, '-|', qw(git write-tree))
 788                or die "unable to open git write-tree: $!";
 789        chomp(my $tree = <$fh>);
 790        is_sha1($tree)
 791                or die "Cannot get tree id ($tree): $!";
 792        close($fh)
 793                or die "Error running git write-tree: $?\n";
 794        print "Tree ID $tree\n" if $opt_v;
 795        return $tree;
 796}
 797
 798my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 799my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 800
 801# commits that cvsps cannot place anywhere...
 802$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 803
 804sub commit {
 805        if ($branch eq $opt_o && !$index{branch} &&
 806                !get_headref("$remote/$branch")) {
 807            # looks like an initial commit
 808            # use the index primed by git init
 809            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 810            $index{$branch} = "$git_dir/index";
 811        } else {
 812            # use an index per branch to speed up
 813            # imports of projects with many branches
 814            unless ($index{$branch}) {
 815                $index{$branch} = tmpnam();
 816                $ENV{GIT_INDEX_FILE} = $index{$branch};
 817                if ($ancestor) {
 818                    system("git", "read-tree", "$remote/$ancestor");
 819                } else {
 820                    system("git", "read-tree", "$remote/$branch");
 821                }
 822                die "read-tree failed: $?\n" if $?;
 823            }
 824        }
 825        $ENV{GIT_INDEX_FILE} = $index{$branch};
 826
 827        update_index(@old, @new);
 828        @old = @new = ();
 829        my $tree = write_tree();
 830        my $parent = get_headref("$remote/$last_branch");
 831        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 832
 833        my @commit_args;
 834        push @commit_args, ("-p", $parent) if $parent;
 835
 836        # loose detection of merges
 837        # based on the commit msg
 838        foreach my $rx (@mergerx) {
 839                next unless $logmsg =~ $rx && $1;
 840                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 841                if (my $sha1 = get_headref("$remote/$mparent")) {
 842                        push @commit_args, '-p', "$remote/$mparent";
 843                        print "Merge parent branch: $mparent\n" if $opt_v;
 844                }
 845        }
 846
 847        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 848        $ENV{GIT_AUTHOR_NAME} = $author_name;
 849        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 850        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 851        $ENV{GIT_COMMITTER_NAME} = $author_name;
 852        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 853        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 854        my $pid = open2(my $commit_read, my $commit_write,
 855                'git', 'commit-tree', $tree, @commit_args);
 856
 857        # compatibility with git2cvs
 858        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 859        $logmsg =~ s/[\s\n]+\z//;
 860
 861        if (@skipped) {
 862            $logmsg .= "\n\n\nSKIPPED:\n\t";
 863            $logmsg .= join("\n\t", @skipped) . "\n";
 864            @skipped = ();
 865        }
 866
 867        print($commit_write "$logmsg\n") && close($commit_write)
 868                or die "Error writing to git commit-tree: $!\n";
 869
 870        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 871        chomp(my $cid = <$commit_read>);
 872        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 873        print "Commit ID $cid\n" if $opt_v;
 874        close($commit_read);
 875
 876        waitpid($pid,0);
 877        die "Error running git commit-tree: $?\n" if $?;
 878
 879        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 880                or die "Cannot write branch $branch for update: $!\n";
 881
 882        if ($revision_map) {
 883                print $revision_map "@$_ $cid\n" for @commit_revisions;
 884        }
 885        @commit_revisions = ();
 886
 887        if ($tag) {
 888                my ($xtag) = $tag;
 889                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 890                $xtag =~ tr/_/\./ if ( $opt_u );
 891                $xtag =~ s/[\/]/$opt_s/g;
 892                $xtag =~ s/\[//g;
 893
 894                system('git' , 'tag', '-f', $xtag, $cid) == 0
 895                        or die "Cannot create tag $xtag: $!\n";
 896
 897                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 898        }
 899};
 900
 901my $commitcount = 1;
 902while (<CVS>) {
 903        chomp;
 904        if ($state == 0 and /^-+$/) {
 905                $state = 1;
 906        } elsif ($state == 0) {
 907                $state = 1;
 908                redo;
 909        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 910                $patchset = 0+$_;
 911                $state=2;
 912        } elsif ($state == 2 and s/^Date:\s+//) {
 913                $date = pdate($_);
 914                unless ($date) {
 915                        print STDERR "Could not parse date: $_\n";
 916                        $state=0;
 917                        next;
 918                }
 919                $state=3;
 920        } elsif ($state == 3 and s/^Author:\s+//) {
 921                s/\s+$//;
 922                if (/^(.*?)\s+<(.*)>/) {
 923                    ($author_name, $author_email) = ($1, $2);
 924                } elsif ($conv_author_name{$_}) {
 925                        $author_name = $conv_author_name{$_};
 926                        $author_email = $conv_author_email{$_};
 927                } else {
 928                    $author_name = $author_email = $_;
 929                }
 930                $state = 4;
 931        } elsif ($state == 4 and s/^Branch:\s+//) {
 932                s/\s+$//;
 933                tr/_/\./ if ( $opt_u );
 934                s/[\/]/$opt_s/g;
 935                $branch = $_;
 936                $state = 5;
 937        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 938                s/\s+$//;
 939                $ancestor = $_;
 940                $ancestor = $opt_o if $ancestor eq "HEAD";
 941                $state = 6;
 942        } elsif ($state == 5) {
 943                $ancestor = undef;
 944                $state = 6;
 945                redo;
 946        } elsif ($state == 6 and s/^Tag:\s+//) {
 947                s/\s+$//;
 948                if ($_ eq "(none)") {
 949                        $tag = undef;
 950                } else {
 951                        $tag = $_;
 952                }
 953                $state = 7;
 954        } elsif ($state == 7 and /^Log:/) {
 955                $logmsg = "";
 956                $state = 8;
 957        } elsif ($state == 8 and /^Members:/) {
 958                $branch = $opt_o if $branch eq "HEAD";
 959                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 960                        # skip
 961                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 962                        $state = 11;
 963                        next;
 964                }
 965                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 966                        # skip if the commit is too recent
 967                        # given that the cvsps default fuzz is 300s, we give ourselves another
 968                        # 300s just in case -- this also prevents skipping commits
 969                        # due to server clock drift
 970                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 971                        $state = 11;
 972                        next;
 973                }
 974                if (exists $ignorebranch{$branch}) {
 975                        print STDERR "Skipping $branch\n";
 976                        $state = 11;
 977                        next;
 978                }
 979                if ($ancestor) {
 980                        if ($ancestor eq $branch) {
 981                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 982                                $ancestor = $opt_o;
 983                        }
 984                        if (defined get_headref("$remote/$branch")) {
 985                                print STDERR "Branch $branch already exists!\n";
 986                                $state=11;
 987                                next;
 988                        }
 989                        my $id = get_headref("$remote/$ancestor");
 990                        if (!$id) {
 991                                print STDERR "Branch $ancestor does not exist!\n";
 992                                $ignorebranch{$branch} = 1;
 993                                $state=11;
 994                                next;
 995                        }
 996
 997                        system(qw(git update-ref -m cvsimport),
 998                                "$remote/$branch", $id);
 999                        if($? != 0) {
1000                                print STDERR "Could not create branch $branch\n";
1001                                $ignorebranch{$branch} = 1;
1002                                $state=11;
1003                                next;
1004                        }
1005                }
1006                $last_branch = $branch if $branch ne $last_branch;
1007                $state = 9;
1008        } elsif ($state == 8) {
1009                $logmsg .= "$_\n";
1010        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1011#       VERSION:1.96->1.96.2.1
1012                my $init = ($2 eq "INITIAL");
1013                my $fn = $1;
1014                my $rev = $3;
1015                $fn =~ s#^/+##;
1016                if ($opt_S && $fn =~ m/$opt_S/) {
1017                    print "SKIPPING $fn v $rev\n";
1018                    push(@skipped, $fn);
1019                    next;
1020                }
1021                push @commit_revisions, [$fn, $rev];
1022                print "Fetching $fn   v $rev\n" if $opt_v;
1023                my ($tmpname, $size) = $cvs->file($fn,$rev);
1024                if ($size == -1) {
1025                        push(@old,$fn);
1026                        print "Drop $fn\n" if $opt_v;
1027                } else {
1028                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1029                        my $pid = open(my $F, '-|');
1030                        die $! unless defined $pid;
1031                        if (!$pid) {
1032                            exec("git", "hash-object", "-w", $tmpname)
1033                                or die "Cannot create object: $!\n";
1034                        }
1035                        my $sha = <$F>;
1036                        chomp $sha;
1037                        close $F;
1038                        my $mode = pmode($cvs->{'mode'});
1039                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1040                }
1041                unlink($tmpname);
1042        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1043                my $fn = $1;
1044                my $rev = $2;
1045                $fn =~ s#^/+##;
1046                push @commit_revisions, [$fn, $rev];
1047                push(@old,$fn);
1048                print "Delete $fn\n" if $opt_v;
1049        } elsif ($state == 9 and /^\s*$/) {
1050                $state = 10;
1051        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1052                $commitcount++;
1053                if ($opt_L && $commitcount > $opt_L) {
1054                        last;
1055                }
1056                commit();
1057                if (($commitcount & 1023) == 0) {
1058                        system(qw(git repack -a -d));
1059                }
1060                $state = 1;
1061        } elsif ($state == 11 and /^-+$/) {
1062                $state = 1;
1063        } elsif (/^-+$/) { # end of unknown-line processing
1064                $state = 1;
1065        } elsif ($state != 11) { # ignore stuff when skipping
1066                print STDERR "* UNKNOWN LINE * $_\n";
1067        }
1068}
1069commit() if $branch and $state != 11;
1070
1071unless ($opt_P) {
1072        unlink($cvspsfile);
1073}
1074
1075# The heuristic of repacking every 1024 commits can leave a
1076# lot of unpacked data.  If there is more than 1MB worth of
1077# not-packed objects, repack once more.
1078my $line = `git count-objects`;
1079if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1080  my ($n_objects, $kb) = ($1, $2);
1081  1024 < $kb
1082    and system(qw(git repack -a -d));
1083}
1084
1085foreach my $git_index (values %index) {
1086    if ($git_index ne "$git_dir/index") {
1087        unlink($git_index);
1088    }
1089}
1090
1091if (defined $orig_git_index) {
1092        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1093} else {
1094        delete $ENV{GIT_INDEX_FILE};
1095}
1096
1097# Now switch back to the branch we were in before all of this happened
1098if ($orig_branch) {
1099        print "DONE.\n" if $opt_v;
1100        if ($opt_i) {
1101                exit 0;
1102        }
1103        my $tip_at_end = `git rev-parse --verify HEAD`;
1104        if ($tip_at_start ne $tip_at_end) {
1105                for ($tip_at_start, $tip_at_end) { chomp; }
1106                print "Fetched into the current branch.\n" if $opt_v;
1107                system(qw(git read-tree -u -m),
1108                       $tip_at_start, $tip_at_end);
1109                die "Fast-forward update failed: $?\n" if $?;
1110        }
1111        else {
1112                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1113                die "Could not merge $opt_o into the current branch.\n" if $?;
1114        }
1115} else {
1116        $orig_branch = "master";
1117        print "DONE; creating $orig_branch branch\n" if $opt_v;
1118        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1119                unless defined get_headref('refs/heads/master');
1120        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1121                if ($opt_r && $opt_o ne 'HEAD');
1122        system('git', 'update-ref', 'HEAD', "$orig_branch");
1123        unless ($opt_i) {
1124                system(qw(git checkout -f));
1125                die "checkout failed: $?\n" if $?;
1126        }
1127}