git-cvsimport.perlon commit Fix memory leak in submodule.c (de7a796)
   1#!/usr/bin/perl -w
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use strict;
  17use warnings;
  18use Getopt::Long;
  19use File::Spec;
  20use File::Temp qw(tempfile tmpnam);
  21use File::Path qw(mkpath);
  22use File::Basename qw(basename dirname);
  23use Time::Local;
  24use IO::Socket;
  25use IO::Pipe;
  26use POSIX qw(strftime dup2 ENOENT);
  27use IPC::Open2;
  28
  29$SIG{'PIPE'}="IGNORE";
  30$ENV{'TZ'}="UTC";
  31
  32our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r);
  33my (%conv_author_name, %conv_author_email);
  34
  35sub usage(;$) {
  36        my $msg = shift;
  37        print(STDERR "Error: $msg\n") if $msg;
  38        print STDERR <<END;
  39Usage: git cvsimport     # fetch/update GIT from CVS
  40       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  41       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  42       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  43       [-r remote] [CVS_module]
  44END
  45        exit(1);
  46}
  47
  48sub read_author_info($) {
  49        my ($file) = @_;
  50        my $user;
  51        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  52
  53        while (<$f>) {
  54                # Expected format is this:
  55                #   exon=Andreas Ericsson <ae@op5.se>
  56                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  57                        $user = $1;
  58                        $conv_author_name{$user} = $2;
  59                        $conv_author_email{$user} = $3;
  60                }
  61                # However, we also read from CVSROOT/users format
  62                # to ease migration.
  63                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  64                        my $mapped;
  65                        ($user, $mapped) = ($1, $3);
  66                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  67                                $conv_author_name{$user} = $1;
  68                                $conv_author_email{$user} = $2;
  69                        }
  70                        elsif ($mapped =~ /^<?(.*)>?$/) {
  71                                $conv_author_name{$user} = $user;
  72                                $conv_author_email{$user} = $1;
  73                        }
  74                }
  75                # NEEDSWORK: Maybe warn on unrecognized lines?
  76        }
  77        close ($f);
  78}
  79
  80sub write_author_info($) {
  81        my ($file) = @_;
  82        open my $f, '>', $file or
  83          die("Failed to open $file for writing: $!");
  84
  85        foreach (keys %conv_author_name) {
  86                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  87        }
  88        close ($f);
  89}
  90
  91# convert getopts specs for use by git config
  92sub read_repo_config {
  93    # Split the string between characters, unless there is a ':'
  94    # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
  95        my @opts = split(/ *(?!:)/, shift);
  96        foreach my $o (@opts) {
  97                my $key = $o;
  98                $key =~ s/://g;
  99                my $arg = 'git config';
 100                $arg .= ' --bool' if ($o !~ /:$/);
 101
 102        chomp(my $tmp = `$arg --get cvsimport.$key`);
 103                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 104            no strict 'refs';
 105            my $opt_name = "opt_" . $key;
 106            if (!$$opt_name) {
 107                $$opt_name = $tmp;
 108            }
 109                }
 110        }
 111}
 112
 113my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:";
 114read_repo_config($opts);
 115Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 116
 117# turn the Getopt::Std specification in a Getopt::Long one,
 118# with support for multiple -M options
 119GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 120    or usage();
 121usage if $opt_h;
 122
 123if (@ARGV == 0) {
 124                chomp(my $module = `git config --get cvsimport.module`);
 125                push(@ARGV, $module) if $? == 0;
 126}
 127@ARGV <= 1 or usage("You can't specify more than one CVS module");
 128
 129if ($opt_d) {
 130        $ENV{"CVSROOT"} = $opt_d;
 131} elsif (-f 'CVS/Root') {
 132        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 133        $opt_d = <$f>;
 134        chomp $opt_d;
 135        close $f;
 136        $ENV{"CVSROOT"} = $opt_d;
 137} elsif ($ENV{"CVSROOT"}) {
 138        $opt_d = $ENV{"CVSROOT"};
 139} else {
 140        usage("CVSROOT needs to be set");
 141}
 142$opt_s ||= "-";
 143$opt_a ||= 0;
 144
 145my $git_tree = $opt_C;
 146$git_tree ||= ".";
 147
 148my $remote;
 149if (defined $opt_r) {
 150        $remote = 'refs/remotes/' . $opt_r;
 151        $opt_o ||= "master";
 152} else {
 153        $opt_o ||= "origin";
 154        $remote = 'refs/heads';
 155}
 156
 157my $cvs_tree;
 158if ($#ARGV == 0) {
 159        $cvs_tree = $ARGV[0];
 160} elsif (-f 'CVS/Repository') {
 161        open my $f, '<', 'CVS/Repository' or
 162            die 'Failed to open CVS/Repository';
 163        $cvs_tree = <$f>;
 164        chomp $cvs_tree;
 165        close $f;
 166} else {
 167        usage("CVS module has to be specified");
 168}
 169
 170our @mergerx = ();
 171if ($opt_m) {
 172        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 173}
 174if (@opt_M) {
 175        push (@mergerx, map { qr/$_/ } @opt_M);
 176}
 177
 178# Remember UTC of our starting time
 179# we'll want to avoid importing commits
 180# that are too recent
 181our $starttime = time();
 182
 183select(STDERR); $|=1; select(STDOUT);
 184
 185
 186package CVSconn;
 187# Basic CVS dialog.
 188# We're only interested in connecting and downloading, so ...
 189
 190use File::Spec;
 191use File::Temp qw(tempfile);
 192use POSIX qw(strftime dup2);
 193
 194sub new {
 195        my ($what,$repo,$subdir) = @_;
 196        $what=ref($what) if ref($what);
 197
 198        my $self = {};
 199        $self->{'buffer'} = "";
 200        bless($self,$what);
 201
 202        $repo =~ s#/+$##;
 203        $self->{'fullrep'} = $repo;
 204        $self->conn();
 205
 206        $self->{'subdir'} = $subdir;
 207        $self->{'lines'} = undef;
 208
 209        return $self;
 210}
 211
 212sub conn {
 213        my $self = shift;
 214        my $repo = $self->{'fullrep'};
 215        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 216                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 217
 218                my ($proxyhost,$proxyport);
 219                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 220                        $proxyhost = $1;
 221                        # Default proxyport, if not specified, is 8080.
 222                        $proxyport = 8080;
 223                        if ($ENV{"CVS_PROXY_PORT"}) {
 224                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 225                        }
 226                        if ($param =~ m/proxyport=([^;]+)/) {
 227                                $proxyport = $1;
 228                        }
 229                }
 230                $repo ||= '/';
 231
 232                # if username is not explicit in CVSROOT, then use current user, as cvs would
 233                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 234                my $rr2 = "-";
 235                unless ($port) {
 236                        $rr2 = ":pserver:$user\@$serv:$repo";
 237                        $port=2401;
 238                }
 239                my $rr = ":pserver:$user\@$serv:$port$repo";
 240
 241                if ($pass) {
 242                        $pass = $self->_scramble($pass);
 243                } else {
 244                        open(H,$ENV{'HOME'}."/.cvspass") and do {
 245                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 246                                while (<H>) {
 247                                        chomp;
 248                                        s/^\/\d+\s+//;
 249                                        my ($w,$p) = split(/\s/,$_,2);
 250                                        if ($w eq $rr or $w eq $rr2) {
 251                                                $pass = $p;
 252                                                last;
 253                                        }
 254                                }
 255                        };
 256                        $pass = "A" unless $pass;
 257                }
 258
 259                my ($s, $rep);
 260                if ($proxyhost) {
 261
 262                        # Use a HTTP Proxy. Only works for HTTP proxies that
 263                        # don't require user authentication
 264                        #
 265                        # See: http://www.ietf.org/rfc/rfc2817.txt
 266
 267                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 268                        die "Socket to $proxyhost: $!\n" unless defined $s;
 269                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 270                                or die "Write to $proxyhost: $!\n";
 271                        $s->flush();
 272
 273                        $rep = <$s>;
 274
 275                        # The answer should look like 'HTTP/1.x 2yy ....'
 276                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 277                                die "Proxy connect: $rep\n";
 278                        }
 279                        # Skip up to the empty line of the proxy server output
 280                        # including the response headers.
 281                        while ($rep = <$s>) {
 282                                last if (!defined $rep ||
 283                                         $rep eq "\n" ||
 284                                         $rep eq "\r\n");
 285                        }
 286                } else {
 287                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 288                        die "Socket to $serv: $!\n" unless defined $s;
 289                }
 290
 291                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 292                        or die "Write to $serv: $!\n";
 293                $s->flush();
 294
 295                $rep = <$s>;
 296
 297                if ($rep ne "I LOVE YOU\n") {
 298                        $rep="<unknown>" unless $rep;
 299                        die "AuthReply: $rep\n";
 300                }
 301                $self->{'socketo'} = $s;
 302                $self->{'socketi'} = $s;
 303        } else { # local or ext: Fork off our own cvs server.
 304                my $pr = IO::Pipe->new();
 305                my $pw = IO::Pipe->new();
 306                my $pid = fork();
 307                die "Fork: $!\n" unless defined $pid;
 308                my $cvs = 'cvs';
 309                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 310                my $rsh = 'rsh';
 311                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 312
 313                my @cvs = ($cvs, 'server');
 314                my ($local, $user, $host);
 315                $local = $repo =~ s/:local://;
 316                if (!$local) {
 317                    $repo =~ s/:ext://;
 318                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 319                    ($user, $host) = ($1, $2);
 320                }
 321                if (!$local) {
 322                    if ($user) {
 323                        unshift @cvs, $rsh, '-l', $user, $host;
 324                    } else {
 325                        unshift @cvs, $rsh, $host;
 326                    }
 327                }
 328
 329                unless ($pid) {
 330                        $pr->writer();
 331                        $pw->reader();
 332                        dup2($pw->fileno(),0);
 333                        dup2($pr->fileno(),1);
 334                        $pr->close();
 335                        $pw->close();
 336                        exec(@cvs);
 337                }
 338                $pw->writer();
 339                $pr->reader();
 340                $self->{'socketo'} = $pw;
 341                $self->{'socketi'} = $pr;
 342        }
 343        $self->{'socketo'}->write("Root $repo\n");
 344
 345        # Trial and error says that this probably is the minimum set
 346        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 347
 348        $self->{'socketo'}->write("valid-requests\n");
 349        $self->{'socketo'}->flush();
 350
 351        chomp(my $rep=$self->readline());
 352        if ($rep !~ s/^Valid-requests\s*//) {
 353                $rep="<unknown>" unless $rep;
 354                die "Expected Valid-requests from server, but got: $rep\n";
 355        }
 356        chomp(my $res=$self->readline());
 357        die "validReply: $res\n" if $res ne "ok";
 358
 359        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 360        $self->{'repo'} = $repo;
 361}
 362
 363sub readline {
 364        my ($self) = @_;
 365        return $self->{'socketi'}->getline();
 366}
 367
 368sub _file {
 369        # Request a file with a given revision.
 370        # Trial and error says this is a good way to do it. :-/
 371        my ($self,$fn,$rev) = @_;
 372        $self->{'socketo'}->write("Argument -N\n") or return undef;
 373        $self->{'socketo'}->write("Argument -P\n") or return undef;
 374        # -kk: Linus' version doesn't use it - defaults to off
 375        if ($opt_k) {
 376            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 377        }
 378        $self->{'socketo'}->write("Argument -r\n") or return undef;
 379        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 380        $self->{'socketo'}->write("Argument --\n") or return undef;
 381        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 382        $self->{'socketo'}->write("Directory .\n") or return undef;
 383        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 384        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 385        $self->{'socketo'}->write("co\n") or return undef;
 386        $self->{'socketo'}->flush() or return undef;
 387        $self->{'lines'} = 0;
 388        return 1;
 389}
 390sub _line {
 391        # Read a line from the server.
 392        # ... except that 'line' may be an entire file. ;-)
 393        my ($self, $fh) = @_;
 394        die "Not in lines" unless defined $self->{'lines'};
 395
 396        my $line;
 397        my $res=0;
 398        while (defined($line = $self->readline())) {
 399                # M U gnupg-cvs-rep/AUTHORS
 400                # Updated gnupg-cvs-rep/
 401                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 402                # /AUTHORS/1.1///T1.1
 403                # u=rw,g=rw,o=rw
 404                # 0
 405                # ok
 406
 407                if ($line =~ s/^(?:Created|Updated) //) {
 408                        $line = $self->readline(); # path
 409                        $line = $self->readline(); # Entries line
 410                        my $mode = $self->readline(); chomp $mode;
 411                        $self->{'mode'} = $mode;
 412                        defined (my $cnt = $self->readline())
 413                                or die "EOF from server after 'Changed'\n";
 414                        chomp $cnt;
 415                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 416                        $line="";
 417                        $res = $self->_fetchfile($fh, $cnt);
 418                } elsif ($line =~ s/^ //) {
 419                        print $fh $line;
 420                        $res += length($line);
 421                } elsif ($line =~ /^M\b/) {
 422                        # output, do nothing
 423                } elsif ($line =~ /^Mbinary\b/) {
 424                        my $cnt;
 425                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 426                        chomp $cnt;
 427                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 428                        $line="";
 429                        $res += $self->_fetchfile($fh, $cnt);
 430                } else {
 431                        chomp $line;
 432                        if ($line eq "ok") {
 433                                # print STDERR "S: ok (".length($res).")\n";
 434                                return $res;
 435                        } elsif ($line =~ s/^E //) {
 436                                # print STDERR "S: $line\n";
 437                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 438                                $line = $self->readline(); # filename
 439                                $line = $self->readline(); # OK
 440                                chomp $line;
 441                                die "Unknown: $line" if $line ne "ok";
 442                                return -1;
 443                        } else {
 444                                die "Unknown: $line\n";
 445                        }
 446                }
 447        }
 448        return undef;
 449}
 450sub file {
 451        my ($self,$fn,$rev) = @_;
 452        my $res;
 453
 454        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 455                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 456
 457        $self->_file($fn,$rev) and $res = $self->_line($fh);
 458
 459        if (!defined $res) {
 460            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 461            truncate $fh, 0;
 462            $self->conn();
 463            $self->_file($fn,$rev) or die "No file command send";
 464            $res = $self->_line($fh);
 465            die "Retry failed" unless defined $res;
 466        }
 467        close ($fh);
 468
 469        return ($name, $res);
 470}
 471sub _fetchfile {
 472        my ($self, $fh, $cnt) = @_;
 473        my $res = 0;
 474        my $bufsize = 1024 * 1024;
 475        while ($cnt) {
 476            if ($bufsize > $cnt) {
 477                $bufsize = $cnt;
 478            }
 479            my $buf;
 480            my $num = $self->{'socketi'}->read($buf,$bufsize);
 481            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 482            print $fh $buf;
 483            $res += $num;
 484            $cnt -= $num;
 485        }
 486        return $res;
 487}
 488
 489sub _scramble {
 490        my ($self, $pass) = @_;
 491        my $scrambled = "A";
 492
 493        return $scrambled unless $pass;
 494
 495        my $pass_len = length($pass);
 496        my @pass_arr = split("", $pass);
 497        my $i;
 498
 499        # from cvs/src/scramble.c
 500        my @shifts = (
 501                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 502                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 503                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 504                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 505                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 506                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 507                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 508                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 509                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 510                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 511                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 512                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 513                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 514                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 515                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 516                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 517        );
 518
 519        for ($i = 0; $i < $pass_len; $i++) {
 520                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 521        }
 522
 523        return $scrambled;
 524}
 525
 526package main;
 527
 528my $cvs = CVSconn->new($opt_d, $cvs_tree);
 529
 530
 531sub pdate($) {
 532        my ($d) = @_;
 533        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 534                or die "Unparseable date: $d\n";
 535        my $y=$1; $y-=1900 if $y>1900;
 536        return timegm($6||0,$5,$4,$3,$2-1,$y);
 537}
 538
 539sub pmode($) {
 540        my ($mode) = @_;
 541        my $m = 0;
 542        my $mm = 0;
 543        my $um = 0;
 544        for my $x(split(//,$mode)) {
 545                if ($x eq ",") {
 546                        $m |= $mm&$um;
 547                        $mm = 0;
 548                        $um = 0;
 549                } elsif ($x eq "u") { $um |= 0700;
 550                } elsif ($x eq "g") { $um |= 0070;
 551                } elsif ($x eq "o") { $um |= 0007;
 552                } elsif ($x eq "r") { $mm |= 0444;
 553                } elsif ($x eq "w") { $mm |= 0222;
 554                } elsif ($x eq "x") { $mm |= 0111;
 555                } elsif ($x eq "=") { # do nothing
 556                } else { die "Unknown mode: $mode\n";
 557                }
 558        }
 559        $m |= $mm&$um;
 560        return $m;
 561}
 562
 563sub getwd() {
 564        my $pwd = `pwd`;
 565        chomp $pwd;
 566        return $pwd;
 567}
 568
 569sub is_sha1 {
 570        my $s = shift;
 571        return $s =~ /^[a-f0-9]{40}$/;
 572}
 573
 574sub get_headref ($) {
 575        my $name = shift;
 576        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 577        return undef unless $? == 0;
 578        chomp $r;
 579        return $r;
 580}
 581
 582my $user_filename_prepend = '';
 583sub munge_user_filename {
 584        my $name = shift;
 585        return File::Spec->file_name_is_absolute($name) ?
 586                $name :
 587                $user_filename_prepend . $name;
 588}
 589
 590-d $git_tree
 591        or mkdir($git_tree,0777)
 592        or die "Could not create $git_tree: $!";
 593if ($git_tree ne '.') {
 594        $user_filename_prepend = getwd() . '/';
 595        chdir($git_tree);
 596}
 597
 598my $last_branch = "";
 599my $orig_branch = "";
 600my %branch_date;
 601my $tip_at_start = undef;
 602
 603my $git_dir = $ENV{"GIT_DIR"} || ".git";
 604$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 605$ENV{"GIT_DIR"} = $git_dir;
 606my $orig_git_index;
 607$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 608
 609my %index; # holds filenames of one index per branch
 610
 611unless (-d $git_dir) {
 612        system(qw(git init));
 613        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 614        system(qw(git read-tree));
 615        die "Cannot init an empty tree: $?\n" if $?;
 616
 617        $last_branch = $opt_o;
 618        $orig_branch = "";
 619} else {
 620        open(F, "-|", qw(git symbolic-ref HEAD)) or
 621                die "Cannot run git symbolic-ref: $!\n";
 622        chomp ($last_branch = <F>);
 623        $last_branch = basename($last_branch);
 624        close(F);
 625        unless ($last_branch) {
 626                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 627                $last_branch = "master";
 628        }
 629        $orig_branch = $last_branch;
 630        $tip_at_start = `git rev-parse --verify HEAD`;
 631
 632        # Get the last import timestamps
 633        my $fmt = '($ref, $author) = (%(refname), %(author));';
 634        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 635        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 636        while (defined(my $entry = <H>)) {
 637                my ($ref, $author);
 638                eval($entry) || die "cannot eval refs list: $@";
 639                my ($head) = ($ref =~ m|^$remote/(.*)|);
 640                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 641                $branch_date{$head} = $1;
 642        }
 643        close(H);
 644        if (!exists $branch_date{$opt_o}) {
 645                die "Branch '$opt_o' does not exist.\n".
 646                       "Either use the correct '-o branch' option,\n".
 647                       "or import to a new repository.\n";
 648        }
 649}
 650
 651-d $git_dir
 652        or die "Could not create git subdir ($git_dir).\n";
 653
 654# now we read (and possibly save) author-info as well
 655-f "$git_dir/cvs-authors" and
 656  read_author_info("$git_dir/cvs-authors");
 657if ($opt_A) {
 658        read_author_info(munge_user_filename($opt_A));
 659        write_author_info("$git_dir/cvs-authors");
 660}
 661
 662
 663#
 664# run cvsps into a file unless we are getting
 665# it passed as a file via $opt_P
 666#
 667my $cvspsfile;
 668unless ($opt_P) {
 669        print "Running cvsps...\n" if $opt_v;
 670        my $pid = open(CVSPS,"-|");
 671        my $cvspsfh;
 672        die "Cannot fork: $!\n" unless defined $pid;
 673        unless ($pid) {
 674                my @opt;
 675                @opt = split(/,/,$opt_p) if defined $opt_p;
 676                unshift @opt, '-z', $opt_z if defined $opt_z;
 677                unshift @opt, '-q'         unless defined $opt_v;
 678                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 679                        push @opt, '--cvs-direct';
 680                }
 681                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 682                die "Could not start cvsps: $!\n";
 683        }
 684        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 685                                          DIR => File::Spec->tmpdir());
 686        while (<CVSPS>) {
 687            print $cvspsfh $_;
 688        }
 689        close CVSPS;
 690        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 691        close $cvspsfh;
 692} else {
 693        $cvspsfile = munge_user_filename($opt_P);
 694}
 695
 696open(CVS, "<$cvspsfile") or die $!;
 697
 698## cvsps output:
 699#---------------------
 700#PatchSet 314
 701#Date: 1999/09/18 13:03:59
 702#Author: wkoch
 703#Branch: STABLE-BRANCH-1-0
 704#Ancestor branch: HEAD
 705#Tag: (none)
 706#Log:
 707#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 708#Members:
 709#       README:1.57->1.57.2.1
 710#       VERSION:1.96->1.96.2.1
 711#
 712#---------------------
 713
 714my $state = 0;
 715
 716sub update_index (\@\@) {
 717        my $old = shift;
 718        my $new = shift;
 719        open(my $fh, '|-', qw(git update-index -z --index-info))
 720                or die "unable to open git update-index: $!";
 721        print $fh
 722                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 723                        @$old),
 724                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 725                        @$new)
 726                or die "unable to write to git update-index: $!";
 727        close $fh
 728                or die "unable to write to git update-index: $!";
 729        $? and die "git update-index reported error: $?";
 730}
 731
 732sub write_tree () {
 733        open(my $fh, '-|', qw(git write-tree))
 734                or die "unable to open git write-tree: $!";
 735        chomp(my $tree = <$fh>);
 736        is_sha1($tree)
 737                or die "Cannot get tree id ($tree): $!";
 738        close($fh)
 739                or die "Error running git write-tree: $?\n";
 740        print "Tree ID $tree\n" if $opt_v;
 741        return $tree;
 742}
 743
 744my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 745my (@old,@new,@skipped,%ignorebranch);
 746
 747# commits that cvsps cannot place anywhere...
 748$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 749
 750sub commit {
 751        if ($branch eq $opt_o && !$index{branch} &&
 752                !get_headref("$remote/$branch")) {
 753            # looks like an initial commit
 754            # use the index primed by git init
 755            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 756            $index{$branch} = "$git_dir/index";
 757        } else {
 758            # use an index per branch to speed up
 759            # imports of projects with many branches
 760            unless ($index{$branch}) {
 761                $index{$branch} = tmpnam();
 762                $ENV{GIT_INDEX_FILE} = $index{$branch};
 763                if ($ancestor) {
 764                    system("git", "read-tree", "$remote/$ancestor");
 765                } else {
 766                    system("git", "read-tree", "$remote/$branch");
 767                }
 768                die "read-tree failed: $?\n" if $?;
 769            }
 770        }
 771        $ENV{GIT_INDEX_FILE} = $index{$branch};
 772
 773        update_index(@old, @new);
 774        @old = @new = ();
 775        my $tree = write_tree();
 776        my $parent = get_headref("$remote/$last_branch");
 777        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 778
 779        my @commit_args;
 780        push @commit_args, ("-p", $parent) if $parent;
 781
 782        # loose detection of merges
 783        # based on the commit msg
 784        foreach my $rx (@mergerx) {
 785                next unless $logmsg =~ $rx && $1;
 786                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 787                if (my $sha1 = get_headref("$remote/$mparent")) {
 788                        push @commit_args, '-p', "$remote/$mparent";
 789                        print "Merge parent branch: $mparent\n" if $opt_v;
 790                }
 791        }
 792
 793        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 794        $ENV{GIT_AUTHOR_NAME} = $author_name;
 795        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 796        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 797        $ENV{GIT_COMMITTER_NAME} = $author_name;
 798        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 799        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 800        my $pid = open2(my $commit_read, my $commit_write,
 801                'git', 'commit-tree', $tree, @commit_args);
 802
 803        # compatibility with git2cvs
 804        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 805        $logmsg =~ s/[\s\n]+\z//;
 806
 807        if (@skipped) {
 808            $logmsg .= "\n\n\nSKIPPED:\n\t";
 809            $logmsg .= join("\n\t", @skipped) . "\n";
 810            @skipped = ();
 811        }
 812
 813        print($commit_write "$logmsg\n") && close($commit_write)
 814                or die "Error writing to git commit-tree: $!\n";
 815
 816        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 817        chomp(my $cid = <$commit_read>);
 818        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 819        print "Commit ID $cid\n" if $opt_v;
 820        close($commit_read);
 821
 822        waitpid($pid,0);
 823        die "Error running git commit-tree: $?\n" if $?;
 824
 825        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 826                or die "Cannot write branch $branch for update: $!\n";
 827
 828        if ($tag) {
 829                my ($xtag) = $tag;
 830                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 831                $xtag =~ tr/_/\./ if ( $opt_u );
 832                $xtag =~ s/[\/]/$opt_s/g;
 833                $xtag =~ s/\[//g;
 834
 835                system('git' , 'tag', '-f', $xtag, $cid) == 0
 836                        or die "Cannot create tag $xtag: $!\n";
 837
 838                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 839        }
 840};
 841
 842my $commitcount = 1;
 843while (<CVS>) {
 844        chomp;
 845        if ($state == 0 and /^-+$/) {
 846                $state = 1;
 847        } elsif ($state == 0) {
 848                $state = 1;
 849                redo;
 850        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 851                $patchset = 0+$_;
 852                $state=2;
 853        } elsif ($state == 2 and s/^Date:\s+//) {
 854                $date = pdate($_);
 855                unless ($date) {
 856                        print STDERR "Could not parse date: $_\n";
 857                        $state=0;
 858                        next;
 859                }
 860                $state=3;
 861        } elsif ($state == 3 and s/^Author:\s+//) {
 862                s/\s+$//;
 863                if (/^(.*?)\s+<(.*)>/) {
 864                    ($author_name, $author_email) = ($1, $2);
 865                } elsif ($conv_author_name{$_}) {
 866                        $author_name = $conv_author_name{$_};
 867                        $author_email = $conv_author_email{$_};
 868                } else {
 869                    $author_name = $author_email = $_;
 870                }
 871                $state = 4;
 872        } elsif ($state == 4 and s/^Branch:\s+//) {
 873                s/\s+$//;
 874                tr/_/\./ if ( $opt_u );
 875                s/[\/]/$opt_s/g;
 876                $branch = $_;
 877                $state = 5;
 878        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 879                s/\s+$//;
 880                $ancestor = $_;
 881                $ancestor = $opt_o if $ancestor eq "HEAD";
 882                $state = 6;
 883        } elsif ($state == 5) {
 884                $ancestor = undef;
 885                $state = 6;
 886                redo;
 887        } elsif ($state == 6 and s/^Tag:\s+//) {
 888                s/\s+$//;
 889                if ($_ eq "(none)") {
 890                        $tag = undef;
 891                } else {
 892                        $tag = $_;
 893                }
 894                $state = 7;
 895        } elsif ($state == 7 and /^Log:/) {
 896                $logmsg = "";
 897                $state = 8;
 898        } elsif ($state == 8 and /^Members:/) {
 899                $branch = $opt_o if $branch eq "HEAD";
 900                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 901                        # skip
 902                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 903                        $state = 11;
 904                        next;
 905                }
 906                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 907                        # skip if the commit is too recent
 908                        # given that the cvsps default fuzz is 300s, we give ourselves another
 909                        # 300s just in case -- this also prevents skipping commits
 910                        # due to server clock drift
 911                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 912                        $state = 11;
 913                        next;
 914                }
 915                if (exists $ignorebranch{$branch}) {
 916                        print STDERR "Skipping $branch\n";
 917                        $state = 11;
 918                        next;
 919                }
 920                if ($ancestor) {
 921                        if ($ancestor eq $branch) {
 922                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 923                                $ancestor = $opt_o;
 924                        }
 925                        if (defined get_headref("$remote/$branch")) {
 926                                print STDERR "Branch $branch already exists!\n";
 927                                $state=11;
 928                                next;
 929                        }
 930                        my $id = get_headref("$remote/$ancestor");
 931                        if (!$id) {
 932                                print STDERR "Branch $ancestor does not exist!\n";
 933                                $ignorebranch{$branch} = 1;
 934                                $state=11;
 935                                next;
 936                        }
 937
 938                        system(qw(git update-ref -m cvsimport),
 939                                "$remote/$branch", $id);
 940                        if($? != 0) {
 941                                print STDERR "Could not create branch $branch\n";
 942                                $ignorebranch{$branch} = 1;
 943                                $state=11;
 944                                next;
 945                        }
 946                }
 947                $last_branch = $branch if $branch ne $last_branch;
 948                $state = 9;
 949        } elsif ($state == 8) {
 950                $logmsg .= "$_\n";
 951        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
 952#       VERSION:1.96->1.96.2.1
 953                my $init = ($2 eq "INITIAL");
 954                my $fn = $1;
 955                my $rev = $3;
 956                $fn =~ s#^/+##;
 957                if ($opt_S && $fn =~ m/$opt_S/) {
 958                    print "SKIPPING $fn v $rev\n";
 959                    push(@skipped, $fn);
 960                    next;
 961                }
 962                print "Fetching $fn   v $rev\n" if $opt_v;
 963                my ($tmpname, $size) = $cvs->file($fn,$rev);
 964                if ($size == -1) {
 965                        push(@old,$fn);
 966                        print "Drop $fn\n" if $opt_v;
 967                } else {
 968                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
 969                        my $pid = open(my $F, '-|');
 970                        die $! unless defined $pid;
 971                        if (!$pid) {
 972                            exec("git", "hash-object", "-w", $tmpname)
 973                                or die "Cannot create object: $!\n";
 974                        }
 975                        my $sha = <$F>;
 976                        chomp $sha;
 977                        close $F;
 978                        my $mode = pmode($cvs->{'mode'});
 979                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
 980                }
 981                unlink($tmpname);
 982        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
 983                my $fn = $1;
 984                $fn =~ s#^/+##;
 985                push(@old,$fn);
 986                print "Delete $fn\n" if $opt_v;
 987        } elsif ($state == 9 and /^\s*$/) {
 988                $state = 10;
 989        } elsif (($state == 9 or $state == 10) and /^-+$/) {
 990                $commitcount++;
 991                if ($opt_L && $commitcount > $opt_L) {
 992                        last;
 993                }
 994                commit();
 995                if (($commitcount & 1023) == 0) {
 996                        system(qw(git repack -a -d));
 997                }
 998                $state = 1;
 999        } elsif ($state == 11 and /^-+$/) {
1000                $state = 1;
1001        } elsif (/^-+$/) { # end of unknown-line processing
1002                $state = 1;
1003        } elsif ($state != 11) { # ignore stuff when skipping
1004                print STDERR "* UNKNOWN LINE * $_\n";
1005        }
1006}
1007commit() if $branch and $state != 11;
1008
1009unless ($opt_P) {
1010        unlink($cvspsfile);
1011}
1012
1013# The heuristic of repacking every 1024 commits can leave a
1014# lot of unpacked data.  If there is more than 1MB worth of
1015# not-packed objects, repack once more.
1016my $line = `git count-objects`;
1017if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1018  my ($n_objects, $kb) = ($1, $2);
1019  1024 < $kb
1020    and system(qw(git repack -a -d));
1021}
1022
1023foreach my $git_index (values %index) {
1024    if ($git_index ne "$git_dir/index") {
1025        unlink($git_index);
1026    }
1027}
1028
1029if (defined $orig_git_index) {
1030        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1031} else {
1032        delete $ENV{GIT_INDEX_FILE};
1033}
1034
1035# Now switch back to the branch we were in before all of this happened
1036if ($orig_branch) {
1037        print "DONE.\n" if $opt_v;
1038        if ($opt_i) {
1039                exit 0;
1040        }
1041        my $tip_at_end = `git rev-parse --verify HEAD`;
1042        if ($tip_at_start ne $tip_at_end) {
1043                for ($tip_at_start, $tip_at_end) { chomp; }
1044                print "Fetched into the current branch.\n" if $opt_v;
1045                system(qw(git read-tree -u -m),
1046                       $tip_at_start, $tip_at_end);
1047                die "Fast-forward update failed: $?\n" if $?;
1048        }
1049        else {
1050                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1051                die "Could not merge $opt_o into the current branch.\n" if $?;
1052        }
1053} else {
1054        $orig_branch = "master";
1055        print "DONE; creating $orig_branch branch\n" if $opt_v;
1056        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1057                unless defined get_headref('refs/heads/master');
1058        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1059                if ($opt_r && $opt_o ne 'HEAD');
1060        system('git', 'update-ref', 'HEAD', "$orig_branch");
1061        unless ($opt_i) {
1062                system(qw(git checkout -f));
1063                die "checkout failed: $?\n" if $?;
1064        }
1065}