contrib / examples / git-svnimport.perlon commit Merge branch 'sk/svn-parse-datestamp' (06b2a0f)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to pull and analyze SVN changes.
   7#
   8# Checking out the files is done by a single long-running SVN connection.
   9#
  10# The head revision is on branch "origin" by default.
  11# You can change that with the '-o' option.
  12
  13use strict;
  14use warnings;
  15use Getopt::Std;
  16use File::Copy;
  17use File::Spec;
  18use File::Temp qw(tempfile);
  19use File::Path qw(mkpath);
  20use File::Basename qw(basename dirname);
  21use Time::Local;
  22use IO::Pipe;
  23use POSIX qw(strftime dup2);
  24use IPC::Open2;
  25use SVN::Core;
  26use SVN::Ra;
  27
  28die "Need SVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1";
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,
  34    $opt_b,$opt_r,$opt_I,$opt_A,$opt_s,$opt_l,$opt_d,$opt_D,$opt_S,$opt_F,
  35    $opt_P,$opt_R);
  36
  37sub usage() {
  38        print STDERR <<END;
  39usage: ${\basename $0}     # fetch/update GIT from SVN
  40       [-o branch-for-HEAD] [-h] [-v] [-l max_rev] [-R repack_each_revs]
  41       [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
  42       [-d|-D] [-i] [-u] [-r] [-I ignorefilename] [-s start_chg]
  43       [-m] [-M regex] [-A author_file] [-S] [-F] [-P project_name] [SVN_URL]
  44END
  45        exit(1);
  46}
  47
  48getopts("A:b:C:dDFhiI:l:mM:o:rs:t:T:SP:R:uv") or usage();
  49usage if $opt_h;
  50
  51my $tag_name = $opt_t || "tags";
  52my $trunk_name = defined $opt_T ? $opt_T : "trunk";
  53my $branch_name = $opt_b || "branches";
  54my $project_name = $opt_P || "";
  55$project_name = "/" . $project_name if ($project_name);
  56my $repack_after = $opt_R || 1000;
  57my $root_pool = SVN::Pool->new_default;
  58
  59@ARGV == 1 or @ARGV == 2 or usage();
  60
  61$opt_o ||= "origin";
  62$opt_s ||= 1;
  63my $git_tree = $opt_C;
  64$git_tree ||= ".";
  65
  66my $svn_url = $ARGV[0];
  67my $svn_dir = $ARGV[1];
  68
  69our @mergerx = ();
  70if ($opt_m) {
  71        my $branch_esc = quotemeta ($branch_name);
  72        my $trunk_esc  = quotemeta ($trunk_name);
  73        @mergerx =
  74        (
  75                qr!\b(?:merg(?:ed?|ing))\b.*?\b((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i,
  76                qr!\b(?:from|of)\W+((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i,
  77                qr!\b(?:from|of)\W+(?:the )?([\w\.\-]+)[-\s]branch\b!i
  78        );
  79}
  80if ($opt_M) {
  81        unshift (@mergerx, qr/$opt_M/);
  82}
  83
  84# Absolutize filename now, since we will have chdir'ed by the time we
  85# get around to opening it.
  86$opt_A = File::Spec->rel2abs($opt_A) if $opt_A;
  87
  88our %users = ();
  89our $users_file = undef;
  90sub read_users($) {
  91        $users_file = File::Spec->rel2abs(@_);
  92        die "Cannot open $users_file\n" unless -f $users_file;
  93        open(my $authors,$users_file);
  94        while(<$authors>) {
  95                chomp;
  96                next unless /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/;
  97                (my $user,my $name,my $email) = ($1,$2,$3);
  98                $users{$user} = [$name,$email];
  99        }
 100        close($authors);
 101}
 102
 103select(STDERR); $|=1; select(STDOUT);
 104
 105
 106package SVNconn;
 107# Basic SVN connection.
 108# We're only interested in connecting and downloading, so ...
 109
 110use File::Spec;
 111use File::Temp qw(tempfile);
 112use POSIX qw(strftime dup2);
 113use Fcntl qw(SEEK_SET);
 114
 115sub new {
 116        my($what,$repo) = @_;
 117        $what=ref($what) if ref($what);
 118
 119        my $self = {};
 120        $self->{'buffer'} = "";
 121        bless($self,$what);
 122
 123        $repo =~ s#/+$##;
 124        $self->{'fullrep'} = $repo;
 125        $self->conn();
 126
 127        return $self;
 128}
 129
 130sub conn {
 131        my $self = shift;
 132        my $repo = $self->{'fullrep'};
 133        my $auth = SVN::Core::auth_open ([SVN::Client::get_simple_provider,
 134                          SVN::Client::get_ssl_server_trust_file_provider,
 135                          SVN::Client::get_username_provider]);
 136        my $s = SVN::Ra->new(url => $repo, auth => $auth, pool => $root_pool);
 137        die "SVN connection to $repo: $!\n" unless defined $s;
 138        $self->{'svn'} = $s;
 139        $self->{'repo'} = $repo;
 140        $self->{'maxrev'} = $s->get_latest_revnum();
 141}
 142
 143sub file {
 144        my($self,$path,$rev) = @_;
 145
 146        my ($fh, $name) = tempfile('gitsvn.XXXXXX',
 147                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 148
 149        print "... $rev $path ...\n" if $opt_v;
 150        my (undef, $properties);
 151        $path =~ s#^/*##;
 152        my $subpool = SVN::Pool::new_default_sub;
 153        eval { (undef, $properties)
 154                   = $self->{'svn'}->get_file($path,$rev,$fh); };
 155        if($@) {
 156                return undef if $@ =~ /Attempted to get checksum/;
 157                die $@;
 158        }
 159        my $mode;
 160        if (exists $properties->{'svn:executable'}) {
 161                $mode = '100755';
 162        } elsif (exists $properties->{'svn:special'}) {
 163                my ($special_content, $filesize);
 164                $filesize = tell $fh;
 165                seek $fh, 0, SEEK_SET;
 166                read $fh, $special_content, $filesize;
 167                if ($special_content =~ s/^link //) {
 168                        $mode = '120000';
 169                        seek $fh, 0, SEEK_SET;
 170                        truncate $fh, 0;
 171                        print $fh $special_content;
 172                } else {
 173                        die "unexpected svn:special file encountered";
 174                }
 175        } else {
 176                $mode = '100644';
 177        }
 178        close ($fh);
 179
 180        return ($name, $mode);
 181}
 182
 183sub ignore {
 184        my($self,$path,$rev) = @_;
 185
 186        print "... $rev $path ...\n" if $opt_v;
 187        $path =~ s#^/*##;
 188        my $subpool = SVN::Pool::new_default_sub;
 189        my (undef,undef,$properties)
 190            = $self->{'svn'}->get_dir($path,$rev,undef);
 191        if (exists $properties->{'svn:ignore'}) {
 192                my ($fh, $name) = tempfile('gitsvn.XXXXXX',
 193                                           DIR => File::Spec->tmpdir(),
 194                                           UNLINK => 1);
 195                print $fh $properties->{'svn:ignore'};
 196                close($fh);
 197                return $name;
 198        } else {
 199                return undef;
 200        }
 201}
 202
 203sub dir_list {
 204        my($self,$path,$rev) = @_;
 205        $path =~ s#^/*##;
 206        my $subpool = SVN::Pool::new_default_sub;
 207        my ($dirents,undef,$properties)
 208            = $self->{'svn'}->get_dir($path,$rev,undef);
 209        return $dirents;
 210}
 211
 212package main;
 213use URI;
 214
 215our $svn = $svn_url;
 216$svn .= "/$svn_dir" if defined $svn_dir;
 217my $svn2 = SVNconn->new($svn);
 218$svn = SVNconn->new($svn);
 219
 220my $lwp_ua;
 221if($opt_d or $opt_D) {
 222        $svn_url = URI->new($svn_url)->canonical;
 223        if($opt_D) {
 224                $svn_dir =~ s#/*$#/#;
 225        } else {
 226                $svn_dir = "";
 227        }
 228        if ($svn_url->scheme eq "http") {
 229                use LWP::UserAgent;
 230                $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []);
 231        } else {
 232                print STDERR "Warning: not HTTP; turning off direct file access\n";
 233                $opt_d=0;
 234        }
 235}
 236
 237sub pdate($) {
 238        my($d) = @_;
 239        $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)#
 240                or die "Unparseable date: $d\n";
 241        my $y=$1; $y-=1900 if $y>1900;
 242        return timegm($6||0,$5,$4,$3,$2-1,$y);
 243}
 244
 245sub getwd() {
 246        my $pwd = `pwd`;
 247        chomp $pwd;
 248        return $pwd;
 249}
 250
 251
 252sub get_headref($$) {
 253    my $name    = shift;
 254    my $git_dir = shift;
 255    my $sha;
 256
 257    if (open(C,"$git_dir/refs/heads/$name")) {
 258        chomp($sha = <C>);
 259        close(C);
 260        length($sha) == 40
 261            or die "Cannot get head id for $name ($sha): $!\n";
 262    }
 263    return $sha;
 264}
 265
 266
 267-d $git_tree
 268        or mkdir($git_tree,0777)
 269        or die "Could not create $git_tree: $!";
 270chdir($git_tree);
 271
 272my $orig_branch = "";
 273my $forward_master = 0;
 274my %branches;
 275
 276my $git_dir = $ENV{"GIT_DIR"} || ".git";
 277$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 278$ENV{"GIT_DIR"} = $git_dir;
 279my $orig_git_index;
 280$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 281my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx',
 282                                    DIR => File::Spec->tmpdir());
 283close ($git_ih);
 284$ENV{GIT_INDEX_FILE} = $git_index;
 285my $maxnum = 0;
 286my $last_rev = "";
 287my $last_branch;
 288my $current_rev = $opt_s || 1;
 289unless(-d $git_dir) {
 290        system("git init");
 291        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 292        system("git read-tree --empty");
 293        die "Cannot init an empty tree: $?\n" if $?;
 294
 295        $last_branch = $opt_o;
 296        $orig_branch = "";
 297} else {
 298        -f "$git_dir/refs/heads/$opt_o"
 299                or die "Branch '$opt_o' does not exist.\n".
 300                       "Either use the correct '-o branch' option,\n".
 301                       "or import to a new repository.\n";
 302
 303        -f "$git_dir/svn2git"
 304                or die "'$git_dir/svn2git' does not exist.\n".
 305                       "You need that file for incremental imports.\n";
 306        open(F, "git symbolic-ref HEAD |") or
 307                die "Cannot run git-symbolic-ref: $!\n";
 308        chomp ($last_branch = <F>);
 309        $last_branch = basename($last_branch);
 310        close(F);
 311        unless($last_branch) {
 312                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 313                $last_branch = "master";
 314        }
 315        $orig_branch = $last_branch;
 316        $last_rev = get_headref($orig_branch, $git_dir);
 317        if (-f "$git_dir/SVN2GIT_HEAD") {
 318                die <<EOM;
 319SVN2GIT_HEAD exists.
 320Make sure your working directory corresponds to HEAD and remove SVN2GIT_HEAD.
 321You may need to run
 322
 323    git-read-tree -m -u SVN2GIT_HEAD HEAD
 324EOM
 325        }
 326        system('cp', "$git_dir/HEAD", "$git_dir/SVN2GIT_HEAD");
 327
 328        $forward_master =
 329            $opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
 330            system('cmp', '-s', "$git_dir/refs/heads/master",
 331                                "$git_dir/refs/heads/$opt_o") == 0;
 332
 333        # populate index
 334        system('git', 'read-tree', $last_rev);
 335        die "read-tree failed: $?\n" if $?;
 336
 337        # Get the last import timestamps
 338        open my $B,"<", "$git_dir/svn2git";
 339        while(<$B>) {
 340                chomp;
 341                my($num,$branch,$ref) = split;
 342                $branches{$branch}{$num} = $ref;
 343                $branches{$branch}{"LAST"} = $ref;
 344                $current_rev = $num+1 if $current_rev <= $num;
 345        }
 346        close($B);
 347}
 348-d $git_dir
 349        or die "Could not create git subdir ($git_dir).\n";
 350
 351my $default_authors = "$git_dir/svn-authors";
 352if ($opt_A) {
 353        read_users($opt_A);
 354        copy($opt_A,$default_authors) or die "Copy failed: $!";
 355} else {
 356        read_users($default_authors) if -f $default_authors;
 357}
 358
 359open BRANCHES,">>", "$git_dir/svn2git";
 360
 361sub node_kind($$) {
 362        my ($svnpath, $revision) = @_;
 363        $svnpath =~ s#^/*##;
 364        my $subpool = SVN::Pool::new_default_sub;
 365        my $kind = $svn->{'svn'}->check_path($svnpath,$revision);
 366        return $kind;
 367}
 368
 369sub get_file($$$) {
 370        my($svnpath,$rev,$path) = @_;
 371
 372        # now get it
 373        my ($name,$mode);
 374        if($opt_d) {
 375                my($req,$res);
 376
 377                # /svn/!svn/bc/2/django/trunk/django-docs/build.py
 378                my $url=$svn_url->clone();
 379                $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath");
 380                print "... $path...\n" if $opt_v;
 381                $req = HTTP::Request->new(GET => $url);
 382                $res = $lwp_ua->request($req);
 383                if ($res->is_success) {
 384                        my $fh;
 385                        ($fh, $name) = tempfile('gitsvn.XXXXXX',
 386                        DIR => File::Spec->tmpdir(), UNLINK => 1);
 387                        print $fh $res->content;
 388                        close($fh) or die "Could not write $name: $!\n";
 389                } else {
 390                        return undef if $res->code == 301; # directory?
 391                        die $res->status_line." at $url\n";
 392                }
 393                $mode = '0644'; # can't obtain mode via direct http request?
 394        } else {
 395                ($name,$mode) = $svn->file("$svnpath",$rev);
 396                return undef unless defined $name;
 397        }
 398
 399        my $pid = open(my $F, '-|');
 400        die $! unless defined $pid;
 401        if (!$pid) {
 402            exec("git", "hash-object", "-w", $name)
 403                or die "Cannot create object: $!\n";
 404        }
 405        my $sha = <$F>;
 406        chomp $sha;
 407        close $F;
 408        unlink $name;
 409        return [$mode, $sha, $path];
 410}
 411
 412sub get_ignore($$$$$) {
 413        my($new,$old,$rev,$path,$svnpath) = @_;
 414
 415        return unless $opt_I;
 416        my $name = $svn->ignore("$svnpath",$rev);
 417        if ($path eq '/') {
 418                $path = $opt_I;
 419        } else {
 420                $path = File::Spec->catfile($path,$opt_I);
 421        }
 422        if (defined $name) {
 423                my $pid = open(my $F, '-|');
 424                die $! unless defined $pid;
 425                if (!$pid) {
 426                        exec("git", "hash-object", "-w", $name)
 427                            or die "Cannot create object: $!\n";
 428                }
 429                my $sha = <$F>;
 430                chomp $sha;
 431                close $F;
 432                unlink $name;
 433                push(@$new,['0644',$sha,$path]);
 434        } elsif (defined $old) {
 435                push(@$old,$path);
 436        }
 437}
 438
 439sub project_path($$)
 440{
 441        my ($path, $project) = @_;
 442
 443        $path = "/".$path unless ($path =~ m#^\/#) ;
 444        return $1 if ($path =~ m#^$project\/(.*)$#);
 445
 446        $path =~ s#\.#\\\.#g;
 447        $path =~ s#\+#\\\+#g;
 448        return "/" if ($project =~ m#^$path.*$#);
 449
 450        return undef;
 451}
 452
 453sub split_path($$) {
 454        my($rev,$path) = @_;
 455        my $branch;
 456
 457        if($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) {
 458                $branch = "/$1";
 459        } elsif($path =~ s#^/\Q$trunk_name\E/?##) {
 460                $branch = "/";
 461        } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
 462                $branch = $1;
 463        } else {
 464                my %no_error = (
 465                        "/" => 1,
 466                        "/$tag_name" => 1,
 467                        "/$branch_name" => 1
 468                );
 469                print STDERR "$rev: Unrecognized path: $path\n" unless (defined $no_error{$path});
 470                return ()
 471        }
 472        if ($path eq "") {
 473                $path = "/";
 474        } elsif ($project_name) {
 475                $path = project_path($path, $project_name);
 476        }
 477        return ($branch,$path);
 478}
 479
 480sub branch_rev($$) {
 481
 482        my ($srcbranch,$uptorev) = @_;
 483
 484        my $bbranches = $branches{$srcbranch};
 485        my @revs = reverse sort { ($a eq 'LAST' ? 0 : $a) <=> ($b eq 'LAST' ? 0 : $b) } keys %$bbranches;
 486        my $therev;
 487        foreach my $arev(@revs) {
 488                next if  ($arev eq 'LAST');
 489                if ($arev <= $uptorev) {
 490                        $therev = $arev;
 491                        last;
 492                }
 493        }
 494        return $therev;
 495}
 496
 497sub expand_svndir($$$);
 498
 499sub expand_svndir($$$)
 500{
 501        my ($svnpath, $rev, $path) = @_;
 502        my @list;
 503        get_ignore(\@list, undef, $rev, $path, $svnpath);
 504        my $dirents = $svn->dir_list($svnpath, $rev);
 505        foreach my $p(keys %$dirents) {
 506                my $kind = node_kind($svnpath.'/'.$p, $rev);
 507                if ($kind eq $SVN::Node::file) {
 508                        my $f = get_file($svnpath.'/'.$p, $rev, $path.'/'.$p);
 509                        push(@list, $f) if $f;
 510                } elsif ($kind eq $SVN::Node::dir) {
 511                        push(@list,
 512                             expand_svndir($svnpath.'/'.$p, $rev, $path.'/'.$p));
 513                }
 514        }
 515        return @list;
 516}
 517
 518sub copy_path($$$$$$$$) {
 519        # Somebody copied a whole subdirectory.
 520        # We need to find the index entries from the old version which the
 521        # SVN log entry points to, and add them to the new place.
 522
 523        my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_;
 524
 525        my($srcbranch,$srcpath) = split_path($rev,$oldpath);
 526        unless(defined $srcbranch && defined $srcpath) {
 527                print "Path not found when copying from $oldpath @ $rev.\n".
 528                        "Will try to copy from original SVN location...\n"
 529                        if $opt_v;
 530                push (@$new, expand_svndir($oldpath, $rev, $path));
 531                return;
 532        }
 533        my $therev = branch_rev($srcbranch, $rev);
 534        my $gitrev = $branches{$srcbranch}{$therev};
 535        unless($gitrev) {
 536                print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n";
 537                return;
 538        }
 539        if ($srcbranch ne $newbranch) {
 540                push(@$parents, $branches{$srcbranch}{'LAST'});
 541        }
 542        print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v;
 543        if ($node_kind eq $SVN::Node::dir) {
 544                $srcpath =~ s#/*$#/#;
 545        }
 546
 547        my $pid = open my $f,'-|';
 548        die $! unless defined $pid;
 549        if (!$pid) {
 550                exec("git","ls-tree","-r","-z",$gitrev,$srcpath)
 551                        or die $!;
 552        }
 553        local $/ = "\0";
 554        while(<$f>) {
 555                chomp;
 556                my($m,$p) = split(/\t/,$_,2);
 557                my($mode,$type,$sha1) = split(/ /,$m);
 558                next if $type ne "blob";
 559                if ($node_kind eq $SVN::Node::dir) {
 560                        $p = $path . substr($p,length($srcpath)-1);
 561                } else {
 562                        $p = $path;
 563                }
 564                push(@$new,[$mode,$sha1,$p]);
 565        }
 566        close($f) or
 567                print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n";
 568}
 569
 570sub commit {
 571        my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
 572        my($committer_name,$committer_email,$dest);
 573        my($author_name,$author_email);
 574        my(@old,@new,@parents);
 575
 576        if (not defined $author or $author eq "") {
 577                $committer_name = $committer_email = "unknown";
 578        } elsif (defined $users_file) {
 579                die "User $author is not listed in $users_file\n"
 580                    unless exists $users{$author};
 581                ($committer_name,$committer_email) = @{$users{$author}};
 582        } elsif ($author =~ /^(.*?)\s+<(.*)>$/) {
 583                ($committer_name, $committer_email) = ($1, $2);
 584        } else {
 585                $author =~ s/^<(.*)>$/$1/;
 586                $committer_name = $committer_email = $author;
 587        }
 588
 589        if ($opt_F && $message =~ /From:\s+(.*?)\s+<(.*)>\s*\n/) {
 590                ($author_name, $author_email) = ($1, $2);
 591                print "Author from From: $1 <$2>\n" if ($opt_v);;
 592        } elsif ($opt_S && $message =~ /Signed-off-by:\s+(.*?)\s+<(.*)>\s*\n/) {
 593                ($author_name, $author_email) = ($1, $2);
 594                print "Author from Signed-off-by: $1 <$2>\n" if ($opt_v);;
 595        } else {
 596                $author_name = $committer_name;
 597                $author_email = $committer_email;
 598        }
 599
 600        $date = pdate($date);
 601
 602        my $tag;
 603        my $parent;
 604        if($branch eq "/") { # trunk
 605                $parent = $opt_o;
 606        } elsif($branch =~ m#^/(.+)#) { # tag
 607                $tag = 1;
 608                $parent = $1;
 609        } else { # "normal" branch
 610                # nothing to do
 611                $parent = $branch;
 612        }
 613        $dest = $parent;
 614
 615        my $prev = $changed_paths->{"/"};
 616        if($prev and $prev->[0] eq "A") {
 617                delete $changed_paths->{"/"};
 618                my $oldpath = $prev->[1];
 619                my $rev;
 620                if(defined $oldpath) {
 621                        my $p;
 622                        ($parent,$p) = split_path($revision,$oldpath);
 623                        if(defined $parent) {
 624                                if($parent eq "/") {
 625                                        $parent = $opt_o;
 626                                } else {
 627                                        $parent =~ s#^/##; # if it's a tag
 628                                }
 629                        }
 630                } else {
 631                        $parent = undef;
 632                }
 633        }
 634
 635        my $rev;
 636        if($revision > $opt_s and defined $parent) {
 637                open(H,'-|',"git","rev-parse","--verify",$parent);
 638                $rev = <H>;
 639                close(H) or do {
 640                        print STDERR "$revision: cannot find commit '$parent'!\n";
 641                        return;
 642                };
 643                chop $rev;
 644                if(length($rev) != 40) {
 645                        print STDERR "$revision: cannot find commit '$parent'!\n";
 646                        return;
 647                }
 648                $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"};
 649                if($revision != $opt_s and not $rev) {
 650                        print STDERR "$revision: do not know ancestor for '$parent'!\n";
 651                        return;
 652                }
 653        } else {
 654                $rev = undef;
 655        }
 656
 657#       if($prev and $prev->[0] eq "A") {
 658#               if(not $tag) {
 659#                       unless(open(H,"> $git_dir/refs/heads/$branch")) {
 660#                               print STDERR "$revision: Could not create branch $branch: $!\n";
 661#                               $state=11;
 662#                               next;
 663#                       }
 664#                       print H "$rev\n"
 665#                               or die "Could not write branch $branch: $!";
 666#                       close(H)
 667#                               or die "Could not write branch $branch: $!";
 668#               }
 669#       }
 670        if(not defined $rev) {
 671                unlink($git_index);
 672        } elsif ($rev ne $last_rev) {
 673                print "Switching from $last_rev to $rev ($branch)\n" if $opt_v;
 674                system("git", "read-tree", $rev);
 675                die "read-tree failed for $rev: $?\n" if $?;
 676                $last_rev = $rev;
 677        }
 678
 679        push (@parents, $rev) if defined $rev;
 680
 681        my $cid;
 682        if($tag and not %$changed_paths) {
 683                $cid = $rev;
 684        } else {
 685                my @paths = sort keys %$changed_paths;
 686                foreach my $path(@paths) {
 687                        my $action = $changed_paths->{$path};
 688
 689                        if ($action->[0] eq "R") {
 690                                # refer to a file/tree in an earlier commit
 691                                push(@old,$path); # remove any old stuff
 692                        }
 693                        if(($action->[0] eq "A") || ($action->[0] eq "R")) {
 694                                my $node_kind = node_kind($action->[3], $revision);
 695                                if ($node_kind eq $SVN::Node::file) {
 696                                        my $f = get_file($action->[3],
 697                                                         $revision, $path);
 698                                        if ($f) {
 699                                                push(@new,$f) if $f;
 700                                        } else {
 701                                                my $opath = $action->[3];
 702                                                print STDERR "$revision: $branch: could not fetch '$opath'\n";
 703                                        }
 704                                } elsif ($node_kind eq $SVN::Node::dir) {
 705                                        if($action->[1]) {
 706                                                copy_path($revision, $branch,
 707                                                          $path, $action->[1],
 708                                                          $action->[2], $node_kind,
 709                                                          \@new, \@parents);
 710                                        } else {
 711                                                get_ignore(\@new, \@old, $revision,
 712                                                           $path, $action->[3]);
 713                                        }
 714                                }
 715                        } elsif ($action->[0] eq "D") {
 716                                push(@old,$path);
 717                        } elsif ($action->[0] eq "M") {
 718                                my $node_kind = node_kind($action->[3], $revision);
 719                                if ($node_kind eq $SVN::Node::file) {
 720                                        my $f = get_file($action->[3],
 721                                                         $revision, $path);
 722                                        push(@new,$f) if $f;
 723                                } elsif ($node_kind eq $SVN::Node::dir) {
 724                                        get_ignore(\@new, \@old, $revision,
 725                                                   $path, $action->[3]);
 726                                }
 727                        } else {
 728                                die "$revision: unknown action '".$action->[0]."' for $path\n";
 729                        }
 730                }
 731
 732                while(@old) {
 733                        my @o1;
 734                        if(@old > 55) {
 735                                @o1 = splice(@old,0,50);
 736                        } else {
 737                                @o1 = @old;
 738                                @old = ();
 739                        }
 740                        my $pid = open my $F, "-|";
 741                        die "$!" unless defined $pid;
 742                        if (!$pid) {
 743                                exec("git", "ls-files", "-z", @o1) or die $!;
 744                        }
 745                        @o1 = ();
 746                        local $/ = "\0";
 747                        while(<$F>) {
 748                                chomp;
 749                                push(@o1,$_);
 750                        }
 751                        close($F);
 752
 753                        while(@o1) {
 754                                my @o2;
 755                                if(@o1 > 55) {
 756                                        @o2 = splice(@o1,0,50);
 757                                } else {
 758                                        @o2 = @o1;
 759                                        @o1 = ();
 760                                }
 761                                system("git","update-index","--force-remove","--",@o2);
 762                                die "Cannot remove files: $?\n" if $?;
 763                        }
 764                }
 765                while(@new) {
 766                        my @n2;
 767                        if(@new > 12) {
 768                                @n2 = splice(@new,0,10);
 769                        } else {
 770                                @n2 = @new;
 771                                @new = ();
 772                        }
 773                        system("git","update-index","--add",
 774                                (map { ('--cacheinfo', @$_) } @n2));
 775                        die "Cannot add files: $?\n" if $?;
 776                }
 777
 778                my $pid = open(C,"-|");
 779                die "Cannot fork: $!" unless defined $pid;
 780                unless($pid) {
 781                        exec("git","write-tree");
 782                        die "Cannot exec git-write-tree: $!\n";
 783                }
 784                chomp(my $tree = <C>);
 785                length($tree) == 40
 786                        or die "Cannot get tree id ($tree): $!\n";
 787                close(C)
 788                        or die "Error running git-write-tree: $?\n";
 789                print "Tree ID $tree\n" if $opt_v;
 790
 791                my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n";
 792                my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n";
 793                $pid = fork();
 794                die "Fork: $!\n" unless defined $pid;
 795                unless($pid) {
 796                        $pr->writer();
 797                        $pw->reader();
 798                        open(OUT,">&STDOUT");
 799                        dup2($pw->fileno(),0);
 800                        dup2($pr->fileno(),1);
 801                        $pr->close();
 802                        $pw->close();
 803
 804                        my @par = ();
 805
 806                        # loose detection of merges
 807                        # based on the commit msg
 808                        foreach my $rx (@mergerx) {
 809                                if ($message =~ $rx) {
 810                                        my $mparent = $1;
 811                                        if ($mparent eq 'HEAD') { $mparent = $opt_o };
 812                                        if ( -e "$git_dir/refs/heads/$mparent") {
 813                                                $mparent = get_headref($mparent, $git_dir);
 814                                                push (@parents, $mparent);
 815                                                print OUT "Merge parent branch: $mparent\n" if $opt_v;
 816                                        }
 817                                }
 818                        }
 819                        my %seen_parents = ();
 820                        my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents;
 821                        foreach my $bparent (@unique_parents) {
 822                                push @par, '-p', $bparent;
 823                                print OUT "Merge parent branch: $bparent\n" if $opt_v;
 824                        }
 825
 826                        exec("env",
 827                                "GIT_AUTHOR_NAME=$author_name",
 828                                "GIT_AUTHOR_EMAIL=$author_email",
 829                                "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
 830                                "GIT_COMMITTER_NAME=$committer_name",
 831                                "GIT_COMMITTER_EMAIL=$committer_email",
 832                                "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
 833                                "git", "commit-tree", $tree,@par);
 834                        die "Cannot exec git-commit-tree: $!\n";
 835                }
 836                $pw->writer();
 837                $pr->reader();
 838
 839                $message =~ s/[\s\n]+\z//;
 840                $message = "r$revision: $message" if $opt_r;
 841
 842                print $pw "$message\n"
 843                        or die "Error writing to git-commit-tree: $!\n";
 844                $pw->close();
 845
 846                print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v;
 847                chomp($cid = <$pr>);
 848                length($cid) == 40
 849                        or die "Cannot get commit id ($cid): $!\n";
 850                print "Commit ID $cid\n" if $opt_v;
 851                $pr->close();
 852
 853                waitpid($pid,0);
 854                die "Error running git-commit-tree: $?\n" if $?;
 855        }
 856
 857        if (not defined $cid) {
 858                $cid = $branches{"/"}{"LAST"};
 859        }
 860
 861        if(not defined $dest) {
 862                print "... no known parent\n" if $opt_v;
 863        } elsif(not $tag) {
 864                print "Writing to refs/heads/$dest\n" if $opt_v;
 865                open(C,">$git_dir/refs/heads/$dest") and
 866                print C ("$cid\n") and
 867                close(C)
 868                        or die "Cannot write branch $dest for update: $!\n";
 869        }
 870
 871        if ($tag) {
 872                $last_rev = "-" if %$changed_paths;
 873                # the tag was 'complex', i.e. did not refer to a "real" revision
 874
 875                $dest =~ tr/_/\./ if $opt_u;
 876
 877                system('git', 'tag', '-f', $dest, $cid) == 0
 878                        or die "Cannot create tag $dest: $!\n";
 879
 880                print "Created tag '$dest' on '$branch'\n" if $opt_v;
 881        }
 882        $branches{$branch}{"LAST"} = $cid;
 883        $branches{$branch}{$revision} = $cid;
 884        $last_rev = $cid;
 885        print BRANCHES "$revision $branch $cid\n";
 886        print "DONE: $revision $dest $cid\n" if $opt_v;
 887}
 888
 889sub commit_all {
 890        # Recursive use of the SVN connection does not work
 891        local $svn = $svn2;
 892
 893        my ($changed_paths, $revision, $author, $date, $message) = @_;
 894        my %p;
 895        while(my($path,$action) = each %$changed_paths) {
 896                $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev, $path ];
 897        }
 898        $changed_paths = \%p;
 899
 900        my %done;
 901        my @col;
 902        my $pref;
 903        my $branch;
 904
 905        while(my($path,$action) = each %$changed_paths) {
 906                ($branch,$path) = split_path($revision,$path);
 907                next if not defined $branch;
 908                next if not defined $path;
 909                $done{$branch}{$path} = $action;
 910        }
 911        while(($branch,$changed_paths) = each %done) {
 912                commit($branch, $changed_paths, $revision, $author, $date, $message);
 913        }
 914}
 915
 916$opt_l = $svn->{'maxrev'} if not defined $opt_l or $opt_l > $svn->{'maxrev'};
 917
 918if ($opt_l < $current_rev) {
 919    print "Up to date: no new revisions to fetch!\n" if $opt_v;
 920    unlink("$git_dir/SVN2GIT_HEAD");
 921    exit;
 922}
 923
 924print "Processing from $current_rev to $opt_l ...\n" if $opt_v;
 925
 926my $from_rev;
 927my $to_rev = $current_rev - 1;
 928
 929my $subpool = SVN::Pool::new_default_sub;
 930while ($to_rev < $opt_l) {
 931        $subpool->clear;
 932        $from_rev = $to_rev + 1;
 933        $to_rev = $from_rev + $repack_after;
 934        $to_rev = $opt_l if $opt_l < $to_rev;
 935        print "Fetching from $from_rev to $to_rev ...\n" if $opt_v;
 936        $svn->{'svn'}->get_log("",$from_rev,$to_rev,0,1,1,\&commit_all);
 937        my $pid = fork();
 938        die "Fork: $!\n" unless defined $pid;
 939        unless($pid) {
 940                exec("git", "repack", "-d")
 941                        or die "Cannot repack: $!\n";
 942        }
 943        waitpid($pid, 0);
 944}
 945
 946
 947unlink($git_index);
 948
 949if (defined $orig_git_index) {
 950        $ENV{GIT_INDEX_FILE} = $orig_git_index;
 951} else {
 952        delete $ENV{GIT_INDEX_FILE};
 953}
 954
 955# Now switch back to the branch we were in before all of this happened
 956if($orig_branch) {
 957        print "DONE\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
 958        system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
 959                if $forward_master;
 960        unless ($opt_i) {
 961                system('git', 'read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD');
 962                die "read-tree failed: $?\n" if $?;
 963        }
 964} else {
 965        $orig_branch = "master";
 966        print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
 967        system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
 968                unless -f "$git_dir/refs/heads/master";
 969        system('git', 'update-ref', 'HEAD', "$orig_branch");
 970        unless ($opt_i) {
 971                system('git checkout');
 972                die "checkout failed: $?\n" if $?;
 973        }
 974}
 975unlink("$git_dir/SVN2GIT_HEAD");
 976close(BRANCHES);