b976841efd276595770a51562ad7ce9b1d51f743
   1#!/usr/bin/perl -w
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to pull and analyze SVN changes.
   7#
   8# Checking out the files is done by a single long-running SVN connection.
   9#
  10# The head revision is on branch "origin" by default.
  11# You can change that with the '-o' option.
  12
  13require v5.8.0; # for shell-safe open("-|",LIST)
  14use strict;
  15use warnings;
  16use Getopt::Std;
  17use File::Spec;
  18use File::Temp qw(tempfile);
  19use File::Path qw(mkpath);
  20use File::Basename qw(basename dirname);
  21use Time::Local;
  22use IO::Pipe;
  23use POSIX qw(strftime dup2);
  24use IPC::Open2;
  25use SVN::Core;
  26use SVN::Ra;
  27
  28die "Need CVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1";
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s,$opt_l);
  34
  35sub usage() {
  36        print STDERR <<END;
  37Usage: ${\basename $0}     # fetch/update GIT from CVS
  38       [-o branch-for-HEAD] [-h] [-v] [-l max_num_changes]
  39       [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
  40       [-i] [-u] [-s start_chg] [-m] [-M regex] [SVN_URL]
  41END
  42        exit(1);
  43}
  44
  45getopts("b:C:hil:mM:o:s:t:T:uv") or usage();
  46usage if $opt_h;
  47
  48my $tag_name = $opt_t || "tags";
  49my $trunk_name = $opt_T || "trunk";
  50my $branch_name = $opt_b || "branches";
  51
  52@ARGV == 1 or usage();
  53
  54$opt_o ||= "origin";
  55$opt_s ||= 1;
  56$opt_l = 100 unless defined $opt_l;
  57my $git_tree = $opt_C;
  58$git_tree ||= ".";
  59
  60my $svn_url = $ARGV[0];
  61
  62our @mergerx = ();
  63if ($opt_m) {
  64        @mergerx = ( qr/\W(?:from|of|merge|merging|merged) (\w+)/i );
  65}
  66if ($opt_M) {
  67        push (@mergerx, qr/$opt_M/);
  68}
  69
  70select(STDERR); $|=1; select(STDOUT);
  71
  72
  73package SVNconn;
  74# Basic SVN connection.
  75# We're only interested in connecting and downloading, so ...
  76
  77use File::Spec;
  78use File::Temp qw(tempfile);
  79use POSIX qw(strftime dup2);
  80
  81sub new {
  82        my($what,$repo) = @_;
  83        $what=ref($what) if ref($what);
  84
  85        my $self = {};
  86        $self->{'buffer'} = "";
  87        bless($self,$what);
  88
  89        $repo =~ s#/+$##;
  90        $self->{'fullrep'} = $repo;
  91        $self->conn();
  92
  93        return $self;
  94}
  95
  96sub conn {
  97        my $self = shift;
  98        my $repo = $self->{'fullrep'};
  99        my $s = SVN::Ra->new($repo);
 100
 101        die "SVN connection to $repo: $!\n" unless defined $s;
 102        $self->{'svn'} = $s;
 103        $self->{'repo'} = $repo;
 104        $self->{'maxrev'} = $s->get_latest_revnum();
 105}
 106
 107sub file {
 108        my($self,$path,$rev) = @_;
 109        my $res;
 110
 111        my ($fh, $name) = tempfile('gitsvn.XXXXXX', 
 112                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 113
 114        print "... $rev $path ...\n" if $opt_v;
 115        eval { $self->{'svn'}->get_file($path,$rev,$fh); };
 116        if ($@ and $@ !~ /Attempted to get checksum/) {
 117            # retry
 118            $self->conn();
 119                eval { $self->{'svn'}->get_file($path,$rev,$fh); };
 120        };
 121        return () if $@ and $@ !~ /Attempted to get checksum/;
 122        die $@ if $@;
 123        close ($fh);
 124
 125        return ($name, $res);
 126}
 127
 128
 129package main;
 130
 131my $svn = SVNconn->new($svn_url);
 132
 133
 134sub pdate($) {
 135        my($d) = @_;
 136        $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)#
 137                or die "Unparseable date: $d\n";
 138        my $y=$1; $y-=1900 if $y>1900;
 139        return timegm($6||0,$5,$4,$3,$2-1,$y);
 140}
 141
 142sub getwd() {
 143        my $pwd = `pwd`;
 144        chomp $pwd;
 145        return $pwd;
 146}
 147
 148
 149sub get_headref($$) {
 150    my $name    = shift;
 151    my $git_dir = shift; 
 152    my $sha;
 153    
 154    if (open(C,"$git_dir/refs/heads/$name")) {
 155        chomp($sha = <C>);
 156        close(C);
 157        length($sha) == 40
 158            or die "Cannot get head id for $name ($sha): $!\n";
 159    }
 160    return $sha;
 161}
 162
 163
 164-d $git_tree
 165        or mkdir($git_tree,0777)
 166        or die "Could not create $git_tree: $!";
 167chdir($git_tree);
 168
 169my $orig_branch = "";
 170my $forward_master = 0;
 171my %branches;
 172
 173my $git_dir = $ENV{"GIT_DIR"} || ".git";
 174$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 175$ENV{"GIT_DIR"} = $git_dir;
 176my $orig_git_index;
 177$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 178my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx',
 179                                    DIR => File::Spec->tmpdir());
 180close ($git_ih);
 181$ENV{GIT_INDEX_FILE} = $git_index;
 182my $maxnum = 0;
 183my $last_rev = "";
 184my $last_branch;
 185my $current_rev = $opt_s-1;
 186unless(-d $git_dir) {
 187        system("git-init-db");
 188        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 189        system("git-read-tree");
 190        die "Cannot init an empty tree: $?\n" if $?;
 191
 192        $last_branch = $opt_o;
 193        $orig_branch = "";
 194} else {
 195        -f "$git_dir/refs/heads/$opt_o"
 196                or die "Branch '$opt_o' does not exist.\n".
 197                       "Either use the correct '-o branch' option,\n".
 198                       "or import to a new repository.\n";
 199
 200        -f "$git_dir/svn2git"
 201                or die "'$git_dir/svn2git' does not exist.\n".
 202                       "You need that file for incremental imports.\n";
 203        $last_branch = basename(readlink("$git_dir/HEAD"));
 204        unless($last_branch) {
 205                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 206                $last_branch = "master";
 207        }
 208        $orig_branch = $last_branch;
 209        $last_rev = get_headref($orig_branch, $git_dir);
 210        if (-f "$git_dir/SVN2GIT_HEAD") {
 211                die <<EOM;
 212SVN2GIT_HEAD exists.
 213Make sure your working directory corresponds to HEAD and remove SVN2GIT_HEAD.
 214You may need to run
 215
 216    git-read-tree -m -u SVN2GIT_HEAD HEAD
 217EOM
 218        }
 219        system('cp', "$git_dir/HEAD", "$git_dir/SVN2GIT_HEAD");
 220
 221        $forward_master =
 222            $opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
 223            system('cmp', '-s', "$git_dir/refs/heads/master", 
 224                                "$git_dir/refs/heads/$opt_o") == 0;
 225
 226        # populate index
 227        system('git-read-tree', $last_rev);
 228        die "read-tree failed: $?\n" if $?;
 229
 230        # Get the last import timestamps
 231        open my $B,"<", "$git_dir/svn2git";
 232        while(<$B>) {
 233                chomp;
 234                my($num,$branch,$ref) = split;
 235                $branches{$branch}{$num} = $ref;
 236                $branches{$branch}{"LAST"} = $ref;
 237                $current_rev = $num if $current_rev < $num;
 238        }
 239        close($B);
 240}
 241-d $git_dir
 242        or die "Could not create git subdir ($git_dir).\n";
 243
 244open BRANCHES,">>", "$git_dir/svn2git";
 245
 246sub get_file($$$) {
 247        my($rev,$branch,$path) = @_;
 248
 249        # revert split_path(), below
 250        my $svnpath;
 251        $path = "" if $path eq "/"; # this should not happen, but ...
 252        if($branch eq "/") {
 253                $svnpath = "/$trunk_name/$path";
 254        } elsif($branch =~ m#^/#) {
 255                $svnpath = "/$tag_name$branch/$path";
 256        } else {
 257                $svnpath = "/$branch_name/$branch/$path";
 258        }
 259
 260        # now get it
 261        my ($name, $res) = eval { $svn->file($svnpath,$rev); };
 262        return () unless defined $name;
 263
 264        open my $F, '-|', "git-hash-object", "-w", $name
 265                or die "Cannot create object: $!\n";
 266        my $sha = <$F>;
 267        chomp $sha;
 268        close $F;
 269        unlink $name;
 270        my $mode = "0644"; # SV does not seem to store any file modes
 271        return [$mode, $sha, $path];
 272}
 273
 274sub split_path($$) {
 275        my($rev,$path) = @_;
 276        my $branch;
 277
 278        if($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) {
 279                $branch = "/$1";
 280        } elsif($path =~ s#^/\Q$trunk_name\E/?##) {
 281                $branch = "/";
 282        } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
 283                $branch = $1;
 284        } else {
 285                print STDERR "$rev: Unrecognized path: $path\n";
 286                return ()
 287        }
 288        $path = "/" if $path eq "";
 289        return ($branch,$path);
 290}
 291
 292sub commit {
 293        my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
 294        my($author_name,$author_email,$dest);
 295        my(@old,@new);
 296
 297        if (not defined $author) {
 298                $author_name = $author_email = "unknown";
 299        } elsif ($author =~ /^(.*?)\s+<(.*)>$/) {
 300                ($author_name, $author_email) = ($1, $2);
 301        } else {
 302                $author =~ s/^<(.*)>$/$1/;
 303                $author_name = $author_email = $author;
 304        }
 305        $date = pdate($date);
 306
 307        my $tag;
 308        my $parent;
 309        if($branch eq "/") { # trunk
 310                $parent = $opt_o;
 311        } elsif($branch =~ m#^/(.+)#) { # tag
 312                $tag = 1;
 313                $parent = $1;
 314        } else { # "normal" branch
 315                # nothing to do
 316                $parent = $branch;
 317        }
 318        $dest = $parent;
 319
 320        my $prev = $changed_paths->{"/"};
 321        if($prev and $prev->[0] eq "A") {
 322                delete $changed_paths->{"/"};
 323                my $oldpath = $prev->[1];
 324                my $rev;
 325                if(defined $oldpath) {
 326                        my $p;
 327                        ($parent,$p) = split_path($revision,$oldpath);
 328                        if($parent eq "/") {
 329                                $parent = $opt_o;
 330                        } else {
 331                                $parent =~ s#^/##; # if it's a tag
 332                        }
 333                } else {
 334                        $parent = undef;
 335                }
 336        }
 337
 338        my $rev;
 339        if($revision > $opt_s and defined $parent) {
 340                open(H,"git-rev-parse --verify $parent |");
 341                $rev = <H>;
 342                close(H) or do {
 343                        print STDERR "$revision: cannot find commit '$parent'!\n";
 344                        return;
 345                };
 346                chop $rev;
 347                if(length($rev) != 40) {
 348                        print STDERR "$revision: cannot find commit '$parent'!\n";
 349                        return;
 350                }
 351                $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"};
 352                if($revision != $opt_s and not $rev) {
 353                        print STDERR "$revision: do not know ancestor for '$parent'!\n";
 354                        return;
 355                }
 356        } else {
 357                $rev = undef;
 358        }
 359
 360#       if($prev and $prev->[0] eq "A") {
 361#               if(not $tag) {
 362#                       unless(open(H,"> $git_dir/refs/heads/$branch")) {
 363#                               print STDERR "$revision: Could not create branch $branch: $!\n";
 364#                               $state=11;
 365#                               next;
 366#                       }
 367#                       print H "$rev\n"
 368#                               or die "Could not write branch $branch: $!";
 369#                       close(H)
 370#                               or die "Could not write branch $branch: $!";
 371#               }
 372#       }
 373        if(not defined $rev) {
 374                unlink($git_index);
 375        } elsif ($rev ne $last_rev) {
 376                print "Switching from $last_rev to $rev ($branch)\n" if $opt_v;
 377                system("git-read-tree", $rev);
 378                die "read-tree failed for $rev: $?\n" if $?;
 379                $last_rev = $rev;
 380        }
 381
 382        my $cid;
 383        if($tag and not %$changed_paths) {
 384                $cid = $rev;
 385        } else {
 386                while(my($path,$action) = each %$changed_paths) {
 387                        if ($action->[0] eq "A") {
 388                                my $f = get_file($revision,$branch,$path);
 389                                push(@new,$f) if $f;
 390                        } elsif ($action->[0] eq "D") {
 391                                push(@old,$path);
 392                        } elsif ($action->[0] eq "M") {
 393                                my $f = get_file($revision,$branch,$path);
 394                                push(@new,$f) if $f;
 395                        } elsif ($action->[0] eq "R") {
 396                                # refer to a file/tree in an earlier commit
 397                                push(@old,$path); # remove any old stuff
 398
 399                                # ... and add any new stuff
 400                                my($b,$p) = split_path($revision,$action->[1]);
 401                                open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $p;
 402                                local $/ = '\0';
 403                                while(<$F>) {
 404                                        chomp;
 405                                        my($m,$p) = split(/\t/,$_,2);
 406                                        my($mode,$type,$sha1) = split(/ /,$m);
 407                                        next if $type ne "blob";
 408                                        push(@new,[$mode,$sha1,$p]);
 409                                }
 410                        } else {
 411                                die "$revision: unknown action '".$action->[0]."' for $path\n";
 412                        }
 413                }
 414
 415                if(@old) {
 416                        open my $F, "-|", "git-ls-files", "-z", @old or die $!;
 417                        @old = ();
 418                        local $/ = '\0';
 419                        while(<$F>) {
 420                                chomp;
 421                                push(@old,$_);
 422                        }
 423                        close($F);
 424
 425                        while(@old) {
 426                                my @o2;
 427                                if(@old > 55) {
 428                                        @o2 = splice(@old,0,50);
 429                                } else {
 430                                        @o2 = @old;
 431                                        @old = ();
 432                                }
 433                                system("git-update-index","--force-remove","--",@o2);
 434                                die "Cannot remove files: $?\n" if $?;
 435                        }
 436                }
 437                while(@new) {
 438                        my @n2;
 439                        if(@new > 12) {
 440                                @n2 = splice(@new,0,10);
 441                        } else {
 442                                @n2 = @new;
 443                                @new = ();
 444                        }
 445                        system("git-update-index","--add",
 446                                (map { ('--cacheinfo', @$_) } @n2));
 447                        die "Cannot add files: $?\n" if $?;
 448                }
 449
 450                my $pid = open(C,"-|");
 451                die "Cannot fork: $!" unless defined $pid;
 452                unless($pid) {
 453                        exec("git-write-tree");
 454                        die "Cannot exec git-write-tree: $!\n";
 455                }
 456                chomp(my $tree = <C>);
 457                length($tree) == 40
 458                        or die "Cannot get tree id ($tree): $!\n";
 459                close(C)
 460                        or die "Error running git-write-tree: $?\n";
 461                print "Tree ID $tree\n" if $opt_v;
 462
 463                my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n";
 464                my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n";
 465                $pid = fork();
 466                die "Fork: $!\n" unless defined $pid;
 467                unless($pid) {
 468                        $pr->writer();
 469                        $pw->reader();
 470                        open(OUT,">&STDOUT");
 471                        dup2($pw->fileno(),0);
 472                        dup2($pr->fileno(),1);
 473                        $pr->close();
 474                        $pw->close();
 475
 476                        my @par = ();
 477                        @par = ("-p",$rev) if defined $rev;
 478
 479                        # loose detection of merges
 480                        # based on the commit msg
 481                        foreach my $rx (@mergerx) {
 482                                if ($message =~ $rx) {
 483                                        my $mparent = $1;
 484                                        if ($mparent eq 'HEAD') { $mparent = $opt_o };
 485                                        if ( -e "$git_dir/refs/heads/$mparent") {
 486                                                $mparent = get_headref($mparent, $git_dir);
 487                                                push @par, '-p', $mparent;
 488                                                print OUT "Merge parent branch: $mparent\n" if $opt_v;
 489                                        }
 490                                } 
 491                        }
 492
 493                        exec("env",
 494                                "GIT_AUTHOR_NAME=$author_name",
 495                                "GIT_AUTHOR_EMAIL=$author_email",
 496                                "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
 497                                "GIT_COMMITTER_NAME=$author_name",
 498                                "GIT_COMMITTER_EMAIL=$author_email",
 499                                "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
 500                                "git-commit-tree", $tree,@par);
 501                        die "Cannot exec git-commit-tree: $!\n";
 502                }
 503                $pw->writer();
 504                $pr->reader();
 505
 506                $message =~ s/[\s\n]+\z//;
 507
 508                print $pw "$message\n"
 509                        or die "Error writing to git-commit-tree: $!\n";
 510                $pw->close();
 511
 512                print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v;
 513                chomp($cid = <$pr>);
 514                length($cid) == 40
 515                        or die "Cannot get commit id ($cid): $!\n";
 516                print "Commit ID $cid\n" if $opt_v;
 517                $pr->close();
 518
 519                waitpid($pid,0);
 520                die "Error running git-commit-tree: $?\n" if $?;
 521        }
 522
 523        if(not defined $dest) {
 524                print "... no known parent\n" if $opt_v;
 525        } elsif(not $tag) {
 526                print "Writing to refs/heads/$dest\n" if $opt_v;
 527                open(C,">$git_dir/refs/heads/$dest") and 
 528                print C ("$cid\n") and
 529                close(C)
 530                        or die "Cannot write branch $dest for update: $!\n";
 531        }
 532
 533        if($tag) {
 534                my($in, $out) = ('','');
 535                $last_rev = "-" if %$changed_paths;
 536                # the tag was 'complex', i.e. did not refer to a "real" revision
 537                
 538                $dest =~ tr/_/\./ if $opt_u;
 539
 540                my $pid = open2($in, $out, 'git-mktag');
 541                print $out ("object $cid\n".
 542                    "type commit\n".
 543                    "tag $dest\n".
 544                    "tagger $author_name <$author_email>\n") and
 545                close($out)
 546                    or die "Cannot create tag object $dest: $!\n";
 547
 548                my $tagobj = <$in>;
 549                chomp $tagobj;
 550
 551                if ( !close($in) or waitpid($pid, 0) != $pid or
 552                                $? != 0 or $tagobj !~ /^[0123456789abcdef]{40}$/ ) {
 553                        die "Cannot create tag object $dest: $!\n";
 554                }
 555
 556                open(C,">$git_dir/refs/tags/$dest") and
 557                print C ("$tagobj\n") and
 558                close(C)
 559                        or die "Cannot create tag $branch: $!\n";
 560
 561                print "Created tag '$dest' on '$branch'\n" if $opt_v;
 562        }
 563        $branches{$branch}{"LAST"} = $cid;
 564        $branches{$branch}{$revision} = $cid;
 565        $last_rev = $cid;
 566        print BRANCHES "$revision $branch $cid\n";
 567        print "DONE: $revision $dest $cid\n" if $opt_v;
 568}
 569
 570my ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
 571sub _commit_all {
 572        ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
 573        my %p;
 574        while(my($path,$action) = each %$changed_paths) {
 575                $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev ];
 576        }
 577        $changed_paths = \%p;
 578}
 579
 580sub commit_all {
 581        my %done;
 582        my @col;
 583        my $pref;
 584        my $branch;
 585
 586        while(my($path,$action) = each %$changed_paths) {
 587                ($branch,$path) = split_path($revision,$path);
 588                next if not defined $branch;
 589                $done{$branch}{$path} = $action;
 590        }
 591        while(($branch,$changed_paths) = each %done) {
 592                commit($branch, $changed_paths, $revision, $author, $date, $message);
 593        }
 594}
 595
 596while(++$current_rev <= $svn->{'maxrev'}) {
 597        $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,"");
 598        commit_all();
 599        if($opt_l and not --$opt_l) {
 600                print STDERR "Exiting due to a memory leak. Repeat, please.\n";
 601                last;
 602        }
 603}
 604
 605
 606unlink($git_index);
 607
 608if (defined $orig_git_index) {
 609        $ENV{GIT_INDEX_FILE} = $orig_git_index;
 610} else {
 611        delete $ENV{GIT_INDEX_FILE};
 612}
 613
 614# Now switch back to the branch we were in before all of this happened
 615if($orig_branch) {
 616        print "DONE\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
 617        system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
 618                if $forward_master;
 619        unless ($opt_i) {
 620                system('git-read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD');
 621                die "read-tree failed: $?\n" if $?;
 622        }
 623} else {
 624        $orig_branch = "master";
 625        print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
 626        system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
 627                unless -f "$git_dir/refs/heads/master";
 628        unlink("$git_dir/HEAD");
 629        symlink("refs/heads/$orig_branch","$git_dir/HEAD");
 630        unless ($opt_i) {
 631                system('git checkout');
 632                die "checkout failed: $?\n" if $?;
 633        }
 634}
 635unlink("$git_dir/SVN2GIT_HEAD");
 636close(BRANCHES);