git-svnimport.perlon commit svn import: add libsvn-core-perl to Debian's control file (c6582ab)
   1#!/usr/bin/perl -w
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to pull and analyze SVN changes.
   7#
# Checking out the files is done by a single long-running SVN connection
# / server process.
  10#
  11# The head revision is on branch "origin" by default.
  12# You can change that with the '-o' option.
  13
  14require v5.8.0; # for shell-safe open("-|",LIST)
  15use strict;
  16use warnings;
  17use Getopt::Std;
  18use File::Spec;
  19use File::Temp qw(tempfile);
  20use File::Path qw(mkpath);
  21use File::Basename qw(basename dirname);
  22use Time::Local;
  23use IO::Pipe;
  24use POSIX qw(strftime dup2);
  25use IPC::Open2;
  26use SVN::Core;
  27use SVN::Ra;
  28
# Ignore SIGPIPE so that writing to a child process that has already exited
# does not kill this script.
$SIG{'PIPE'}="IGNORE";
# Run this script, and every child process it starts, with TZ set to UTC.
$ENV{'TZ'}="UTC";

# Command-line option variables; they are filled in by getopts() below.
our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b);
  33
  34sub usage() {
  35        print STDERR <<END;
  36Usage: ${\basename $0}     # fetch/update GIT from CVS
  37       [-o branch-for-HEAD] [-h] [-v]
  38       [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
  39       [-i] [-u] [-s subst] [-m] [-M regex] [SVN_URL]
  40END
  41        exit(1);
  42}
  43
  44getopts("b:C:hivmM:o:t:T:u") or usage();
  45usage if $opt_h;
  46
  47my $tag_name = $opt_t || "tags";
  48my $trunk_name = $opt_T || "trunk";
  49my $branch_name = $opt_b || "branches";
  50
  51@ARGV <= 1 or usage();
  52
  53$opt_o ||= "origin";
  54my $git_tree = $opt_C;
  55$git_tree ||= ".";
  56
  57my $cvs_tree;
  58if ($#ARGV == 0) {
  59        $cvs_tree = $ARGV[0];
  60} elsif (-f 'CVS/Repository') {
  61        open my $f, '<', 'CVS/Repository' or 
  62            die 'Failed to open CVS/Repository';
  63        $cvs_tree = <$f>;
  64        chomp $cvs_tree;
  65        close $f;
  66} else {
  67        usage();
  68}
  69
  70our @mergerx = ();
  71if ($opt_m) {
  72        @mergerx = ( qr/\W(?:from|of|merge|merging|merged) (\w+)/i );
  73}
  74if ($opt_M) {
  75        push (@mergerx, qr/$opt_M/);
  76}
  77
  78select(STDERR); $|=1; select(STDOUT);
  79
  80
  81package SVNconn;
  82# Basic SVN connection.
  83# We're only interested in connecting and downloading, so ...
  84
  85use File::Spec;
  86use File::Temp qw(tempfile);
  87use POSIX qw(strftime dup2);
  88
  89sub new {
  90        my($what,$repo) = @_;
  91        $what=ref($what) if ref($what);
  92
  93        my $self = {};
  94        $self->{'buffer'} = "";
  95        bless($self,$what);
  96
  97        $repo =~ s#/+$##;
  98        $self->{'fullrep'} = $repo;
  99        $self->conn();
 100
 101        $self->{'lines'} = undef;
 102
 103        return $self;
 104}
 105
 106sub conn {
 107        my $self = shift;
 108        my $repo = $self->{'fullrep'};
 109        my $s = SVN::Ra->new($repo);
 110
 111        die "SVN connection to $repo: $!\n" unless defined $s;
 112        $self->{'svn'} = $s;
 113        $self->{'repo'} = $repo;
 114        $self->{'maxrev'} = $s->get_latest_revnum();
 115}
 116
 117sub file {
 118        my($self,$path,$rev) = @_;
 119        my $res;
 120
 121        my ($fh, $name) = tempfile('gitsvn.XXXXXX', 
 122                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 123
 124        print "... $rev $path ...\n" if $opt_v;
 125        eval { $self->{'svn'}->get_file($path,$rev,$fh); };
 126        if (defined $@ and $@ !~ /Attempted to get checksum/) {
 127            # retry
 128            $self->conn();
 129                eval { $self->{'svn'}->get_file($path,$rev,$fh); };
 130        };
 131        return () if defined $@ and $@ !~ /Attempted to get checksum/;
 132        die $@ if $@;
 133        close ($fh);
 134
 135        return ($name, $res);
 136}
 137
 138
# Back to the main program; the code below is compiled in package main.
package main;

# Open the connection to the repository named by $cvs_tree; the SVNconn
# constructor connects immediately.
my $svn = SVNconn->new($cvs_tree);
 142
 143
 144sub pdate($) {
 145        my($d) = @_;
 146        $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)#
 147                or die "Unparseable date: $d\n";
 148        my $y=$1; $y-=1900 if $y>1900;
 149        return timegm($6||0,$5,$4,$3,$2-1,$y);
 150}
 151
 152sub getwd() {
 153        my $pwd = `pwd`;
 154        chomp $pwd;
 155        return $pwd;
 156}
 157
 158
 159sub get_headref($$) {
 160    my $name    = shift;
 161    my $git_dir = shift; 
 162    my $sha;
 163    
 164    if (open(C,"$git_dir/refs/heads/$name")) {
 165        chomp($sha = <C>);
 166        close(C);
 167        length($sha) == 40
 168            or die "Cannot get head id for $name ($sha): $!\n";
 169    }
 170    return $sha;
 171}
 172
 173
 174-d $git_tree
 175        or mkdir($git_tree,0777)
 176        or die "Could not create $git_tree: $!";
 177chdir($git_tree);
 178
 179my $orig_branch = "";
 180my $forward_master = 0;
 181my %branches;
 182
 183my $git_dir = $ENV{"GIT_DIR"} || ".git";
 184$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 185$ENV{"GIT_DIR"} = $git_dir;
 186my $orig_git_index;
 187$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 188my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx',
 189                                    DIR => File::Spec->tmpdir());
 190close ($git_ih);
 191$ENV{GIT_INDEX_FILE} = $git_index;
 192my $maxnum = 0;
 193my $last_rev = "";
 194my $last_branch;
 195my $current_rev = 0;
 196unless(-d $git_dir) {
 197        system("git-init-db");
 198        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 199        system("git-read-tree");
 200        die "Cannot init an empty tree: $?\n" if $?;
 201
 202        $last_branch = $opt_o;
 203        $orig_branch = "";
 204} else {
 205        -f "$git_dir/refs/heads/$opt_o"
 206                or die "Branch '$opt_o' does not exist.\n".
 207                       "Either use the correct '-o branch' option,\n".
 208                       "or import to a new repository.\n";
 209
 210        -f "$git_dir/svn2git"
 211                or die "'$git_dir/svn2git' does not exist.\n".
 212                       "You need that file for incremental imports.\n";
 213        $last_branch = basename(readlink("$git_dir/HEAD"));
 214        unless($last_branch) {
 215                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 216                $last_branch = "master";
 217        }
 218        $orig_branch = $last_branch;
 219        $last_rev = get_headref($orig_branch, $git_dir);
 220        if (-f "$git_dir/SVN2GIT_HEAD") {
 221                die <<EOM;
 222SVN2GIT_HEAD exists.
 223Make sure your working directory corresponds to HEAD and remove SVN2GIT_HEAD.
 224You may need to run
 225
 226    git-read-tree -m -u SVN2GIT_HEAD HEAD
 227EOM
 228        }
 229        system('cp', "$git_dir/HEAD", "$git_dir/SVN2GIT_HEAD");
 230
 231        $forward_master =
 232            $opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
 233            system('cmp', '-s', "$git_dir/refs/heads/master", 
 234                                "$git_dir/refs/heads/$opt_o") == 0;
 235
 236        # populate index
 237        system('git-read-tree', $last_rev);
 238        die "read-tree failed: $?\n" if $?;
 239
 240        # Get the last import timestamps
 241        open my $B,"<", "$git_dir/svn2git";
 242        while(<$B>) {
 243                chomp;
 244                my($num,$branch,$ref) = split;
 245                $branches{$branch}{$num} = $ref;
 246                $branches{$branch}{"LAST"} = $ref;
 247                $current_rev = $num+1 if $current_rev < $num+1;
 248        }
 249        close($B);
 250}
 251-d $git_dir
 252        or die "Could not create git subdir ($git_dir).\n";
 253
 254open BRANCHES,">>", "$git_dir/svn2git";
 255
 256
## cvsps output (sample of a CVS patchset log; nothing in this script parses it):
 258#---------------------
 259#PatchSet 314
 260#Date: 1999/09/18 13:03:59
 261#Author: wkoch
 262#Branch: STABLE-BRANCH-1-0
 263#Ancestor branch: HEAD
 264#Tag: (none)
 265#Log:
 266#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 267#Members:
 268#       README:1.57->1.57.2.1
 269#       VERSION:1.96->1.96.2.1
 270#
 271#---------------------
 272
# NOTE(review): within this file $state is only mentioned in commented-out
# code inside commit(); it looks unused -- confirm before removing.
my $state = 0;
 274
 275sub get_file($$$) {
 276        my($rev,$branch,$path) = @_;
 277
 278        # revert split_path(), below
 279        my $svnpath;
 280        $path = "" if $path eq "/"; # this should not happen, but ...
 281        if($branch eq "/") {
 282                $svnpath = "/$trunk_name/$path";
 283        } elsif($branch =~ m#^/#) {
 284                $svnpath = "/$tag_name$branch/$path";
 285        } else {
 286                $svnpath = "/$branch_name/$branch/$path";
 287        }
 288
 289        # now get it
 290        my ($name, $res) = eval { $svn->file($svnpath,$rev); };
 291        return () unless defined $name;
 292
 293        open my $F, '-|', "git-hash-object", "-w", $name
 294                or die "Cannot create object: $!\n";
 295        my $sha = <$F>;
 296        chomp $sha;
 297        close $F;
 298        my $mode = "0644"; # SV does not seem to store any file modes
 299        return [$mode, $sha, $path];
 300}
 301
 302sub split_path($$) {
 303        my($rev,$path) = @_;
 304        my $branch;
 305
 306        if($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) {
 307                $branch = "/$1";
 308        } elsif($path =~ s#^/\Q$trunk_name\E/?##) {
 309                $branch = "/";
 310        } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
 311                $branch = $1;
 312        } else {
 313                print STDERR "$rev: Unrecognized path: $path\n";
 314                return ()
 315        }
 316        $path = "/" if $path eq "";
 317        return ($branch,$path);
 318}
 319
 320sub commit {
 321        my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
 322        my($author_name,$author_email,$dest);
 323        my(@old,@new);
 324
 325        if (not defined $author) {
 326                $author_name = $author_email = "unknown";
 327        } elsif ($author =~ /^(.*?)\s+<(.*)>$/) {
 328                ($author_name, $author_email) = ($1, $2);
 329        } else {
 330                $author =~ s/^<(.*)>$/$1/;
 331                $author_name = $author_email = $author;
 332        }
 333        $date = pdate($date);
 334
 335        my $tag;
 336        my $parent;
 337        if($branch eq "/") { # trunk
 338                $parent = $opt_o;
 339        } elsif($branch =~ m#^/(.+)#) { # tag
 340                $tag = 1;
 341                $parent = $1;
 342        } else { # "normal" branch
 343                # nothing to do
 344                $parent = $branch;
 345        }
 346        $dest = $parent;
 347
 348        my $prev = $changed_paths->{"/"};
 349        if($prev and $prev->[0] eq "A") {
 350                delete $changed_paths->{"/"};
 351                my $oldpath = $prev->[1];
 352                my $rev;
 353                if(defined $oldpath) {
 354                        my $p;
 355                        ($parent,$p) = split_path($revision,$oldpath);
 356                        if($parent eq "/") {
 357                                $parent = $opt_o;
 358                        } else {
 359                                $parent =~ s#^/##; # if it's a tag
 360                        }
 361                } else {
 362                        $parent = undef;
 363                }
 364        }
 365
 366        my $rev;
 367        if(defined $parent) {
 368                open(H,"git-rev-parse --verify $parent |");
 369                $rev = <H>;
 370                close(H) or do {
 371                        print STDERR "$revision: cannot find commit '$parent'!\n";
 372                        return;
 373                };
 374                chop $rev;
 375                if(length($rev) != 40) {
 376                        print STDERR "$revision: cannot find commit '$parent'!\n";
 377                        return;
 378                }
 379                $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"};
 380                if($revision != 1 and not $rev) {
 381                        print STDERR "$revision: do not know ancestor for '$parent'!\n";
 382                        return;
 383                }
 384        } else {
 385                $rev = undef;
 386        }
 387
 388#       if($prev and $prev->[0] eq "A") {
 389#               if(not $tag) {
 390#                       unless(open(H,"> $git_dir/refs/heads/$branch")) {
 391#                               print STDERR "$revision: Could not create branch $branch: $!\n";
 392#                               $state=11;
 393#                               next;
 394#                       }
 395#                       print H "$rev\n"
 396#                               or die "Could not write branch $branch: $!";
 397#                       close(H)
 398#                               or die "Could not write branch $branch: $!";
 399#               }
 400#       }
 401        if(not defined $rev) {
 402                unlink($git_index);
 403        } elsif ($rev ne $last_rev) {
 404                print "Switching from $last_rev to $rev ($branch)\n" if $opt_v;
 405                system("git-read-tree", $rev);
 406                die "read-tree failed for $rev: $?\n" if $?;
 407                $last_rev = $rev;
 408        }
 409
 410        while(my($path,$action) = each %$changed_paths) {
 411                if ($action->[0] eq "A") {
 412                        my $f = get_file($revision,$branch,$path);
 413                        push(@new,$f) if $f;
 414                } elsif ($action->[0] eq "D") {
 415                        push(@old,$path);
 416                } elsif ($action->[0] eq "M") {
 417                        my $f = get_file($revision,$branch,$path);
 418                        push(@new,$f) if $f;
 419                } elsif ($action->[0] eq "R") {
 420                        # refer to a file/tree in an earlier commit
 421                        push(@old,$path); # remove any old stuff
 422
 423                        # ... and add any new stuff
 424                        my($b,$p) = split_path($revision,$action->[1]);
 425                        open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $p;
 426                        local $/ = '\0';
 427                        while(<$F>) {
 428                                chomp;
 429                                my($m,$p) = split(/\t/,$_,2);
 430                                my($mode,$type,$sha1) = split(/ /,$m);
 431                                next if $type ne "blob";
 432                                push(@new,[$mode,$sha1,$p]);
 433                        }
 434                } else {
 435                        die "$revision: unknown action '".$action->[0]."' for $path\n";
 436                }
 437        }
 438
 439        if(@old) {
 440                open my $F, "-│", "git-ls-files", "-z", @old or die $!;
 441                @old = ();
 442                local $/ = '\0';
 443                while(<$F>) {
 444                        chomp;
 445                        push(@old,$_);
 446                }
 447                close($F);
 448
 449                while(@old) {
 450                        my @o2;
 451                        if(@old > 55) {
 452                                @o2 = splice(@old,0,50);
 453                        } else {
 454                                @o2 = @old;
 455                                @old = ();
 456                        }
 457                        system("git-update-index","--force-remove","--",@o2);
 458                        die "Cannot remove files: $?\n" if $?;
 459                }
 460        }
 461        while(@new) {
 462                my @n2;
 463                if(@new > 12) {
 464                        @n2 = splice(@new,0,10);
 465                } else {
 466                        @n2 = @new;
 467                        @new = ();
 468                }
 469                system("git-update-index","--add",
 470                        (map { ('--cacheinfo', @$_) } @n2));
 471                die "Cannot add files: $?\n" if $?;
 472        }
 473
 474        my $pid = open(C,"-|");
 475        die "Cannot fork: $!" unless defined $pid;
 476        unless($pid) {
 477                exec("git-write-tree");
 478                die "Cannot exec git-write-tree: $!\n";
 479        }
 480        chomp(my $tree = <C>);
 481        length($tree) == 40
 482                or die "Cannot get tree id ($tree): $!\n";
 483        close(C)
 484                or die "Error running git-write-tree: $?\n";
 485        print "Tree ID $tree\n" if $opt_v;
 486
 487        my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n";
 488        my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n";
 489        $pid = fork();
 490        die "Fork: $!\n" unless defined $pid;
 491        unless($pid) {
 492                $pr->writer();
 493                $pw->reader();
 494                open(OUT,">&STDOUT");
 495                dup2($pw->fileno(),0);
 496                dup2($pr->fileno(),1);
 497                $pr->close();
 498                $pw->close();
 499
 500                my @par = ();
 501                @par = ("-p",$rev) if defined $rev;
 502
 503                # loose detection of merges
 504                # based on the commit msg
 505                foreach my $rx (@mergerx) {
 506                        if ($message =~ $rx) {
 507                                my $mparent = $1;
 508                                if ($mparent eq 'HEAD') { $mparent = $opt_o };
 509                                if ( -e "$git_dir/refs/heads/$mparent") {
 510                                        $mparent = get_headref($mparent, $git_dir);
 511                                        push @par, '-p', $mparent;
 512                                        print OUT "Merge parent branch: $mparent\n" if $opt_v;
 513                                }
 514                        } 
 515                }
 516
 517                exec("env",
 518                        "GIT_AUTHOR_NAME=$author_name",
 519                        "GIT_AUTHOR_EMAIL=$author_email",
 520                        "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
 521                        "GIT_COMMITTER_NAME=$author_name",
 522                        "GIT_COMMITTER_EMAIL=$author_email",
 523                        "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
 524                        "git-commit-tree", $tree,@par);
 525                die "Cannot exec git-commit-tree: $!\n";
 526        }
 527        $pw->writer();
 528        $pr->reader();
 529
 530        $message =~ s/[\s\n]+\z//;
 531
 532        print $pw "$message\n"
 533                or die "Error writing to git-commit-tree: $!\n";
 534        $pw->close();
 535
 536        print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v;
 537        chomp(my $cid = <$pr>);
 538        length($cid) == 40
 539                or die "Cannot get commit id ($cid): $!\n";
 540        print "Commit ID $cid\n" if $opt_v;
 541        $pr->close();
 542
 543        waitpid($pid,0);
 544        die "Error running git-commit-tree: $?\n" if $?;
 545
 546        if(defined $dest) {
 547                print "Writing to refs/heads/$dest\n" if $opt_v;
 548                open(C,">$git_dir/refs/heads/$dest") and 
 549                print C ("$cid\n") and
 550                close(C)
 551                        or die "Cannot write branch $dest for update: $!\n";
 552        } else {
 553                print "... no known parent\n" if $opt_v;
 554        }
 555        $branches{$branch}{"LAST"} = $cid;
 556        $branches{$branch}{$revision} = $cid;
 557        $last_rev = $cid;
 558        print BRANCHES "$revision $branch $cid\n";
 559        print "DONE: $revision $dest $cid\n" if $opt_v;
 560
 561        if($tag) {
 562                my($in, $out) = ('','');
 563                $last_rev = "-" if %$changed_paths;
 564                # the tag was 'complex', i.e. did not refer to a "real" revision
 565                
 566                $tag =~ tr/_/\./ if $opt_u;
 567
 568                my $pid = open2($in, $out, 'git-mktag');
 569                print $out ("object $cid\n".
 570                    "type commit\n".
 571                    "tag $tag\n".
 572                    "tagger $author_name <$author_email>\n") and
 573                close($out)
 574                    or die "Cannot create tag object $tag: $!\n";
 575
 576                my $tagobj = <$in>;
 577                chomp $tagobj;
 578
 579                if ( !close($in) or waitpid($pid, 0) != $pid or
 580                                $? != 0 or $tagobj !~ /^[0123456789abcdef]{40}$/ ) {
 581                        die "Cannot create tag object $tag: $!\n";
 582                }
 583                
 584
 585                open(C,">$git_dir/refs/tags/$tag")
 586                        or die "Cannot create tag $tag: $!\n";
 587                print C "$tagobj\n"
 588                        or die "Cannot write tag $tag: $!\n";
 589                close(C)
 590                        or die "Cannot write tag $tag: $!\n";
 591
 592                print "Created tag '$tag' on '$branch'\n" if $opt_v;
 593        }
 594}
 595
 596my ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
 597sub _commit_all {
 598        ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
 599        my %p;
 600        while(my($path,$action) = each %$changed_paths) {
 601                $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev ];
 602        }
 603        $changed_paths = \%p;
 604}
 605
 606sub commit_all {
 607        my %done;
 608        my @col;
 609        my $pref;
 610        my $branch;
 611
 612        while(my($path,$action) = each %$changed_paths) {
 613                ($branch,$path) = split_path($revision,$path);
 614                next if not defined $branch;
 615                $done{$branch}{$path} = $action;
 616        }
 617        while(($branch,$changed_paths) = each %done) {
 618                commit($branch, $changed_paths, $revision, $author, $date, $message);
 619        }
 620}
 621
 622while(++$current_rev < $svn->{'maxrev'}) {
 623        $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,"");
 624        commit_all();
 625}
 626
 627
 628unlink($git_index);
 629
 630if (defined $orig_git_index) {
 631        $ENV{GIT_INDEX_FILE} = $orig_git_index;
 632} else {
 633        delete $ENV{GIT_INDEX_FILE};
 634}
 635
 636# Now switch back to the branch we were in before all of this happened
 637if($orig_branch) {
 638        print "DONE\n" if $opt_v;
 639        system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
 640                if $forward_master;
 641        unless ($opt_i) {
 642                system('git-read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD');
 643                die "read-tree failed: $?\n" if $?;
 644        }
 645} else {
 646        $orig_branch = "master";
 647        print "DONE; creating $orig_branch branch\n" if $opt_v;
 648        system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
 649                unless -f "$git_dir/refs/heads/master";
 650        unlink("$git_dir/HEAD");
 651        symlink("refs/heads/$orig_branch","$git_dir/HEAD");
 652        unless ($opt_i) {
 653                system('git checkout');
 654                die "checkout failed: $?\n" if $?;
 655        }
 656}
 657unlink("$git_dir/SVN2GIT_HEAD");
 658close(BRANCHES);