#
# The basic idea is to pull and analyze SVN changes.
#
-# Checking out the files is done by a single long-running CVS connection
-# / server process.
+# Checking out the files is done by a single long-running SVN connection.
#
# The head revision is on branch "origin" by default.
# You can change that with the '-o' option.
-require v5.8.0; # for shell-safe open("-|",LIST)
+require 5.008; # for shell-safe open("-|",LIST)
use strict;
use warnings;
use Getopt::Std;
use SVN::Core;
use SVN::Ra;
-die "Need CVN:COre 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1";
+# Require SVN::Core 1.2.1 or newer.
+# The version is compared numerically, component by component: a plain
+# string comparison ("lt") would wrongly treat e.g. "1.10.0" as older
+# than "1.2.1" and refuse to run with perfectly good bindings.
+{
+	my $have_version = defined $SVN::Core::VERSION ? $SVN::Core::VERSION : "";
+	# Keep only the leading digits of each component ("1-rc" counts as 1).
+	my @have = map { /^(\d+)/ ? $1 : 0 } split /\./, $have_version;
+	my @need = (1, 2, 1);
+	my $cmp = 0;
+	foreach my $i (0 .. $#need) {
+		$cmp = ($have[$i] || 0) <=> $need[$i];
+		last if $cmp;	# first differing component decides
+	}
+	die "Need SVN::Core 1.2.1 or better" if $cmp < 0;
+}
$SIG{'PIPE'}="IGNORE";
$ENV{'TZ'}="UTC";
-our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s,$opt_l);
+our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s,$opt_l,$opt_d,$opt_D);
sub usage() {
print STDERR <<END;
-Usage: ${\basename $0} # fetch/update GIT from CVS
- [-o branch-for-HEAD] [-h] [-v] [-l max_num_changes]
+Usage: ${\basename $0} # fetch/update GIT from SVN
+ [-o branch-for-HEAD] [-h] [-v] [-l max_rev]
[-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
- [-i] [-u] [-s start_chg] [-m] [-M regex] [SVN_URL]
+ [-d|-D] [-i] [-u] [-s start_chg] [-m] [-M regex] [SVN_URL]
END
exit(1);
}
-getopts("b:C:hil:mM:o:s:t:T:uv") or usage();
+getopts("b:C:dDhil:mM:o:s:t:T:uv") or usage();
usage if $opt_h;
my $tag_name = $opt_t || "tags";
my $trunk_name = $opt_T || "trunk";
my $branch_name = $opt_b || "branches";
-@ARGV <= 1 or usage();
+@ARGV == 1 or @ARGV == 2 or usage();
$opt_o ||= "origin";
-$opt_l = 100 unless defined $opt_l;
+$opt_s ||= 1;
my $git_tree = $opt_C;
$git_tree ||= ".";
-my $cvs_tree;
-if ($#ARGV == 0) {
- $cvs_tree = $ARGV[0];
-} elsif (-f 'CVS/Repository') {
- open my $f, '<', 'CVS/Repository' or
- die 'Failed to open CVS/Repository';
- $cvs_tree = <$f>;
- chomp $cvs_tree;
- close $f;
-} else {
- usage();
-}
+my $svn_url = $ARGV[0];
+my $svn_dir = $ARGV[1];
our @mergerx = ();
if ($opt_m) {
sub conn {
my $self = shift;
my $repo = $self->{'fullrep'};
- my $s = SVN::Ra->new($repo);
-
+ my $auth = SVN::Core::auth_open ([SVN::Client::get_simple_provider,
+ SVN::Client::get_ssl_server_trust_file_provider,
+ SVN::Client::get_username_provider]);
+ my $s = SVN::Ra->new(url => $repo, auth => $auth);
die "SVN connection to $repo: $!\n" unless defined $s;
$self->{'svn'} = $s;
$self->{'repo'} = $repo;
sub file {
+	# Fetch $path at revision $rev through this connection into a fresh
+	# temporary file.
+	# Returns the temporary file's name, or undef when the fetch fails with
+	# an "Attempted to get checksum" error. Dies on any other error.
	my($self,$path,$rev) = @_;
-	my $res;
-	my ($fh, $name) = tempfile('gitsvn.XXXXXX',
+	my ($fh, $name) = tempfile('gitsvn.XXXXXX',
		DIR => File::Spec->tmpdir(), UNLINK => 1);
	print "... $rev $path ...\n" if $opt_v;
-	eval { $self->{'svn'}->get_file($path,$rev,$fh); };
-	if ($@ and $@ !~ /Attempted to get checksum/) {
-		# retry
-		$self->conn();
-		eval { $self->{'svn'}->get_file($path,$rev,$fh); };
-	};
-	return () if $@ and $@ !~ /Attempted to get checksum/;
-	die $@ if $@;
+	my $pool = SVN::Pool->new();
+	eval { $self->{'svn'}->get_file($path,$rev,$fh,$pool); };
+	# Save the error immediately: $@ is a global and any later call
+	# (including the pool cleanup) could overwrite it.
+	my $err = $@;
+	$pool->clear;
+	if($err) {
+		# Nothing usable was fetched: release the handle and drop the
+		# temporary file now instead of keeping both until exit.
+		close($fh);
+		unlink($name);
+		return undef if $err =~ /Attempted to get checksum/;
+		die $err;
+	}
	close ($fh);
-	return ($name, $res);
+	return $name;
}
-
package main;
-
-my $svn = SVNconn->new($cvs_tree);
-
+use URI;
+
+our $svn = $svn_url;
+$svn .= "/$svn_dir" if defined $svn_dir;
+my $svn2 = SVNconn->new($svn);
+$svn = SVNconn->new($svn);
+
+my $lwp_ua;
+if($opt_d or $opt_D) {
+ $svn_url = URI->new($svn_url)->canonical;
+ if($opt_D) {
+ $svn_dir =~ s#/*$#/#;
+ } else {
+ $svn_dir = "";
+ }
+ if ($svn_url->scheme eq "http") {
+ use LWP::UserAgent;
+ $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []);
+ } else {
+ print STDERR "Warning: not HTTP; turning off direct file access\n";
+ $opt_d=0;
+ }
+}
sub pdate($) {
my($d) = @_;
sub get_headref($$) {
my $name = shift;
- my $git_dir = shift;
+ my $git_dir = shift;
my $sha;
-
+
if (open(C,"$git_dir/refs/heads/$name")) {
chomp($sha = <C>);
close(C);
my $maxnum = 0;
my $last_rev = "";
my $last_branch;
-my $current_rev = $opt_s ? ($opt_s-1) : 0;
+my $current_rev = $opt_s || 1;
unless(-d $git_dir) {
system("git-init-db");
die "Cannot init the GIT db at $git_tree: $?\n" if $?;
-f "$git_dir/svn2git"
or die "'$git_dir/svn2git' does not exist.\n".
"You need that file for incremental imports.\n";
- $last_branch = basename(readlink("$git_dir/HEAD"));
+ open(F, "git-symbolic-ref HEAD |") or
+ die "Cannot run git-symbolic-ref: $!\n";
+ chomp ($last_branch = <F>);
+ $last_branch = basename($last_branch);
+ close(F);
unless($last_branch) {
warn "Cannot read the last branch name: $! -- assuming 'master'\n";
$last_branch = "master";
$forward_master =
$opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
- system('cmp', '-s', "$git_dir/refs/heads/master",
+ system('cmp', '-s', "$git_dir/refs/heads/master",
"$git_dir/refs/heads/$opt_o") == 0;
# populate index
my($num,$branch,$ref) = split;
$branches{$branch}{$num} = $ref;
$branches{$branch}{"LAST"} = $ref;
- $current_rev = $num if $current_rev < $num;
+ $current_rev = $num+1 if $current_rev <= $num;
}
close($B);
}
open BRANCHES,">>", "$git_dir/svn2git";
+sub node_kind($$$) {
+	# Ask the repository what kind of node sits at $path on $branch in
+	# $revision. The result is whatever check_path() reports; callers
+	# compare it against $SVN::Node::file and $SVN::Node::dir.
+	my ($branch, $path, $revision) = @_;
+
+	# A scratch pool for this single request, emptied again right after.
+	my $scratch = SVN::Pool->new;
+	my $svnpath = revert_split_path($branch,$path);
+	my $kind = $svn->{'svn'}->check_path($svnpath,$revision,$scratch);
+	$scratch->clear;
+
+	return $kind;
+}
-## cvsps output:
-#---------------------
-#PatchSet 314
-#Date: 1999/09/18 13:03:59
-#Author: wkoch
-#Branch: STABLE-BRANCH-1-0
-#Ancestor branch: HEAD
-#Tag: (none)
-#Log:
-# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
-#Members:
-# README:1.57->1.57.2.1
-# VERSION:1.96->1.96.2.1
-#
-#---------------------
-
-my $state = 0;
-
-sub get_file($$$) {
- my($rev,$branch,$path) = @_;
+sub revert_split_path($$) {
+ my($branch,$path) = @_;
- # revert split_path(), below
my $svnpath;
$path = "" if $path eq "/"; # this should not happen, but ...
if($branch eq "/") {
- $svnpath = "/$trunk_name/$path";
+ $svnpath = "$trunk_name/$path";
} elsif($branch =~ m#^/#) {
- $svnpath = "/$tag_name$branch/$path";
+ $svnpath = "$tag_name$branch/$path";
} else {
- $svnpath = "/$branch_name/$branch/$path";
+ $svnpath = "$branch_name/$branch/$path";
}
+ $svnpath =~ s#/+$##;
+ return $svnpath;
+}
+
+sub get_file($$$) {
+ my($rev,$branch,$path) = @_;
+
+ my $svnpath = revert_split_path($branch,$path);
+
# now get it
- my ($name, $res) = eval { $svn->file($svnpath,$rev); };
- return () unless defined $name;
+ my $name;
+ if($opt_d) {
+ my($req,$res);
+
+ # /svn/!svn/bc/2/django/trunk/django-docs/build.py
+ my $url=$svn_url->clone();
+ $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath");
+ print "... $path...\n" if $opt_v;
+ $req = HTTP::Request->new(GET => $url);
+ $res = $lwp_ua->request($req);
+ if ($res->is_success) {
+ my $fh;
+ ($fh, $name) = tempfile('gitsvn.XXXXXX',
+ DIR => File::Spec->tmpdir(), UNLINK => 1);
+ print $fh $res->content;
+ close($fh) or die "Could not write $name: $!\n";
+ } else {
+ return undef if $res->code == 301; # directory?
+ die $res->status_line." at $url\n";
+ }
+ } else {
+ $name = $svn->file("$svnpath",$rev);
+ return undef unless defined $name;
+ }
open my $F, '-|', "git-hash-object", "-w", $name
or die "Cannot create object: $!\n";
my $sha = <$F>;
chomp $sha;
close $F;
+ unlink $name;
my $mode = "0644"; # SV does not seem to store any file modes
return [$mode, $sha, $path];
}
} elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
$branch = $1;
} else {
- print STDERR "$rev: Unrecognized path: $path\n";
+ my %no_error = (
+ "/" => 1,
+ "/$tag_name" => 1,
+ "/$branch_name" => 1
+ );
+ print STDERR "$rev: Unrecognized path: $path\n" unless (defined $no_error{$path});
return ()
}
$path = "/" if $path eq "";
return ($branch,$path);
}
+sub branch_rev($$) {
+	# Look up the newest revision recorded in %branches for $srcbranch
+	# that is not newer than $uptorev.
+	# Returns that revision number, or undef when there is none.
+	my ($srcbranch,$uptorev) = @_;
+
+	my $bbranches = $branches{$srcbranch};
+
+	# 'LAST' is a bookkeeping key rather than a revision number, so it
+	# is left out before the revisions are ordered newest-first.
+	my @candidates = grep { $_ ne 'LAST' } keys %$bbranches;
+
+	my $found;
+	foreach my $candidate (reverse sort { $a <=> $b } @candidates) {
+		next unless $candidate <= $uptorev;
+		$found = $candidate;
+		last;
+	}
+	return $found;
+}
+
+
+sub copy_path($$$$$$$$) {
+	# Somebody copied a whole subdirectory.
+	# We need to find the index entries from the old version which the
+	# SVN log entry points to, and add them to the new place.
+	#
+	# Arguments:
+	#   $newrev, $newbranch  - revision and branch being committed
+	#   $path                - destination path of the copy on $newbranch
+	#   $oldpath, $rev       - SVN source path and revision of the copy
+	#   $node_kind           - node kind of the destination (file or dir)
+	#   $new                 - array ref; [mode, sha1, path] entries for the
+	#                          copied blobs are appended to it
+	#   $parents             - array ref; the source branch's last commit is
+	#                          appended when the copy crosses branches
+	# Returns nothing; problems locating the source are reported and the
+	# copy is skipped.
+
+	my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_;
+
+	my($srcbranch,$srcpath) = split_path($rev,$oldpath);
+	unless(defined $srcbranch) {
+		print "Path not found when copying from $oldpath @ $rev\n";
+		return;
+	}
+	my $therev = branch_rev($srcbranch, $rev);
+	# branch_rev() yields undef when the source branch has no recorded
+	# revision at or before $rev; never use undef as a hash key.
+	my $gitrev = defined $therev ? $branches{$srcbranch}{$therev} : undef;
+	unless($gitrev) {
+		print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n";
+		return;
+	}
+	if ($srcbranch ne $newbranch) {
+		push(@$parents, $branches{$srcbranch}{'LAST'});
+	}
+	print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v;
+	if ($node_kind eq $SVN::Node::dir) {
+		# Make sure the source path ends in exactly one slash.
+		$srcpath =~ s#/*$#/#;
+	}
+
+	# List the blobs below the source path in the source commit.
+	open my $f,"-|","git-ls-tree","-r","-z",$gitrev,$srcpath
+		or die "Cannot run git-ls-tree: $!\n";
+	local $/ = "\0";	# -z output is NUL-separated
+	while(<$f>) {
+		chomp;
+		my($m,$p) = split(/\t/,$_,2);
+		my($mode,$type,$sha1) = split(/ /,$m);
+		next if $type ne "blob";
+		if ($node_kind eq $SVN::Node::dir) {
+			# Re-root the entry: swap the source prefix for $path.
+			$p = $path . substr($p,length($srcpath)-1);
+		} else {
+			$p = $path;
+		}
+		push(@$new,[$mode,$sha1,$p]);
+	}
+	close($f) or
+		print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n";
+}
+
+
sub commit {
my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
my($author_name,$author_email,$dest);
- my(@old,@new);
+ my(@old,@new,@parents);
if (not defined $author) {
$author_name = $author_email = "unknown";
$last_rev = $rev;
}
+ push (@parents, $rev) if defined $rev;
+
my $cid;
if($tag and not %$changed_paths) {
$cid = $rev;
} else {
- while(my($path,$action) = each %$changed_paths) {
- if ($action->[0] eq "A") {
- my $f = get_file($revision,$branch,$path);
- push(@new,$f) if $f;
+ my @paths = sort keys %$changed_paths;
+ foreach my $path(@paths) {
+ my $action = $changed_paths->{$path};
+
+ if ($action->[0] eq "R") {
+ # refer to a file/tree in an earlier commit
+ push(@old,$path); # remove any old stuff
+ }
+ if(($action->[0] eq "A") || ($action->[0] eq "R")) {
+ my $node_kind = node_kind($branch,$path,$revision);
+ if($action->[1]) {
+ copy_path($revision,$branch,$path,$action->[1],$action->[2],$node_kind,\@new,\@parents);
+ } elsif ($node_kind eq $SVN::Node::file) {
+ my $f = get_file($revision,$branch,$path);
+ if ($f) {
+ push(@new,$f) if $f;
+ } else {
+ my $opath = $action->[3];
+ print STDERR "$revision: $branch: could not fetch '$opath'\n";
+ }
+ }
} elsif ($action->[0] eq "D") {
push(@old,$path);
} elsif ($action->[0] eq "M") {
- my $f = get_file($revision,$branch,$path);
- push(@new,$f) if $f;
- } elsif ($action->[0] eq "R") {
- # refer to a file/tree in an earlier commit
- push(@old,$path); # remove any old stuff
-
- # ... and add any new stuff
- my($b,$p) = split_path($revision,$action->[1]);
- open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $p;
- local $/ = '\0';
- while(<$F>) {
- chomp;
- my($m,$p) = split(/\t/,$_,2);
- my($mode,$type,$sha1) = split(/ /,$m);
- next if $type ne "blob";
- push(@new,[$mode,$sha1,$p]);
+ my $node_kind = node_kind($branch,$path,$revision);
+ if ($node_kind eq $SVN::Node::file) {
+ my $f = get_file($revision,$branch,$path);
+ push(@new,$f) if $f;
}
} else {
die "$revision: unknown action '".$action->[0]."' for $path\n";
}
}
- if(@old) {
- open my $F, "-|", "git-ls-files", "-z", @old or die $!;
- @old = ();
- local $/ = '\0';
+ while(@old) {
+ my @o1;
+ if(@old > 55) {
+ @o1 = splice(@old,0,50);
+ } else {
+ @o1 = @old;
+ @old = ();
+ }
+ open my $F, "-|", "git-ls-files", "-z", @o1 or die $!;
+ @o1 = ();
+ local $/ = "\0";
while(<$F>) {
chomp;
- push(@old,$_);
+ push(@o1,$_);
}
close($F);
- while(@old) {
+ while(@o1) {
my @o2;
- if(@old > 55) {
- @o2 = splice(@old,0,50);
+ if(@o1 > 55) {
+ @o2 = splice(@o1,0,50);
} else {
- @o2 = @old;
- @old = ();
+ @o2 = @o1;
+ @o1 = ();
}
system("git-update-index","--force-remove","--",@o2);
die "Cannot remove files: $?\n" if $?;
$pw->close();
my @par = ();
- @par = ("-p",$rev) if defined $rev;
# loose detection of merges
# based on the commit msg
if ($mparent eq 'HEAD') { $mparent = $opt_o };
if ( -e "$git_dir/refs/heads/$mparent") {
$mparent = get_headref($mparent, $git_dir);
- push @par, '-p', $mparent;
+ push (@parents, $mparent);
print OUT "Merge parent branch: $mparent\n" if $opt_v;
}
- }
+ }
+ }
+ my %seen_parents = ();
+ my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents;
+ foreach my $bparent (@unique_parents) {
+ push @par, '-p', $bparent;
+ print OUT "Merge parent branch: $bparent\n" if $opt_v;
}
exec("env",
die "Error running git-commit-tree: $?\n" if $?;
}
+ if (not defined $cid) {
+ $cid = $branches{"/"}{"LAST"};
+ }
+
if(not defined $dest) {
print "... no known parent\n" if $opt_v;
} elsif(not $tag) {
print "Writing to refs/heads/$dest\n" if $opt_v;
- open(C,">$git_dir/refs/heads/$dest") and
+ open(C,">$git_dir/refs/heads/$dest") and
print C ("$cid\n") and
close(C)
or die "Cannot write branch $dest for update: $!\n";
my($in, $out) = ('','');
$last_rev = "-" if %$changed_paths;
# the tag was 'complex', i.e. did not refer to a "real" revision
-
+
$dest =~ tr/_/\./ if $opt_u;
+ $branch = $dest;
my $pid = open2($in, $out, 'git-mktag');
print $out ("object $cid\n".
print "DONE: $revision $dest $cid\n" if $opt_v;
}
-my ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
-sub _commit_all {
- ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
+sub commit_all {
+ # Recursive use of the SVN connection does not work
+ local $svn = $svn2;
+
+ my ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
my %p;
while(my($path,$action) = each %$changed_paths) {
- $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev ];
+ $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev, $path ];
}
$changed_paths = \%p;
-}
-sub commit_all {
my %done;
my @col;
my $pref;
}
}
-while(++$current_rev < $svn->{'maxrev'}) {
- $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,"");
- commit_all();
- if($opt_l and not --$opt_l) {
- print STDERR "Exiting due to a memory leak. Repeat, please.\n";
- last;
- }
+$opt_l = $svn->{'maxrev'} if not defined $opt_l or $opt_l > $svn->{'maxrev'};
+
+if ($svn->{'maxrev'} < $current_rev) {
+ print "Up to date: no new revisions to fetch!\n" if $opt_v;
+ unlink("$git_dir/SVN2GIT_HEAD");
+ exit;
}
+print "Fetching from $current_rev to $opt_l ...\n" if $opt_v;
+
+my $pool=SVN::Pool->new;
+$svn->{'svn'}->get_log("/",$current_rev,$opt_l,0,1,1,\&commit_all,$pool);
+$pool->clear;
+
unlink($git_index);
print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
unless -f "$git_dir/refs/heads/master";
- unlink("$git_dir/HEAD");
- symlink("refs/heads/$orig_branch","$git_dir/HEAD");
+ system('git-update-ref', 'HEAD', "$orig_branch");
unless ($opt_i) {
system('git checkout');
die "checkout failed: $?\n" if $?;