use warnings;
use strict;
use vars qw/ $AUTHOR $VERSION
- $SVN_URL
- $GIT_SVN_INDEX $GIT_SVN
- $GIT_DIR $GIT_SVN_DIR $REVDB
- $_follow_parent $sha1 $sha1_short $_revision
- $_cp_remote $_upgrade $_q
- $_authors %users/;
+ $sha1 $sha1_short $_revision
+ $_q $_authors %users/;
# Identification strings for this script.
$AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
$VERSION = '@@GIT_VERSION@@';
# Default remote and ref identifiers; the ref id can be overridden through
# the GIT_SVN_ID environment variable.
$Git::SVN::default_repo_id = 'git-svn';
$Git::SVN::default_ref_id = $ENV{GIT_SVN_ID} || 'git-svn';
-my $LC_ALL = $ENV{LC_ALL};
# Keep the caller's TZ for Git::SVN::Log before this process is set to UTC.
$Git::SVN::Log::TZ = $ENV{TZ};
-# make sure the svn binary gives consistent output between locales and TZs:
$ENV{TZ} = 'UTC';
-$ENV{LC_ALL} = 'C';
$| = 1; # unbuffer STDOUT
# Print the given messages on STDERR and exit with status 1.
sub fatal (@) { print STDERR @_; exit 1 }
# A full 40-character hex object name, and an abbreviation of 4 to 40
# characters.
$sha1 = qr/[a-f\d]{40}/;
$sha1_short = qr/[a-f\d]{4,40}/;
my ($_stdin, $_help, $_edit,
- $_repack, $_repack_nr, $_repack_flags,
- $_message, $_file, $_no_metadata,
+ $_message, $_file,
$_template, $_shared,
- $_version, $_upgrade,
+ $_version,
$_merge, $_strategy, $_dry_run,
$_prefix);
# Getopt specs for connecting to an SVN server; each one stores into a
# package variable of Git::SVN::Prompt or Git::SVN::Ra.
my %remote_opts = ( 'username=s' => \$Git::SVN::Prompt::_username,
'config-dir=s' => \$Git::SVN::Ra::config_dir,
'no-auth-cache' => \$Git::SVN::Prompt::_no_auth_cache );
# Getopt specs that extend %remote_opts with follow-parent, authors-file,
# repack, no-metadata, quiet and repack-flags. With this patch the
# follow-parent, repack, no-metadata and repack-flags values are stored in
# Git::SVN package variables instead of the main script's own.
-my %fc_opts = ( 'follow-parent|follow' => \$_follow_parent,
+my %fc_opts = ( 'follow-parent|follow' => \$Git::SVN::_follow_parent,
'authors-file|A=s' => \$_authors,
- 'repack:i' => \$_repack,
- 'no-metadata' => \$_no_metadata,
+ 'repack:i' => \$Git::SVN::_repack,
+ 'no-metadata' => \$Git::SVN::_no_metadata,
'quiet|q' => \$_q,
- 'repack-flags|repack-args|repack-opts=s' => \$_repack_flags,
+ 'repack-flags|repack-args|repack-opts=s' =>
+ \$Git::SVN::_repack_flags,
%remote_opts );
my ($_trunk, $_tags, $_branches);
# Option spec of the selected command: the third element of its %cmd entry.
# NOTE(review): perlsyn documents `my ... if COND` as undefined behaviour
# when COND is false; worth untangling in a separate change.
my %opts = %{$cmd{$cmd}->[2]} if (defined $cmd);
read_repo_config(\%opts);
-my $rv = GetOptions(%opts, 'help|H|h' => \$_help,
- 'version|V' => \$_version,
- 'minimize-connections' =>
- \$Git::SVN::Migration::_minimize,
- 'id|i=s' => \$Git::SVN::default_ref_id);
+my $rv = GetOptions(%opts, 'help|H|h' => \$_help, 'version|V' => \$_version,
+ 'minimize-connections' => \$Git::SVN::Migration::_minimize,
+ 'id|i=s' => \$Git::SVN::default_ref_id,
+ 'svn-remote|remote|R=s' => \$Git::SVN::default_repo_id);
# A failed option parse is tolerated for the 'log' command only.
exit 1 if (!$rv && $cmd ne 'log');
usage(0) if $_help;
# init, multi-init and commit-diff skip the migration check.
unless ($cmd =~ /^(?:init|multi-init|commit-diff)$/) {
Git::SVN::Migration::migration_check();
}
+Git::SVN::init_vars();
eval {
Git::SVN::verify_remotes_sanity();
$cmd{$cmd}->[0]->(@ARGV);
package Git::SVN;
use strict;
use warnings;
-use vars qw/$default_repo_id $default_ref_id/;
+use vars qw/$default_repo_id $default_ref_id $_no_metadata $_follow_parent
+ $_repack $_repack_flags/;
use Carp qw/croak/;
use File::Path qw/mkpath/;
+use File::Copy qw/copy/;
use IPC::Open3;
+my $_repack_nr;
# properties that we do not log:
my %SKIP_PROP;
BEGIN {
svn:entry:committed-date/;
}
+# Paths of rev_db lock files that rev_db_set has created but not yet renamed
+# back; whatever is still listed here is unlinked when the program ends.
+my %LOCKFILES;
+END {
+ if (%LOCKFILES) {
+ unlink keys %LOCKFILES;
+ }
+}
+
# Fetch new revisions for every tracked path of one SVN remote.
#
# $repo_id - identifier passed through to Git::SVN->new
# $url - URL handed to Git::SVN::Ra->new
# $fetch - hash ref; each value and its key are passed to Git::SVN->new
#
# Returns nothing when the lowest recorded revision is already at the
# repository's latest revision; otherwise returns the result of
# gs_fetch_loop_common.
sub fetch_all {
my ($repo_id, $url, $fetch) = @_;
my @gs;
my $ra = Git::SVN::Ra->new($url);
my $head = $ra->get_latest_revnum;
# start at the latest revision and lower $base to the smallest value
# reported by any of the Git::SVN objects below
my $base = $head;
- my $new_remote;
foreach my $p (sort keys %$fetch) {
my $gs = Git::SVN->new($fetch->{$p}, $repo_id, $p);
- my $lr = $gs->last_rev;
+ my $lr = $gs->rev_db_max; # NOTE(review): rev_db_max in this patch returns a number or dies, so the defined() test below looks always true
if (defined $lr) {
$base = $lr if ($lr < $base);
- } else {
- $new_remote = 1;
}
push @gs, $gs;
}
- $base = 0 if $new_remote;
# continue with the revision after the lowest one recorded
return if (++$base > $head);
$ra->gs_fetch_loop_common($base, $head, @gs);
}
$r;
}
+# Normalise the repack settings; the main script calls this after
+# GetOptions. Nothing changes while $_repack is undefined. Otherwise a
+# value of zero or less becomes 1000, the $_repack_nr countdown starts at
+# that value, and $_repack_flags falls back to '-d' when it is false.
+sub init_vars {
+ if (defined $_repack) {
+ if ($_repack <= 0) {
+ $_repack = 1000;
+ }
+ $_repack_nr = $_repack;
+ $_repack_flags = $_repack_flags || '-d';
+ }
+}
+
sub verify_remotes_sanity {
return unless -d $ENV{GIT_DIR};
my %seen;
$rl = readline $fh;
defined $rl or return (undef, undef);
chomp $rl;
- while ($c ne $rl && tell $fh != 0) {
+ while (('0' x40) eq $rl && tell $fh != 0) {
$offset -= 41;
seek $fh, $offset, 2;
$rl = readline $fh;
defined $rl or return (undef, undef);
chomp $rl;
}
+ if ($c) {
+ die "$self->{db_path} and ", $self->refname,
+ " inconsistent!:\n$c != $rl\n";
+ }
my $rev = tell $fh;
croak $! if ($rev < 0);
$rev = ($rev - 41) / 41;
# Work out the revision range to fetch.
#
# $min - last revision already handled; when false (undef or 0) the value
# of $self->rev_db_max is used instead
# $max - upper bound; when false the latest revision number reported by
# $self->ra is used instead
#
# Returns the list ($min + 1, $max).
sub get_fetch_range {
my ($self, $min, $max) = @_;
$max ||= $self->ra->get_latest_revnum;
- $min ||= $self->last_rev || 0;
+ $min ||= $self->rev_db_max;
(++$min, $max);
}
defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @exec))
or croak $!;
print $msg_fh $log_entry->{log} or croak $!;
- print $msg_fh "\ngit-svn-id: ", $self->full_url, '@',
- $log_entry->{revision}, ' ',
- $self->ra->uuid, "\n" or croak $!;
+ unless ($_no_metadata) {
+ print $msg_fh "\ngit-svn-id: ", $self->full_url, '@',
+ $log_entry->{revision}, ' ',
+ $self->ra->uuid, "\n" or croak $!;
+ }
$msg_fh->flush == 0 or croak $!;
close $msg_fh or croak $!;
chomp(my $commit = do { local $/; <$out_fh> });
die "Failed to commit, invalid sha1: $commit\n";
}
- command_noisy('update-ref',$self->refname, $commit);
- $self->rev_db_set($log_entry->{revision}, $commit);
+ $self->rev_db_set($log_entry->{revision}, $commit, 1);
$self->{last_rev} = $log_entry->{revision};
$self->{last_commit} = $commit;
print "r$log_entry->{revision} = $commit ($self->{ref_id})\n";
+ if (defined $_repack && (--$_repack_nr == 0)) {
+ $_repack_nr = $_repack;
+ # repack doesn't use any arguments with spaces in them, does it?
+ print "Running git repack $_repack_flags ...\n";
+ command_noisy('repack', split(/\s+/, $_repack_flags));
+ print "Done repacking\n";
+ }
return $commit;
}
return 1;
}
-sub match_paths {
- my ($self, $paths) = @_;
- return 1 if $paths->{'/'};
- $self->{path_regex} ||= qr/^\/\Q$self->{path}\E\/?/;
- grep /$self->{path_regex}/, keys %$paths and return 1;
- my $c = '';
- foreach (split m#/#, $self->rel_path) {
- $c .= "/$_";
- return 1 if $paths->{$c};
- }
- return 0;
-}
-
sub find_parent_branch {
my ($self, $paths, $rev) = @_;
- return undef unless $::_follow_parent;
+ return undef unless $_follow_parent;
unless (defined $paths) {
- $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1,
- sub { $paths = dup_changed_paths($_[0]) });
+ my $err_handler = $SVN::Error::handler;
+ $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs;
+ $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1, sub {
+ $paths =
+ Git::SVN::Ra::dup_changed_paths($_[0]) });
+ $SVN::Error::handler = $err_handler;
}
return undef unless defined $paths;
$gs = Git::SVN->init($new_url, '', $ref_id, $ref_id);
}
my ($r0, $parent) = $gs->find_rev_before($r, 1);
- if ($::_follow_parent && (!defined $r0 || !defined $parent)) {
+ if ($_follow_parent && (!defined $r0 || !defined $parent)) {
$gs->fetch(0, $r);
($r0, $parent) = $gs->last_rev_commit;
}
or die "SVN connection failed somewhere...\n";
}
print STDERR "Successfully followed parent\n";
- $ed->{new_fetch} = 1;
return $self->make_log_entry($rev, [$parent], $ed);
}
not_found:
return $log_entry;
}
$ed = SVN::Git::Fetcher->new($self);
- $ed->{new_fetch} = 1;
}
unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
die "SVN connection failed somewhere...\n";
my ($self, $rev, $parents, $ed) = @_;
my $untracked = $self->get_untracked($ed);
- return undef if (! $ed->{new_fetch} && ! $ed->{nr} && ! @$untracked);
-
open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!;
print $un "r$rev\n" or croak $!;
print $un $_, "\n" foreach @$untracked;
# to a revision: (41 * rev) is the byte offset.
# A record of 40 0s denotes an empty revision.
# And yes, it's still pretty fast (faster than Tie::File).
+# These files are disposable unless --no-metadata is set
+# rev_db_set($rev, $commit, $update_ref) writes $commit into the record for
+# SVN revision $rev, padding any gap before it with all-zero records.
+# $commit must be a full 40-character SHA1, otherwise the call croaks.
+# When $update_ref is true the ref named by $self->refname is updated too,
+# and INT/HUP/TERM/ALRM/PIPE/USR1/USR2 are held back until the db is back
+# in place; the handlers that were active before the call are then
+# restored and the most recent signal that arrived meanwhile is re-sent.
+# The write goes to "<db_path>.lock" (a copy under --no-metadata, the
+# renamed db otherwise), which is renamed over db_path at the end.
+# Dies or croaks on any copy, rename, open, seek, print or close failure.
sub rev_db_set {
- my ($self, $rev, $commit) = @_;
+ my ($self, $rev, $commit, $update_ref) = @_;
length $commit == 40 or croak "arg3 must be a full SHA1 hexsum\n";
- open my $fh, '+<', $self->{db_path} or croak $!;
+ my ($db, $db_lock) = ($self->{db_path}, "$self->{db_path}.lock");
+ my @sigs = qw/INT HUP TERM ALRM PIPE USR1 USR2/;
+ my (%old_sig, $sig);
+ if ($update_ref) {
+ # remember the current handlers so they can be put back afterwards
+ @old_sig{@sigs} = @SIG{@sigs};
+ $SIG{$_} = sub { $sig = $_[0] } foreach @sigs;
+ }
+ $LOCKFILES{$db_lock} = 1;
+ if ($_no_metadata) {
+ copy($db, $db_lock) or die "rev_db_set(@_): ",
+ "Failed to copy: ",
+ "$db => $db_lock ($!)\n";
+ } else {
+ rename $db, $db_lock or die "rev_db_set(@_): ",
+ "Failed to rename: ",
+ "$db => $db_lock ($!)\n";
+ }
+ open my $fh, '+<', $db_lock or croak $!;
my $offset = $rev * 41;
# assume that append is the common case:
seek $fh, 0, 2 or croak $!;
my $pos = tell $fh;
if ($pos < $offset) {
- print $fh (('0' x 40),"\n") x (($offset - $pos) / 41)
- or croak $!;
+ for (1 .. (($offset - $pos) / 41)) {
+ print $fh (('0' x 40),"\n") or croak $!;
+ }
}
seek $fh, $offset, 0 or croak $!;
print $fh $commit,"\n" or croak $!;
close $fh or croak $!;
+ if ($update_ref) {
+ command_noisy('update-ref', '-m', "r$rev",
+ $self->refname, $commit);
+ }
+ rename $db_lock, $db or die "rev_db_set(@_): ", "Failed to rename: ",
+ "$db_lock => $db ($!)\n";
+ delete $LOCKFILES{$db_lock};
+ if ($update_ref) {
+ # restore the previous handlers instead of forcing 'DEFAULT'
+ foreach my $name (@sigs) {
+ $SIG{$name} = defined $old_sig{$name} ?
+ $old_sig{$name} : 'DEFAULT';
+ }
+ kill $sig, $$ if defined $sig;
+ }
+}
+
+# Highest revision number that has a record in the rev_db file, worked out
+# from the file size alone (41 bytes per record); an empty file gives 0.
+# Dies when the file cannot be stat'ed or its size is not a multiple of 41.
+sub rev_db_max {
+ my ($self) = @_;
+ my $path = $self->{db_path};
+ my $size = (stat $path)[7];
+ defined $size or die "Couldn't stat $path: $!\n";
+ die "$path inconsistent size:$size\n" if $size % 41;
+ my $records = $size / 41;
+ return $records > 0 ? $records - 1 : 0;
}
sub rev_db_get {
my ($self, $base, $head, @gs) = @_;
my $inc = 1000;
my ($min, $max) = ($base, $head < $base + $inc ? $head : $base + $inc);
- my @paths = @gs == 1 ? ($gs[0]->{path}) : ('');
foreach my $gs (@gs) {
if (my $last_commit = $gs->last_commit) {
$gs->assert_index_clean($last_commit);
}
}
while (1) {
- my @revs;
+ my %revs;
my $err;
my $err_handler = $SVN::Error::handler;
$SVN::Error::handler = sub {
($err) = @_;
skip_unknown_revs($err);
};
- $self->get_log(\@paths, $min, $max, 0, 1, 1,
- sub { push @revs, [ dup_changed_paths($_[0]), $_[1] ]; });
- $SVN::Error::handler = $err_handler;
-
- if (! @revs && $err && $max >= $head) {
- print STDERR "Branch probably deleted:\n ",
- $err->expanded_message,
- "\nWill attempt to follow revisions ",
- "r$min .. r$max ",
- "committed before the deletion\n";
- @revs = map { [ undef, $_ ] } ($min .. $max);
- }
- foreach (@revs) {
- my ($paths, $r) = @$_;
- foreach my $gs (@gs) {
- if ($paths) {
- $gs->match_paths($paths) or next;
+ foreach my $gs (@gs) {
+ $self->get_log([$gs->{path}], $min, $max, 0, 1, 1, sub
+ { my ($paths, $rev) = @_;
+ push @{$revs{$rev}},
+ [ $gs,
+ dup_changed_paths($paths) ] });
+
+ next unless ($err && $max >= $head);
+
+ print STDERR "Path '$gs->{path}' ",
+ "was probably deleted:\n",
+ $err->expanded_message,
+ "\nWill attempt to follow ",
+ "revisions r$min .. r$max ",
+ "committed before the deletion\n";
+ my $hi = $max;
+ while (--$hi >= $min) {
+ my $ok;
+ $self->get_log([$gs->{path}], $min, $hi,
+ 0, 1, 1, sub {
+ my ($paths, $rev) = @_;
+ $ok = $rev;
+ push @{$revs{$rev}}, [ $gs,
+ dup_changed_paths($_[0])]});
+ if ($ok) {
+ print STDERR "r$min .. r$ok OK\n";
+ last;
}
+ }
+ }
+ $SVN::Error::handler = $err_handler;
+ foreach my $r (sort {$a <=> $b} keys %revs) {
+ foreach (@{$revs{$r}}) {
+ my ($gs, $paths) = @$_;
my $lr = $gs->last_rev;
next if defined $lr && $lr >= $r;
next if defined $gs->rev_db_get($r);
}
}
}
+ # pre-fill the .rev_db since it'll eventually get filled in
+ # with '0' x40 if something new gets committed
+ foreach my $gs (@gs) {
+ next if defined $gs->rev_db_get($max);
+ $gs->rev_db_set($max, 0 x40);
+ }
last if $max >= $head;
$min = $max + 1;
$max += $inc;