$_use_log_author $_add_author_from $_localtime/;
use Carp qw/croak/;
use File::Path qw/mkpath/;
-use File::Copy qw/copy/;
use IPC::Open3;
use Memoize; # core since 5.8.0, Jul 2002
-use Memoize::Storable;
use POSIX qw(:signal_h);
+use Time::Local;
use Git qw(
command
add_path_to_url
);
-my $can_use_yaml;
-BEGIN {
- $can_use_yaml = eval { require Git::SVN::Memoize::YAML; 1};
-}
-
+my $memo_backend;
our $_follow_parent = 1;
our $_minimize_url = 'unset';
our $default_repo_id = 'svn';
" globbed: $refname\n";
}
my $u = (::cmt_metadata("$refname"))[0];
+ if (!defined($u)) {
+ warn
+"W: $refname: no associated commit metadata from SVN, skipping\n";
+ next;
+ }
$u =~ s!^\Q$url\E(/|$)!! or die
"$refname: '$url' not found in '$u'\n";
if ($pathname ne $u) {
#
# Additionally, % must be escaped because it is used for escaping
# and we want our escaped refname to be reversible
- $refname =~ s{([ \%~\^:\?\*\[\t])}{sprintf('%%%02X',ord($1))}eg;
+ $refname =~ s{([ \%~\^:\?\*\[\t\\])}{sprintf('%%%02X',ord($1))}eg;
# no slash-separated component can begin with a dot .
# /.* becomes /%2E*
(++$min, $max);
}
+sub svn_dir {
+ command_oneline(qw(rev-parse --git-path svn));
+}
+
sub tmp_config {
my (@args) = @_;
- my $old_def_config = "$ENV{GIT_DIR}/svn/config";
- my $config = "$ENV{GIT_DIR}/svn/.metadata";
+ my $svn_dir = svn_dir();
+ my $old_def_config = "$svn_dir/config";
+ my $config = "$svn_dir/.metadata";
if (! -f $config && -f $old_def_config) {
rename $old_def_config, $config or
die "Failed rename $old_def_config => $config: $!\n";
or die "SVN connection failed somewhere...\n";
}
print STDERR "Successfully followed parent\n" unless $::_q > 1;
- return $self->make_log_entry($rev, [$parent], $ed);
+ return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
}
return undef;
}
unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
die "SVN connection failed somewhere...\n";
}
- $self->make_log_entry($rev, \@parents, $ed);
+ $self->make_log_entry($rev, \@parents, $ed, $last_rev, $self->path);
}
sub mkemptydirs {
my ($self, $r) = @_;
+ # add/remove/collect a paths table
+ #
+ # Paths are split into a tree of nodes, stored as a hash of hashes.
+ #
+ # Each node contains a 'path' entry for the path (if any) associated
+ # with that node and a 'children' entry for any nodes under that
+ # location.
+ #
+ # Removing a path requires a hash lookup for each component then
+ # dropping that node (and anything under it), which is substantially
+ # faster than a grep slice into a single hash of paths for large
+ # numbers of paths.
+ #
+ # For a large (200K) number of empty_dir directives this reduces
+ # scanning time to 3 seconds vs 10 minutes for grep+delete on a single
+ # hash of paths.
+ sub add_path {
+ my ($paths_table, $path) = @_;
+ my $node_ref;
+
+ foreach my $x (split('/', $path)) {
+ if (!exists($paths_table->{$x})) {
+ $paths_table->{$x} = { children => {} };
+ }
+
+ $node_ref = $paths_table->{$x};
+ $paths_table = $paths_table->{$x}->{children};
+ }
+
+ $node_ref->{path} = $path;
+ }
+
+ sub remove_path {
+ my ($paths_table, $path) = @_;
+ my $nodes_ref;
+ my $node_name;
+
+ foreach my $x (split('/', $path)) {
+ if (!exists($paths_table->{$x})) {
+ return;
+ }
+
+ $nodes_ref = $paths_table;
+ $node_name = $x;
+
+ $paths_table = $paths_table->{$x}->{children};
+ }
+
+ delete($nodes_ref->{$node_name});
+ }
+
+ sub collect_paths {
+ my ($paths_table, $paths_ref) = @_;
+
+ foreach my $v (values %$paths_table) {
+ my $p = $v->{path};
+ my $c = $v->{children};
+
+ collect_paths($c, $paths_ref);
+
+ if (defined($p)) {
+ push(@$paths_ref, $p);
+ }
+ }
+ }
+
sub scan {
- my ($r, $empty_dirs, $line) = @_;
+ my ($r, $paths_table, $line) = @_;
if (defined $r && $line =~ /^r(\d+)$/) {
return 0 if $1 > $r;
} elsif ($line =~ /^ \+empty_dir: (.+)$/) {
- $empty_dirs->{$1} = 1;
+ add_path($paths_table, $1);
} elsif ($line =~ /^ \-empty_dir: (.+)$/) {
- my @d = grep {m[^\Q$1\E(/|$)]} (keys %$empty_dirs);
- delete @$empty_dirs{@d};
+ remove_path($paths_table, $1);
}
1; # continue
};
- my %empty_dirs = ();
+ my @empty_dirs;
+ my %paths_table;
+
my $gz_file = "$self->{dir}/unhandled.log.gz";
if (-f $gz_file) {
if (!can_compress()) {
die "Unable to open $gz_file: $!\n";
my $line;
while ($gz->gzreadline($line) > 0) {
- scan($r, \%empty_dirs, $line) or last;
+ scan($r, \%paths_table, $line) or last;
}
$gz->gzclose;
}
if (open my $fh, '<', "$self->{dir}/unhandled.log") {
binmode $fh or croak "binmode: $!";
while (<$fh>) {
- scan($r, \%empty_dirs, $_) or last;
+ scan($r, \%paths_table, $_) or last;
}
close $fh;
}
+ collect_paths(\%paths_table, \@empty_dirs);
my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
- foreach my $d (sort keys %empty_dirs) {
+ foreach my $d (sort @empty_dirs) {
$d = uri_decode($d);
$d =~ s/$strip//;
next unless length($d);
$ENV{TZ} = 'UTC';
my $epoch_in_UTC =
- POSIX::strftime('%s', $S, $M, $H, $d, $m - 1, $Y - 1900);
+ Time::Local::timelocal($S, $M, $H, $d, $m - 1, $Y);
# Determine our local timezone (including DST) at the
# time of $epoch_in_UTC. $Git::SVN::Log::TZ stored the
delete $ENV{TZ};
}
- my $our_TZ = get_tz_offset();
+ my $our_TZ = get_tz_offset($epoch_in_UTC);
# This converts $epoch_in_UTC into our local timezone.
my ($sec, $min, $hour, $mday, $mon, $year,
}
if ($author =~ /^\s*(.+?)\s*<(.*)>\s*$/) {
my ($name, $email) = ($1, $2);
- $email = undef if length $2 == 0;
return [$name, $email];
} else {
die "Author: $orig_author: $::_authors_prog returned "
}
sub find_extra_svk_parents {
- my ($self, $ed, $tickets, $parents) = @_;
+ my ($self, $tickets, $parents) = @_;
# aha! svk:merge property changed...
my @tickets = split "\n", $tickets;
my @known_parents;
sub lookup_svn_merge {
my $uuid = shift;
my $url = shift;
- my $merge = shift;
+ my $source = shift;
+ my $revs = shift;
- my ($source, $revs) = split ":", $merge;
my $path = $source;
$path =~ s{^/}{};
my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
@rv;
}
-sub check_cherry_pick {
+sub check_cherry_pick2 {
my $base = shift;
my $tip = shift;
my $parents = shift;
delete $commits{$commit};
}
}
- return (keys %commits);
+ my @k = (keys %commits);
+ return (scalar @k, $k[0]);
}
sub has_no_changes {
my $hash = shift;
my $path = shift;
- if ($can_use_yaml) {
+ unless ($memo_backend) {
+ if (eval { require Git::SVN::Memoize::YAML; 1}) {
+ $memo_backend = 1;
+ } else {
+ require Memoize::Storable;
+ $memo_backend = -1;
+ }
+ }
+
+ if ($memo_backend > 0) {
tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml";
} else {
- tie %$hash => 'Memoize::Storable', "$path.db", 'nstore';
+ # first verify that any existing file can actually be loaded
+ # (it may have been saved by an incompatible version)
+ my $db = "$path.db";
+ if (-e $db) {
+ use Storable qw(retrieve);
+
+ if (!eval { retrieve($db); 1 }) {
+ unlink $db or die "unlink $db failed: $!";
+ }
+ }
+ tie %$hash => 'Memoize::Storable', $db, 'nstore';
}
}
return if $memoized;
$memoized = 1;
- my $cache_path = "$ENV{GIT_DIR}/svn/.caches/";
+ my $cache_path = svn_dir() . '/.caches/';
mkpath([$cache_path]) unless -d $cache_path;
my %lookup_svn_merge_cache;
- my %check_cherry_pick_cache;
+ my %check_cherry_pick2_cache;
my %has_no_changes_cache;
- my %_rev_list_cache;
tie_for_persistent_memoization(\%lookup_svn_merge_cache,
"$cache_path/lookup_svn_merge");
LIST_CACHE => ['HASH' => \%lookup_svn_merge_cache],
;
- tie_for_persistent_memoization(\%check_cherry_pick_cache,
- "$cache_path/check_cherry_pick");
- memoize 'check_cherry_pick',
+ tie_for_persistent_memoization(\%check_cherry_pick2_cache,
+ "$cache_path/check_cherry_pick2");
+ memoize 'check_cherry_pick2',
SCALAR_CACHE => 'FAULT',
- LIST_CACHE => ['HASH' => \%check_cherry_pick_cache],
+ LIST_CACHE => ['HASH' => \%check_cherry_pick2_cache],
;
tie_for_persistent_memoization(\%has_no_changes_cache,
SCALAR_CACHE => ['HASH' => \%has_no_changes_cache],
LIST_CACHE => 'FAULT',
;
-
- tie_for_persistent_memoization(\%_rev_list_cache,
- "$cache_path/_rev_list");
- memoize '_rev_list',
- SCALAR_CACHE => 'FAULT',
- LIST_CACHE => ['HASH' => \%_rev_list_cache],
- ;
-
}
sub unmemoize_svn_mergeinfo_functions {
$memoized = 0;
Memoize::unmemoize 'lookup_svn_merge';
- Memoize::unmemoize 'check_cherry_pick';
+ Memoize::unmemoize 'check_cherry_pick2';
Memoize::unmemoize 'has_no_changes';
- Memoize::unmemoize '_rev_list';
}
sub clear_memoized_mergeinfo_caches {
die "Only call this method in non-memoized context" if ($memoized);
- my $cache_path = "$ENV{GIT_DIR}/svn/.caches/";
+ my $cache_path = svn_dir() . '/.caches/';
return unless -d $cache_path;
for my $cache_file (("$cache_path/lookup_svn_merge",
- "$cache_path/check_cherry_pick",
+ "$cache_path/check_cherry_pick", # old
+ "$cache_path/check_cherry_pick2",
"$cache_path/has_no_changes")) {
for my $suffix (qw(yaml db)) {
my $file = "$cache_file.$suffix";
return @excluded;
}
+# Compute what's new in svn:mergeinfo.
+sub mergeinfo_changes {
+ my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
+ my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
+ my $old_minfo = {};
+
+ my $ra = $self->ra;
+ # Give up if $old_path isn't in the repo.
+ # This is probably a merge on a subtree.
+ if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
+ warn "W: ignoring svn:mergeinfo on $old_path, ",
+ "directory didn't exist in r$old_rev\n";
+ return {};
+ }
+ my (undef, undef, $props) = $ra->get_dir($old_path, $old_rev);
+ if (defined $props->{"svn:mergeinfo"}) {
+ my %omi = map {split ":", $_ } split "\n",
+ $props->{"svn:mergeinfo"};
+ $old_minfo = \%omi;
+ }
+
+ my %changes = ();
+ foreach my $p (keys %minfo) {
+ my $a = $old_minfo->{$p} || "";
+ my $b = $minfo{$p};
+ # Omit merged branches whose ranges lists are unchanged.
+ next if $a eq $b;
+ # Remove any common range list prefix.
+ ($a ^ $b) =~ /^[\0]*/;
+ my $common_prefix = rindex $b, ",", $+[0] - 1;
+ $changes{$p} = substr $b, $common_prefix + 1;
+ }
+ print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
+ scalar(keys %minfo), " sources, ",
+ scalar(keys %changes), " changed\n";
+
+ return \%changes;
+}
# note: this function should only be called if the various dirprops
# have actually changed
sub find_extra_svn_parents {
- my ($self, $ed, $mergeinfo, $parents) = @_;
+ my ($self, $mergeinfo, $parents) = @_;
# aha! svk:merge property changed...
memoize_svn_mergeinfo_functions();
# history. Then, we figure out which git revisions are in
# that tip, but not this revision. If all of those revisions
# are now marked as merge, we can add the tip as a parent.
- my @merges = split "\n", $mergeinfo;
+ my @merges = sort keys %$mergeinfo;
my @merge_tips;
my $url = $self->url;
my $uuid = $self->ra_uuid;
my @all_ranges;
for my $merge ( @merges ) {
my ($tip_commit, @ranges) =
- lookup_svn_merge( $uuid, $url, $merge );
+ lookup_svn_merge( $uuid, $url,
+ $merge, $mergeinfo->{$merge} );
unless (!$tip_commit or
grep { $_ eq $tip_commit } @$parents ) {
push @merge_tips, $tip_commit;
# check merge tips for new parents
my @new_parents;
for my $merge_tip ( @merge_tips ) {
- my $spec = shift @merges;
+ my $merge = shift @merges;
next unless $merge_tip and $excluded{$merge_tip};
+ my $spec = "$merge:$mergeinfo->{$merge}";
# check out 'new' tips
my $merge_base;
}
# double check that there are no missing non-merge commits
- my (@incomplete) = check_cherry_pick(
+ my ($ninc, $ifirst) = check_cherry_pick2(
$merge_base, $merge_tip,
$parents,
@all_ranges,
);
- if ( @incomplete ) {
- warn "W:svn cherry-pick ignored ($spec) - missing "
- .@incomplete." commit(s) (eg $incomplete[0])\n";
+ if ($ninc) {
+ warn "W: svn cherry-pick ignored ($spec) - missing " .
+ "$ninc commit(s) (eg $ifirst)\n";
} else {
- warn
- "Found merge parent (svn:mergeinfo prop): ",
- $merge_tip, "\n";
+ warn "Found merge parent ($spec): ", $merge_tip, "\n";
push @new_parents, $merge_tip;
}
}
}
sub make_log_entry {
- my ($self, $rev, $parents, $ed) = @_;
+ my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
my $untracked = $self->get_untracked($ed);
my @parents = @$parents;
- my $ps = $ed->{path_strip} || "";
- for my $path ( grep { m/$ps/ } %{$ed->{dir_prop}} ) {
- my $props = $ed->{dir_prop}{$path};
- if ( $props->{"svk:merge"} ) {
- $self->find_extra_svk_parents
- ($ed, $props->{"svk:merge"}, \@parents);
+ my $props = $ed->{dir_prop}{$self->path};
+ if ($self->follow_parent) {
+ my $tickets = $props->{"svk:merge"};
+ if ($tickets) {
+ $self->find_extra_svk_parents($tickets, \@parents);
}
- if ( $props->{"svn:mergeinfo"} ) {
- $self->find_extra_svn_parents
- ($ed,
- $props->{"svn:mergeinfo"},
- \@parents);
+
+ my $mergeinfo_prop = $props->{"svn:mergeinfo"};
+ if ($mergeinfo_prop) {
+ my $mi_changes = $self->mergeinfo_changes(
+ $parent_path,
+ $parent_rev,
+ $self->path,
+ $rev,
+ $mergeinfo_prop);
+ $self->find_extra_svn_parents($mi_changes, \@parents);
}
}
remove_username($full_url);
$log_entry{metadata} = "$full_url\@$r $uuid";
$log_entry{svm_revision} = $r;
- $email ||= "$author\@$uuid";
- $commit_email ||= "$author\@$uuid";
+ $email = "$author\@$uuid" unless defined $email;
+ $commit_email = "$author\@$uuid" unless defined $commit_email;
} elsif ($self->use_svnsync_props) {
my $full_url = canonicalize_url(
add_path_to_url( $self->svnsync->{url}, $self->path )
remove_username($full_url);
my $uuid = $self->svnsync->{uuid};
$log_entry{metadata} = "$full_url\@$rev $uuid";
- $email ||= "$author\@$uuid";
- $commit_email ||= "$author\@$uuid";
+ $email = "$author\@$uuid" unless defined $email;
+ $commit_email = "$author\@$uuid" unless defined $commit_email;
} else {
my $url = $self->metadata_url;
remove_username($url);
my $uuid = $self->rewrite_uuid || $self->ra->get_uuid;
$log_entry{metadata} = "$url\@$rev " . $uuid;
- $email ||= "$author\@" . $uuid;
- $commit_email ||= "$author\@" . $uuid;
+ $email = "$author\@$uuid" unless defined $email;
+ $commit_email = "$author\@$uuid" unless defined $commit_email;
}
$log_entry{name} = $name;
$log_entry{email} = $email;
# both of these options make our .rev_db file very, very important
# and we can't afford to lose it because rebuild() won't work
if ($self->use_svm_props || $self->no_metadata) {
+ require File::Copy;
$sync = 1;
- copy($db, $db_lock) or die "rev_map_set(@_): ",
+ File::Copy::copy($db, $db_lock) or die "rev_map_set(@_): ",
"Failed to copy: ",
"$db => $db_lock ($!)\n";
} else {
"refs/remotes/$prefix$default_ref_id";
}
$_[1] = $repo_id;
- my $dir = "$ENV{GIT_DIR}/svn/$ref_id";
+ my $svn_dir = svn_dir();
+ my $dir = "$svn_dir/$ref_id";
- # Older repos imported by us used $GIT_DIR/svn/foo instead of
- # $GIT_DIR/svn/refs/remotes/foo when tracking refs/remotes/foo
- if ($ref_id =~ m{^refs/remotes/(.*)}) {
- my $old_dir = "$ENV{GIT_DIR}/svn/$1";
+ # Older repos imported by us used $svn_dir/foo instead of
+ # $svn_dir/refs/remotes/foo when tracking refs/remotes/foo
+ if ($ref_id =~ m{^refs/remotes/(.+)}) {
+ my $old_dir = "$svn_dir/$1";
if (-d $old_dir && ! -d $dir) {
$dir = $old_dir;
}
mkpath([$dir]);
my $obj = bless {
ref_id => $ref_id, dir => $dir, index => "$dir/index",
- config => "$ENV{GIT_DIR}/svn/config",
+ config => "$svn_dir/config",
map_root => "$dir/.rev_map", repo_id => $repo_id }, $class;
# Ensure it gets canonicalized