i18n: unpack-trees: avoid substituting only a verb in sentences
[gitweb.git] / perl / Git / SVN.pm
index 8478d0c95293b531547084e19b6680cf73187469..b2c14e2ff5485f49af83530ce1b1921c99641f5c 100644 (file)
@@ -9,12 +9,10 @@ package Git::SVN;
            $_use_log_author $_add_author_from $_localtime/;
 use Carp qw/croak/;
 use File::Path qw/mkpath/;
-use File::Copy qw/copy/;
 use IPC::Open3;
-use Time::Local;
 use Memoize;  # core since 5.8.0, Jul 2002
-use Memoize::Storable;
 use POSIX qw(:signal_h);
+use Time::Local;
 
 use Git qw(
     command
@@ -22,14 +20,18 @@ package Git::SVN;
     command_noisy
     command_output_pipe
     command_close_pipe
+    get_tz_offset
+);
+use Git::SVN::Utils qw(
+       fatal
+       can_compress
+       join_paths
+       canonicalize_path
+       canonicalize_url
+       add_path_to_url
 );
-use Git::SVN::Utils qw(fatal can_compress);
-
-my $can_use_yaml;
-BEGIN {
-       $can_use_yaml = eval { require Git::SVN::Memoize::YAML; 1};
-}
 
+my $memo_backend;
 our $_follow_parent  = 1;
 our $_minimize_url   = 'unset';
 our $default_repo_id = 'svn';
@@ -195,9 +197,9 @@ sub read_all_remotes {
                } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) {
                        $r->{$1}->{svm} = {};
                } elsif (m!^(.+)\.url=\s*(.*)\s*$!) {
-                       $r->{$1}->{url} = $2;
+                       $r->{$1}->{url} = canonicalize_url($2);
                } elsif (m!^(.+)\.pushurl=\s*(.*)\s*$!) {
-                       $r->{$1}->{pushurl} = $2;
+                       $r->{$1}->{pushurl} = canonicalize_url($2);
                } elsif (m!^(.+)\.ignore-refs=\s*(.*)\s*$!) {
                        $r->{$1}->{ignore_refs_regex} = $2;
                } elsif (m!^(.+)\.(branches|tags)=$svn_refspec$!) {
@@ -290,7 +292,7 @@ sub find_existing_remote {
 
 sub init_remote_config {
        my ($self, $url, $no_write) = @_;
-       $url =~ s!/+$!!; # strip trailing slash
+       $url = canonicalize_url($url);
        my $r = read_all_remotes();
        my $existing = find_existing_remote($url, $r);
        if ($existing) {
@@ -314,12 +316,10 @@ sub init_remote_config {
                                print STDERR "Using higher level of URL: ",
                                             "$url => $min_url\n";
                        }
-                       my $old_path = $self->{path};
-                       $self->{path} = $url;
-                       $self->{path} =~ s!^\Q$min_url\E(/|$)!!;
-                       if (length $old_path) {
-                               $self->{path} .= "/$old_path";
-                       }
+                       my $old_path = $self->path;
+                       $url =~ s!^\Q$min_url\E(/|$)!!;
+                       $url = join_paths($url, $old_path);
+                       $self->path($url);
                        $url = $min_url;
                }
        }
@@ -343,18 +343,22 @@ sub init_remote_config {
        unless ($no_write) {
                command_noisy('config',
                              "svn-remote.$self->{repo_id}.url", $url);
-               $self->{path} =~ s{^/}{};
-               $self->{path} =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
+               my $path = $self->path;
+               $path =~ s{^/}{};
+               $path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
+               $self->path($path);
                command_noisy('config', '--add',
                              "svn-remote.$self->{repo_id}.fetch",
-                             "$self->{path}:".$self->refname);
+                             $self->path.":".$self->refname);
        }
-       $self->{url} = $url;
+       $self->url($url);
 }
 
 sub find_by_url { # repos_root and, path are optional
        my ($class, $full_url, $repos_root, $path) = @_;
 
+       $full_url = canonicalize_url($full_url);
+
        return undef unless defined $full_url;
        remove_username($full_url);
        remove_username($repos_root) if defined $repos_root;
@@ -393,6 +397,11 @@ sub find_by_url { # repos_root and, path are optional
                        }
                        $p =~ s#^\Q$z\E(?:/|$)#$prefix# or next;
                }
+
+               # remote fetch paths are not URI escaped.  Decode ours
+               # so they match
+               $p = uri_decode($p);
+
                foreach my $f (keys %$fetch) {
                        next if $f ne $p;
                        return Git::SVN->new($fetch->{$f}, $repo_id, $f);
@@ -435,20 +444,25 @@ sub new {
                }
        }
        my $self = _new($class, $repo_id, $ref_id, $path);
-       if (!defined $self->{path} || !length $self->{path}) {
+       if (!defined $self->path || !length $self->path) {
                my $fetch = command_oneline('config', '--get',
                                            "svn-remote.$repo_id.fetch",
                                            ":$ref_id\$") or
                     die "Failed to read \"svn-remote.$repo_id.fetch\" ",
                         "\":$ref_id\$\" in config\n";
-               ($self->{path}, undef) = split(/\s*:\s*/, $fetch);
+               my($path) = split(/\s*:\s*/, $fetch);
+               $self->path($path);
        }
-       $self->{path} =~ s{/+}{/}g;
-       $self->{path} =~ s{\A/}{};
-       $self->{path} =~ s{/\z}{};
-       $self->{url} = command_oneline('config', '--get',
-                                      "svn-remote.$repo_id.url") or
+       {
+               my $path = $self->path;
+               $path =~ s{\A/}{};
+               $path =~ s{/\z}{};
+               $self->path($path);
+       }
+       my $url = command_oneline('config', '--get',
+                                 "svn-remote.$repo_id.url") or
                   die "Failed to read \"svn-remote.$repo_id.url\" in config\n";
+       $self->url($url);
        $self->{pushurl} = eval { command_oneline('config', '--get',
                                  "svn-remote.$repo_id.pushurl") };
        $self->rebuild;
@@ -461,8 +475,8 @@ sub refname {
        # It cannot end with a slash /, we'll throw up on this because
        # SVN can't have directories with a slash in their name, either:
        if ($refname =~ m{/$}) {
-               die "ref: '$refname' ends with a trailing slash, this is ",
-                   "not permitted by git nor Subversion\n";
+               die "ref: '$refname' ends with a trailing slash; this is ",
+                   "not permitted by git or Subversion\n";
        }
 
        # It cannot have ASCII control character space, tilde ~, caret ^,
@@ -471,7 +485,7 @@ sub refname {
        #
        # Additionally, % must be escaped because it is used for escaping
        # and we want our escaped refname to be reversible
-       $refname =~ s{([ \%~\^:\?\*\[\t])}{uc sprintf('%%%02x',ord($1))}eg;
+       $refname =~ s{([ \%~\^:\?\*\[\t])}{sprintf('%%%02X',ord($1))}eg;
 
        # no slash-separated component can begin with a dot .
        # /.* becomes /%2E*
@@ -552,8 +566,7 @@ sub _set_svm_vars {
                # username is of no interest
                $src =~ s{(^[a-z\+]*://)[^/@]*@}{$1};
 
-               my $replace = $ra->{url};
-               $replace .= "/$path" if length $path;
+               my $replace = add_path_to_url($ra->url, $path);
 
                my $section = "svn-remote.$self->{repo_id}";
                tmp_config("$section.svm-source", $src);
@@ -567,20 +580,21 @@ sub _set_svm_vars {
        }
 
        my $r = $ra->get_latest_revnum;
-       my $path = $self->{path};
+       my $path = $self->path;
        my %tried;
        while (length $path) {
-               unless ($tried{"$self->{url}/$path"}) {
+               my $try = add_path_to_url($self->url, $path);
+               unless ($tried{$try}) {
                        return $ra if $self->read_svm_props($ra, $path, $r);
-                       $tried{"$self->{url}/$path"} = 1;
+                       $tried{$try} = 1;
                }
                $path =~ s#/?[^/]+$##;
        }
        die "Path: '$path' should be ''\n" if $path ne '';
        return $ra if $self->read_svm_props($ra, $path, $r);
-       $tried{"$self->{url}/$path"} = 1;
+       $tried{ add_path_to_url($self->url, $path) } = 1;
 
-       if ($ra->{repos_root} eq $self->{url}) {
+       if ($ra->{repos_root} eq $self->url) {
                die @err, (map { "  $_\n" } keys %tried), "\n";
        }
 
@@ -590,20 +604,21 @@ sub _set_svm_vars {
        $path = $ra->{svn_path};
        $ra = Git::SVN::Ra->new($ra->{repos_root});
        while (length $path) {
-               unless ($tried{"$ra->{url}/$path"}) {
+               my $try = add_path_to_url($ra->url, $path);
+               unless ($tried{$try}) {
                        $ok = $self->read_svm_props($ra, $path, $r);
                        last if $ok;
-                       $tried{"$ra->{url}/$path"} = 1;
+                       $tried{$try} = 1;
                }
                $path =~ s#/?[^/]+$##;
        }
        die "Path: '$path' should be ''\n" if $path ne '';
        $ok ||= $self->read_svm_props($ra, $path, $r);
-       $tried{"$ra->{url}/$path"} = 1;
+       $tried{ add_path_to_url($ra->url, $path) } = 1;
        if (!$ok) {
                die @err, (map { "  $_\n" } keys %tried), "\n";
        }
-       Git::SVN::Ra->new($self->{url});
+       Git::SVN::Ra->new($self->url);
 }
 
 sub svnsync {
@@ -670,7 +685,7 @@ sub ra_uuid {
                if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/i) {
                        $self->{ra_uuid} = $uuid;
                } else {
-                       die "ra_uuid called without URL\n" unless $self->{url};
+                       die "ra_uuid called without URL\n" unless $self->url;
                        $self->{ra_uuid} = $self->ra->get_uuid;
                        tmp_config('--add', $key, $self->{ra_uuid});
                }
@@ -694,7 +709,7 @@ sub repos_root {
 
 sub ra {
        my ($self) = shift;
-       my $ra = Git::SVN::Ra->new($self->{url});
+       my $ra = Git::SVN::Ra->new($self->url);
        $self->_set_repos_root($ra->{repos_root});
        if ($self->use_svm_props && !$self->{svm}) {
                if ($self->no_metadata) {
@@ -728,7 +743,7 @@ sub prop_walk {
        $path =~ s#^/*#/#g;
        my $p = $path;
        # Strip the irrelevant part of the path.
-       $p =~ s#^/+\Q$self->{path}\E(/|$)#/#;
+       $p =~ s#^/+\Q@{[$self->path]}\E(/|$)#/#;
        # Ensure the path is terminated by a `/'.
        $p =~ s#/*$#/#;
 
@@ -749,7 +764,7 @@ sub prop_walk {
 
        foreach (sort keys %$dirent) {
                next if $dirent->{$_}->{kind} != $SVN::Node::dir;
-               $self->prop_walk($self->{path} . $p . $_, $rev, $sub);
+               $self->prop_walk($self->path . $p . $_, $rev, $sub);
        }
 }
 
@@ -919,20 +934,19 @@ sub rewrite_uuid {
 
 sub metadata_url {
        my ($self) = @_;
-       ($self->rewrite_root || $self->{url}) .
-          (length $self->{path} ? '/' . $self->{path} : '');
+       my $url = $self->rewrite_root || $self->url;
+       return canonicalize_url( add_path_to_url( $url, $self->path ) );
 }
 
 sub full_url {
        my ($self) = @_;
-       $self->{url} . (length $self->{path} ? '/' . $self->{path} : '');
+       return canonicalize_url( add_path_to_url( $self->url, $self->path ) );
 }
 
 sub full_pushurl {
        my ($self) = @_;
        if ($self->{pushurl}) {
-               return $self->{pushurl} . (length $self->{path} ? '/' .
-                      $self->{path} : '');
+               return canonicalize_url( add_path_to_url( $self->{pushurl}, $self->path ) );
        } else {
                return $self->full_url;
        }
@@ -1048,20 +1062,20 @@ sub do_git_commit {
 
 sub match_paths {
        my ($self, $paths, $r) = @_;
-       return 1 if $self->{path} eq '';
-       if (my $path = $paths->{"/$self->{path}"}) {
+       return 1 if $self->path eq '';
+       if (my $path = $paths->{"/".$self->path}) {
                return ($path->{action} eq 'D') ? 0 : 1;
        }
-       $self->{path_regex} ||= qr/^\/\Q$self->{path}\E\//;
+       $self->{path_regex} ||= qr{^/\Q@{[$self->path]}\E/};
        if (grep /$self->{path_regex}/, keys %$paths) {
                return 1;
        }
        my $c = '';
-       foreach (split m#/#, $self->{path}) {
+       foreach (split m#/#, $self->path) {
                $c .= "/$_";
                next unless ($paths->{$c} &&
                             ($paths->{$c}->{action} =~ /^[AR]$/));
-               if ($self->ra->check_path($self->{path}, $r) ==
+               if ($self->ra->check_path($self->path, $r) ==
                    $SVN::Node::dir) {
                        return 1;
                }
@@ -1075,14 +1089,14 @@ sub find_parent_branch {
        unless (defined $paths) {
                my $err_handler = $SVN::Error::handler;
                $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs;
-               $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1,
+               $self->ra->get_log([$self->path], $rev, $rev, 0, 1, 1,
                                   sub { $paths = $_[0] });
                $SVN::Error::handler = $err_handler;
        }
        return undef unless defined $paths;
 
        # look for a parent from another branch:
-       my @b_path_components = split m#/#, $self->{path};
+       my @b_path_components = split m#/#, $self->path;
        my @a_path_components;
        my $i;
        while (@b_path_components) {
@@ -1099,8 +1113,8 @@ sub find_parent_branch {
        }
        my $r = $i->{copyfrom_rev};
        my $repos_root = $self->ra->{repos_root};
-       my $url = $self->ra->{url};
-       my $new_url = $url . $branch_from;
+       my $url = $self->ra->url;
+       my $new_url = canonicalize_url( add_path_to_url( $url, $branch_from ) );
        print STDERR  "Found possible branch point: ",
                      "$new_url => ", $self->full_url, ", $r\n"
                      unless $::_q > 1;
@@ -1114,7 +1128,7 @@ sub find_parent_branch {
                        ($base, $head) = parse_revision_argument(0, $r);
                } else {
                        if ($r0 < $r) {
-                               $gs->ra->get_log([$gs->{path}], $r0 + 1, $r, 1,
+                               $gs->ra->get_log([$gs->path], $r0 + 1, $r, 1,
                                        0, 1, sub { $base = $_[1] - 1 });
                        }
                }
@@ -1136,7 +1150,7 @@ sub find_parent_branch {
                        # at the moment), so we can't rely on it
                        $self->{last_rev} = $r0;
                        $self->{last_commit} = $parent;
-                       $ed = Git::SVN::Fetcher->new($self, $gs->{path});
+                       $ed = Git::SVN::Fetcher->new($self, $gs->path);
                        $gs->ra->gs_do_switch($r0, $rev, $gs,
                                              $self->full_url, $ed)
                          or die "SVN connection failed somewhere...\n";
@@ -1159,7 +1173,7 @@ sub find_parent_branch {
                          or die "SVN connection failed somewhere...\n";
                }
                print STDERR "Successfully followed parent\n" unless $::_q > 1;
-               return $self->make_log_entry($rev, [$parent], $ed);
+               return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
        }
        return undef;
 }
@@ -1172,7 +1186,7 @@ sub do_fetch {
                # we can have a branch that was deleted, then re-added
                # under the same name but copied from another path, in
                # which case we'll have multiple parents (we don't
-               # want to break the original ref, nor lose copypath info):
+               # want to break the original ref or lose copypath info):
                if (my $log_entry = $self->find_parent_branch($paths, $rev)) {
                        push @{$log_entry->{parents}}, $lc;
                        return $log_entry;
@@ -1191,26 +1205,93 @@ sub do_fetch {
        unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
                die "SVN connection failed somewhere...\n";
        }
-       $self->make_log_entry($rev, \@parents, $ed);
+       $self->make_log_entry($rev, \@parents, $ed, $last_rev, $self->path);
 }
 
 sub mkemptydirs {
        my ($self, $r) = @_;
 
+       # Helpers to add paths to, remove paths from, and collect paths out of a paths table.
+       #
+       # Paths are split into a tree of nodes, stored as a hash of hashes.
+       #
+       # Each node contains a 'path' entry for the path (if any) associated
+       # with that node and a 'children' entry for any nodes under that
+       # location.
+       #
+       # Removing a path requires a hash lookup for each component then
+       # dropping that node (and anything under it), which is substantially
+       # faster than a grep slice into a single hash of paths for large
+       # numbers of paths.
+       #
+       # For a large (200K) number of empty_dir directives this reduces
+       # scanning time to 3 seconds vs 10 minutes for grep+delete on a single
+       # hash of paths.
+       sub add_path { # record $path in the tree of nodes rooted at $paths_table
+               my ($paths_table, $path) = @_;
+               my $node_ref;
+               # Walk one level down per '/'-separated component, creating nodes as needed.
+               foreach my $x (split('/', $path)) {
+                       if (!exists($paths_table->{$x})) {
+                               $paths_table->{$x} = { children => {} };
+                       }
+                       # Remember this node, then descend into its children.
+                       $node_ref = $paths_table->{$x};
+                       $paths_table = $paths_table->{$x}->{children};
+               }
+               # Tag the last node visited with the full path it stands for.
+               $node_ref->{path} = $path;
+       }
+
+       sub remove_path { # drop the node for $path, and everything stored under it
+               my ($paths_table, $path) = @_;
+               my $nodes_ref;
+               my $node_name;
+               # Follow $path component by component; a missing component means nothing to remove.
+               foreach my $x (split('/', $path)) {
+                       if (!exists($paths_table->{$x})) {
+                               return;
+                       }
+                       # Track the table that holds the current node and the key it is stored under.
+                       $nodes_ref = $paths_table;
+                       $node_name = $x;
+                       # Descend into the current node's children for the next component.
+                       $paths_table = $paths_table->{$x}->{children};
+               }
+               # Deleting the last node visited also discards its whole 'children' subtree.
+               delete($nodes_ref->{$node_name});
+       }
+
+       sub collect_paths { # append every path stored in the tree to @$paths_ref
+               my ($paths_table, $paths_ref) = @_;
+               # Visit each node at this level (hash order, so the result is unsorted).
+               foreach my $v (values %$paths_table) {
+                       my $p = $v->{path};
+                       my $c = $v->{children};
+                       # Recurse first: paths stored below this node are pushed before its own.
+                       collect_paths($c, $paths_ref);
+                       # Nodes without a 'path' entry contribute nothing themselves.
+                       if (defined($p)) {
+                               push(@$paths_ref, $p);
+                       }
+               }
+       }
+
        sub scan {
-               my ($r, $empty_dirs, $line) = @_;
+               my ($r, $paths_table, $line) = @_;
                if (defined $r && $line =~ /^r(\d+)$/) {
                        return 0 if $1 > $r;
                } elsif ($line =~ /^  \+empty_dir: (.+)$/) {
-                       $empty_dirs->{$1} = 1;
+                       add_path($paths_table, $1);
                } elsif ($line =~ /^  \-empty_dir: (.+)$/) {
-                       my @d = grep {m[^\Q$1\E(/|$)]} (keys %$empty_dirs);
-                       delete @$empty_dirs{@d};
+                       remove_path($paths_table, $1);
                }
                1; # continue
        };
 
-       my %empty_dirs = ();
+       my @empty_dirs;
+       my %paths_table;
+
        my $gz_file = "$self->{dir}/unhandled.log.gz";
        if (-f $gz_file) {
                if (!can_compress()) {
@@ -1221,7 +1302,7 @@ sub mkemptydirs {
                                die "Unable to open $gz_file: $!\n";
                        my $line;
                        while ($gz->gzreadline($line) > 0) {
-                               scan($r, \%empty_dirs, $line) or last;
+                               scan($r, \%paths_table, $line) or last;
                        }
                        $gz->gzclose;
                }
@@ -1230,13 +1311,14 @@ sub mkemptydirs {
        if (open my $fh, '<', "$self->{dir}/unhandled.log") {
                binmode $fh or croak "binmode: $!";
                while (<$fh>) {
-                       scan($r, \%empty_dirs, $_) or last;
+                       scan($r, \%paths_table, $_) or last;
                }
                close $fh;
        }
 
-       my $strip = qr/\A\Q$self->{path}\E(?:\/|$)/;
-       foreach my $d (sort keys %empty_dirs) {
+       collect_paths(\%paths_table, \@empty_dirs);
+       my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
+       foreach my $d (sort @empty_dirs) {
                $d = uri_decode($d);
                $d =~ s/$strip//;
                next unless length($d);
@@ -1292,14 +1374,6 @@ sub get_untracked {
        \@out;
 }
 
-sub get_tz {
-       # some systmes don't handle or mishandle %z, so be creative.
-       my $t = shift || time;
-       my $gm = timelocal(gmtime($t));
-       my $sign = qw( + + - )[ $t <=> $gm ];
-       return sprintf("%s%02d%02d", $sign, (gmtime(abs($t - $gm)))[2,1]);
-}
-
 # parse_svn_date(DATE)
 # --------------------
 # Given a date (in UTC) from Subversion, return a string in the format
@@ -1310,7 +1384,7 @@ sub get_tz {
 sub parse_svn_date {
        my $date = shift || return '+0000 1970-01-01 00:00:00';
        my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
-                                           (\d\d)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
+                                           (\d\d?)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
                                         croak "Unable to parse date: $date\n";
        my $parsed_date;    # Set next.
 
@@ -1321,7 +1395,7 @@ sub parse_svn_date {
                $ENV{TZ} = 'UTC';
 
                my $epoch_in_UTC =
-                   POSIX::strftime('%s', $S, $M, $H, $d, $m - 1, $Y - 1900);
+                   Time::Local::timelocal($S, $M, $H, $d, $m - 1, $Y - 1900);
 
                # Determine our local timezone (including DST) at the
                # time of $epoch_in_UTC.  $Git::SVN::Log::TZ stored the
@@ -1332,7 +1406,7 @@ sub parse_svn_date {
                        delete $ENV{TZ};
                }
 
-               my $our_TZ = get_tz();
+               my $our_TZ = get_tz_offset();
 
                # This converts $epoch_in_UTC into our local timezone.
                my ($sec, $min, $hour, $mday, $mon, $year,
@@ -1422,19 +1496,18 @@ sub check_author {
 }
 
 sub find_extra_svk_parents {
-       my ($self, $ed, $tickets, $parents) = @_;
+       my ($self, $tickets, $parents) = @_;
        # aha!  svk:merge property changed...
        my @tickets = split "\n", $tickets;
        my @known_parents;
        for my $ticket ( @tickets ) {
                my ($uuid, $path, $rev) = split /:/, $ticket;
                if ( $uuid eq $self->ra_uuid ) {
-                       my $url = $self->{url};
-                       my $repos_root = $url;
+                       my $repos_root = $self->url;
                        my $branch_from = $path;
                        $branch_from =~ s{^/}{};
-                       my $gs = $self->other_gs($repos_root."/".$branch_from,
-                                                $url,
+                       my $gs = $self->other_gs(add_path_to_url( $repos_root, $branch_from ),
+                                                $repos_root,
                                                 $branch_from,
                                                 $rev,
                                                 $self->{ref_id});
@@ -1468,9 +1541,9 @@ sub find_extra_svk_parents {
 sub lookup_svn_merge {
        my $uuid = shift;
        my $url = shift;
-       my $merge = shift;
+       my $source = shift;
+       my $revs = shift;
 
-       my ($source, $revs) = split ":", $merge;
        my $path = $source;
        $path =~ s{^/}{};
        my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
@@ -1483,13 +1556,18 @@ sub lookup_svn_merge {
        my @merged_commit_ranges;
        # find the tip
        for my $range ( @ranges ) {
+               if ($range =~ /[*]$/) {
+                       warn "W: Ignoring partial merge in svn:mergeinfo "
+                               ."dirprop: $source:$range\n";
+                       next;
+               }
                my ($bottom, $top) = split "-", $range;
                $top ||= $bottom;
                my $bottom_commit = $gs->find_rev_after( $bottom, 1, $top );
                my $top_commit = $gs->find_rev_before( $top, 1, $bottom );
 
                unless ($top_commit and $bottom_commit) {
-                       warn "W:unknown path/rev in svn:mergeinfo "
+                       warn "W: unknown path/rev in svn:mergeinfo "
                                ."dirprop: $source:$range\n";
                        next;
                }
@@ -1522,7 +1600,7 @@ sub _rev_list {
        @rv;
 }
 
-sub check_cherry_pick {
+sub check_cherry_pick2 {
        my $base = shift;
        my $tip = shift;
        my $parents = shift;
@@ -1537,7 +1615,8 @@ sub check_cherry_pick {
                        delete $commits{$commit};
                }
        }
-       return (keys %commits);
+       my @k = (keys %commits);
+       return (scalar @k, $k[0]);
 }
 
 sub has_no_changes {
@@ -1562,7 +1641,16 @@ sub tie_for_persistent_memoization {
        my $hash = shift;
        my $path = shift;
 
-       if ($can_use_yaml) {
+       unless ($memo_backend) {
+               if (eval { require Git::SVN::Memoize::YAML; 1}) {
+                       $memo_backend = 1;
+               } else {
+                       require Memoize::Storable;
+                       $memo_backend = -1;
+               }
+       }
+
+       if ($memo_backend > 0) {
                tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml";
        } else {
                tie %$hash => 'Memoize::Storable', "$path.db", 'nstore';
@@ -1582,7 +1670,7 @@ sub tie_for_persistent_memoization {
                mkpath([$cache_path]) unless -d $cache_path;
 
                my %lookup_svn_merge_cache;
-               my %check_cherry_pick_cache;
+               my %check_cherry_pick2_cache;
                my %has_no_changes_cache;
 
                tie_for_persistent_memoization(\%lookup_svn_merge_cache,
@@ -1592,11 +1680,11 @@ sub tie_for_persistent_memoization {
                        LIST_CACHE => ['HASH' => \%lookup_svn_merge_cache],
                ;
 
-               tie_for_persistent_memoization(\%check_cherry_pick_cache,
-                   "$cache_path/check_cherry_pick");
-               memoize 'check_cherry_pick',
+               tie_for_persistent_memoization(\%check_cherry_pick2_cache,
+                   "$cache_path/check_cherry_pick2");
+               memoize 'check_cherry_pick2',
                        SCALAR_CACHE => 'FAULT',
-                       LIST_CACHE => ['HASH' => \%check_cherry_pick_cache],
+                       LIST_CACHE => ['HASH' => \%check_cherry_pick2_cache],
                ;
 
                tie_for_persistent_memoization(\%has_no_changes_cache,
@@ -1612,7 +1700,7 @@ sub tie_for_persistent_memoization {
                $memoized = 0;
 
                Memoize::unmemoize 'lookup_svn_merge';
-               Memoize::unmemoize 'check_cherry_pick';
+               Memoize::unmemoize 'check_cherry_pick2';
                Memoize::unmemoize 'has_no_changes';
        }
 
@@ -1623,7 +1711,8 @@ sub tie_for_persistent_memoization {
                return unless -d $cache_path;
 
                for my $cache_file (("$cache_path/lookup_svn_merge",
-                                    "$cache_path/check_cherry_pick",
+                                    "$cache_path/check_cherry_pick", # old
+                                    "$cache_path/check_cherry_pick2",
                                     "$cache_path/has_no_changes")) {
                        for my $suffix (qw(yaml db)) {
                                my $file = "$cache_file.$suffix";
@@ -1661,7 +1750,6 @@ sub parents_exclude {
                                if ( $commit eq $excluded ) {
                                        push @excluded, $commit;
                                        $found++;
-                                       last;
                                }
                                else {
                                        push @new, $commit;
@@ -1678,11 +1766,49 @@ sub parents_exclude {
        return @excluded;
 }
 
+# Compute what's new in svn:mergeinfo relative to $old_path at r$old_rev.
+sub mergeinfo_changes {
+       my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
+       my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
+       my $old_minfo = {};
+       # $mergeinfo_prop is split into one "source:rangelist" entry per line; $path and $rev are not used here.
+       my $ra = $self->ra;
+       # Give up if $old_path isn't in the repo.
+       # This is probably a merge on a subtree.
+       if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
+               warn "W: ignoring svn:mergeinfo on $old_path, ",
+                       "directory didn't exist in r$old_rev\n";
+               return {};
+       }
+       my (undef, undef, $props) = $ra->get_dir($old_path, $old_rev);
+       if (defined $props->{"svn:mergeinfo"}) {
+               my %omi = map {split ":", $_ } split "\n",
+                       $props->{"svn:mergeinfo"};
+               $old_minfo = \%omi;
+       }
+       # Map each changed source to the part of its new range list that differs from the old one.
+       my %changes = ();
+       foreach my $p (keys %minfo) {
+               my $a = $old_minfo->{$p} || "";
+               my $b = $minfo{$p};
+               # Omit merged branches whose ranges lists are unchanged.
+               next if $a eq $b;
+               # Remove any common range list prefix.
+               ($a ^ $b) =~ /^[\0]*/; # string XOR is NUL wherever old and new agree, so $+[0] is the common prefix length
+               my $common_prefix = rindex $b, ",", $+[0] - 1; # last "," inside that prefix (-1 if none), so no range is cut in half
+               $changes{$p} = substr $b, $common_prefix + 1;
+       }
+       print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
+               scalar(keys %minfo), " sources, ",
+               scalar(keys %changes), " changed\n";
+       # Returns a hashref: merge source => new tail of its range list ({} was returned above on bail-out).
+       return \%changes;
+}
 
 # note: this function should only be called if the various dirprops
 # have actually changed
 sub find_extra_svn_parents {
-       my ($self, $ed, $mergeinfo, $parents) = @_;
+       my ($self, $mergeinfo, $parents) = @_;
        # aha!  svk:merge property changed...
 
        memoize_svn_mergeinfo_functions();
@@ -1691,18 +1817,19 @@ sub find_extra_svn_parents {
        # history.  Then, we figure out which git revisions are in
        # that tip, but not this revision.  If all of those revisions
        # are now marked as merge, we can add the tip as a parent.
-       my @merges = split "\n", $mergeinfo;
+       my @merges = sort keys %$mergeinfo;
        my @merge_tips;
-       my $url = $self->{url};
+       my $url = $self->url;
        my $uuid = $self->ra_uuid;
-       my %ranges;
+       my @all_ranges;
        for my $merge ( @merges ) {
                my ($tip_commit, @ranges) =
-                       lookup_svn_merge( $uuid, $url, $merge );
+                       lookup_svn_merge( $uuid, $url,
+                                         $merge, $mergeinfo->{$merge} );
                unless (!$tip_commit or
                                grep { $_ eq $tip_commit } @$parents ) {
                        push @merge_tips, $tip_commit;
-                       $ranges{$tip_commit} = \@ranges;
+                       push @all_ranges, @ranges;
                } else {
                        push @merge_tips, undef;
                }
@@ -1714,10 +1841,9 @@ sub find_extra_svn_parents {
        # check merge tips for new parents
        my @new_parents;
        for my $merge_tip ( @merge_tips ) {
-               my $spec = shift @merges;
+               my $merge = shift @merges;
                next unless $merge_tip and $excluded{$merge_tip};
-
-               my $ranges = $ranges{$merge_tip};
+               my $spec = "$merge:$mergeinfo->{$merge}";
 
                # check out 'new' tips
                my $merge_base;
@@ -1737,19 +1863,17 @@ sub find_extra_svn_parents {
                }
 
                # double check that there are no missing non-merge commits
-               my (@incomplete) = check_cherry_pick(
+               my ($ninc, $ifirst) = check_cherry_pick2(
                        $merge_base, $merge_tip,
                        $parents,
-                       @$ranges,
+                       @all_ranges,
                       );
 
-               if ( @incomplete ) {
-                       warn "W:svn cherry-pick ignored ($spec) - missing "
-                               .@incomplete." commit(s) (eg $incomplete[0])\n";
+               if ($ninc) {
+                       warn "W: svn cherry-pick ignored ($spec) - missing " .
+                               "$ninc commit(s) (eg $ifirst)\n";
                } else {
-                       warn
-                               "Found merge parent (svn:mergeinfo prop): ",
-                                       $merge_tip, "\n";
+                       warn "Found merge parent ($spec): ", $merge_tip, "\n";
                        push @new_parents, $merge_tip;
                }
        }
@@ -1775,23 +1899,20 @@ sub find_extra_svn_parents {
 }
 
 sub make_log_entry {
-       my ($self, $rev, $parents, $ed) = @_;
+       my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
        my $untracked = $self->get_untracked($ed);
 
        my @parents = @$parents;
-       my $ps = $ed->{path_strip} || "";
-       for my $path ( grep { m/$ps/ } %{$ed->{dir_prop}} ) {
-               my $props = $ed->{dir_prop}{$path};
-               if ( $props->{"svk:merge"} ) {
-                       $self->find_extra_svk_parents
-                               ($ed, $props->{"svk:merge"}, \@parents);
-               }
-               if ( $props->{"svn:mergeinfo"} ) {
-                       $self->find_extra_svn_parents
-                               ($ed,
-                                $props->{"svn:mergeinfo"},
-                                \@parents);
-               }
+       my $props = $ed->{dir_prop}{$self->path};
+       if ( $props->{"svk:merge"} ) {
+               $self->find_extra_svk_parents($props->{"svk:merge"}, \@parents);
+       }
+       if ( $props->{"svn:mergeinfo"} ) {
+               my $mi_changes = $self->mergeinfo_changes
+                       ($parent_path, $parent_rev,
+                        $self->path, $rev,
+                        $props->{"svn:mergeinfo"});
+               $self->find_extra_svn_parents($mi_changes, \@parents);
        }
 
        open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!;
@@ -1875,8 +1996,9 @@ sub make_log_entry {
                $email ||= "$author\@$uuid";
                $commit_email ||= "$author\@$uuid";
        } elsif ($self->use_svnsync_props) {
-               my $full_url = $self->svnsync->{url};
-               $full_url .= "/$self->{path}" if length $self->{path};
+               my $full_url = canonicalize_url(
+                       add_path_to_url( $self->svnsync->{url}, $self->path )
+               );
                remove_username($full_url);
                my $uuid = $self->svnsync->{uuid};
                $log_entry{metadata} = "$full_url\@$rev $uuid";
@@ -1923,7 +2045,7 @@ sub set_tree {
                        tree_b => $tree,
                        editor_cb => sub {
                               $self->set_tree_cb($log_entry, $tree, @_) },
-                       svn_path => $self->{path} );
+                       svn_path => $self->path );
        if (!Git::SVN::Editor->new(\%ed_opts)->apply_diff) {
                print "No changes\nr$self->{last_rev} = $tree\n";
        }
@@ -1946,11 +2068,25 @@ sub rebuild_from_rev_db {
        unlink $path or croak "unlink: $!";
 }
 
+# File-scoped hash recording rebuild status: keyed by the rev-list range passed to rebuild()'s rev-list call, set to 1 once that call has been issued
+my %rebuild_status;
+# File-scoped hash recording rebuild verify status: keyed by "<refname>^0", set to 1 once ::verify_ref() has succeeded for that key
+my %rebuild_verify_status;
+
 sub rebuild {
        my ($self) = @_;
        my $map_path = $self->map_path;
        my $partial = (-e $map_path && ! -z $map_path);
-       return unless ::verify_ref($self->refname.'^0');
+       my $verify_key = $self->refname.'^0';
+       if (!$rebuild_verify_status{$verify_key}) {
+               my $verify_result = ::verify_ref($verify_key);
+               if ($verify_result) {
+                       $rebuild_verify_status{$verify_key} = 1;
+               }
+       }
+       if (!$rebuild_verify_status{$verify_key}) {
+               return;
+       }
        if (!$partial && ($self->use_svm_props || $self->no_metadata)) {
                my $rev_db = $self->rev_db_path;
                $self->rebuild_from_rev_db($rev_db);
@@ -1964,10 +2100,21 @@ sub rebuild {
        print "Rebuilding $map_path ...\n" if (!$partial);
        my ($base_rev, $head) = ($partial ? $self->rev_map_max_norebuild(1) :
                (undef, undef));
+       my $key_value = ($head ? "$head.." : "") . $self->refname;
+       if (exists $rebuild_status{$key_value}) {
+               print "Done rebuilding $map_path\n" if (!$partial || !$head);
+               my $rev_db_path = $self->rev_db_path;
+               if (-f $self->rev_db_path) {
+                       unlink $self->rev_db_path or croak "unlink: $!";
+               }
+               $self->unlink_rev_db_symlink;
+               return;
+       }
        my ($log, $ctx) =
-           command_output_pipe(qw/rev-list --pretty=raw --reverse/,
-                               ($head ? "$head.." : "") . $self->refname,
+               command_output_pipe(qw/rev-list --pretty=raw --reverse/,
+                               $key_value,
                                '--');
+       $rebuild_status{$key_value} = 1;
        my $metadata_url = $self->metadata_url;
        remove_username($metadata_url);
        my $svn_uuid = $self->rewrite_uuid || $self->ra_uuid;
@@ -2113,8 +2260,9 @@ sub rev_map_set {
        # both of these options make our .rev_db file very, very important
        # and we can't afford to lose it because rebuild() won't work
        if ($self->use_svm_props || $self->no_metadata) {
+               require File::Copy;
                $sync = 1;
-               copy($db, $db_lock) or die "rev_map_set(@_): ",
+               File::Copy::copy($db, $db_lock) or die "rev_map_set(@_): ",
                                           "Failed to copy: ",
                                           "$db => $db_lock ($!)\n";
        } else {
@@ -2290,7 +2438,7 @@ sub _new {
 
        # Older repos imported by us used $GIT_DIR/svn/foo instead of
        # $GIT_DIR/svn/refs/remotes/foo when tracking refs/remotes/foo
-       if ($ref_id =~ m{^refs/remotes/(.*)}) {
+       if ($ref_id =~ m{^refs/remotes/(.+)}) {
                my $old_dir = "$ENV{GIT_DIR}/svn/$1";
                if (-d $old_dir && ! -d $dir) {
                        $dir = $old_dir;
@@ -2299,10 +2447,39 @@ sub _new {
 
        $_[3] = $path = '' unless (defined $path);
        mkpath([$dir]);
-       bless {
+       my $obj = bless {
                ref_id => $ref_id, dir => $dir, index => "$dir/index",
-               path => $path, config => "$ENV{GIT_DIR}/svn/config",
+               config => "$ENV{GIT_DIR}/svn/config",
                map_root => "$dir/.rev_map", repo_id => $repo_id }, $class;
+
+       # Ensure it gets canonicalized
+       $obj->path($path);
+
+       return $obj;
+}
+
+sub path { # combined getter/setter: $obj->path($new) stores a path, $obj->path returns it
+       my $self = shift;
+
+       if (@_) { # an argument was supplied, so act as the setter
+               my $path = shift;
+               $self->{_path} = canonicalize_path($path); # always stored in canonicalized form, under the _path key
+               return; # the setter form returns nothing
+       }
+
+       return $self->{_path}; # getter form: whatever the last setter call stored
+}
+
+sub url { # combined getter/setter: $obj->url($new) stores a URL, $obj->url returns it
+       my $self = shift;
+
+       if (@_) { # an argument was supplied, so act as the setter
+               my $url = shift;
+               $self->{url} = canonicalize_url($url); # stored in canonicalized form, under the url key
+               return; # the setter form returns nothing
+       }
+
+       return $self->{url}; # getter form: returns the url slot as-is
+}
 
 # for read-only access of old .rev_db formats
@@ -2332,7 +2509,7 @@ sub map_path {
 
 sub uri_encode { # percent-encodes every character of the argument that is not in the allowed set of the substitution below
        my ($f) = @_; # work on a copy so the caller's string is not modified
-       $f =~ s#([^a-zA-Z0-9\*!\:_\./\-])#uc sprintf("%%%02x",ord($1))#eg;
+       $f =~ s#([^a-zA-Z0-9\*!\:_\./\-])#sprintf("%%%02X",ord($1))#eg;
        $f # last expression is the return value: the encoded string, with uppercase hex digits in each %XX escape
 }