Merge branch 'ms/git-svn-1.7'
author: Junio C Hamano <gitster@pobox.com>
Wed, 22 Aug 2012 18:51:20 +0000 (11:51 -0700)
committer: Junio C Hamano <gitster@pobox.com>
Wed, 22 Aug 2012 18:51:20 +0000 (11:51 -0700)
A series by Michael Schwern via Eric to update git-svn to revamp the
way URLs are internally passed around, to make it work with SVN 1.7.

* ms/git-svn-1.7:
git-svn: remove ad-hoc canonicalizations
git-svn: canonicalize newly-minted URLs
git-svn: introduce add_path_to_url function
git-svn: canonicalize earlier
git-svn: replace URL escapes with canonicalization
git-svn: attempt to mimic SVN 1.7 URL canonicalization
t9107: fix typo
t9118: workaround inconsistency between SVN versions
Git::SVN{,::Ra}: canonicalize earlier
git-svn: path canonicalization uses SVN API
Git::SVN::Utils: remove irrelevant comment
git-svn: add join_paths() to safely concatenate paths
git-svn: factor out _collapse_dotdot function
git-svn: use SVN 1.7 to canonicalize when possible
git-svn: move canonicalization to Git::SVN::Utils
use Git::SVN{,::RA}->url accessor globally
use Git::SVN->path accessor globally
Git::SVN::Ra: use accessor for URLs
Git::SVN: use accessor for URLs internally
Git::SVN: use accessors internally for path

12 files changed:
git-svn.perl
perl/Git/SVN.pm
perl/Git/SVN/Fetcher.pm
perl/Git/SVN/Migration.pm
perl/Git/SVN/Ra.pm
perl/Git/SVN/Utils.pm
t/Git-SVN/Utils/add_path_to_url.t [new file with mode: 0644]
t/Git-SVN/Utils/canonicalize_url.t [new file with mode: 0644]
t/Git-SVN/Utils/collapse_dotdot.t [new file with mode: 0644]
t/Git-SVN/Utils/join_paths.t [new file with mode: 0644]
t/t9107-git-svn-migrate.sh
t/t9118-git-svn-funky-branch-names.sh
index 828b8f0c8e6de81593db108495565e591cd91363..0d77ffb0b92b9e94454aeb94b434d9f86acd8a13 100755 (executable)
 use Git::SVN::Log;
 use Git::SVN::Migration;
 
-use Git::SVN::Utils qw(fatal can_compress);
+use Git::SVN::Utils qw(
+       fatal
+       can_compress
+       canonicalize_path
+       canonicalize_url
+       join_paths
+       add_path_to_url
+       join_paths
+);
+
 use Git qw(
        git_cmd_try
        command
@@ -1231,7 +1240,7 @@ sub cmd_show_ignore {
        my ($url, $rev, $uuid, $gs) = working_head_info('HEAD');
        $gs ||= Git::SVN->new;
        my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum);
-       $gs->prop_walk($gs->{path}, $r, sub {
+       $gs->prop_walk($gs->path, $r, sub {
                my ($gs, $path, $props) = @_;
                print STDOUT "\n# $path\n";
                my $s = $props->{'svn:ignore'} or return;
@@ -1247,7 +1256,7 @@ sub cmd_show_externals {
        my ($url, $rev, $uuid, $gs) = working_head_info('HEAD');
        $gs ||= Git::SVN->new;
        my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum);
-       $gs->prop_walk($gs->{path}, $r, sub {
+       $gs->prop_walk($gs->path, $r, sub {
                my ($gs, $path, $props) = @_;
                print STDOUT "\n# $path\n";
                my $s = $props->{'svn:externals'} or return;
@@ -1262,7 +1271,7 @@ sub cmd_create_ignore {
        my ($url, $rev, $uuid, $gs) = working_head_info('HEAD');
        $gs ||= Git::SVN->new;
        my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum);
-       $gs->prop_walk($gs->{path}, $r, sub {
+       $gs->prop_walk($gs->path, $r, sub {
                my ($gs, $path, $props) = @_;
                # $path is of the form /path/to/dir/
                $path = '.' . $path;
@@ -1292,31 +1301,6 @@ sub cmd_mkdirs {
        $gs->mkemptydirs($_revision);
 }
 
-sub canonicalize_path {
-       my ($path) = @_;
-       my $dot_slash_added = 0;
-       if (substr($path, 0, 1) ne "/") {
-               $path = "./" . $path;
-               $dot_slash_added = 1;
-       }
-       # File::Spec->canonpath doesn't collapse x/../y into y (for a
-       # good reason), so let's do this manually.
-       $path =~ s#/+#/#g;
-       $path =~ s#/\.(?:/|$)#/#g;
-       $path =~ s#/[^/]+/\.\.##g;
-       $path =~ s#/$##g;
-       $path =~ s#^\./## if $dot_slash_added;
-       $path =~ s#^/##;
-       $path =~ s#^\.$##;
-       return $path;
-}
-
-sub canonicalize_url {
-       my ($url) = @_;
-       $url =~ s#^([^:]+://[^/]*/)(.*)$#$1 . canonicalize_path($2)#e;
-       return $url;
-}
-
 # get_svnprops(PATH)
 # ------------------
 # Helper for cmd_propget and cmd_proplist below.
@@ -1330,7 +1314,7 @@ sub get_svnprops {
        $path = $cmd_dir_prefix . $path;
        fatal("No such file or directory: $path") unless -e $path;
        my $is_dir = -d $path ? 1 : 0;
-       $path = $gs->{path} . '/' . $path;
+       $path = join_paths($gs->{path}, $path);
 
        # canonicalize the path (otherwise libsvn will abort or fail to
        # find the file)
@@ -1431,8 +1415,8 @@ sub cmd_commit_diff {
                        fatal("Needed URL or usable git-svn --id in ",
                              "the command-line\n", $usage);
                }
-               $url = $gs->{url};
-               $svn_path = $gs->{path};
+               $url = $gs->url;
+               $svn_path = $gs->path;
        }
        unless (defined $_revision) {
                fatal("-r|--revision is a required argument\n", $usage);
@@ -1466,24 +1450,6 @@ sub cmd_commit_diff {
        }
 }
 
-sub escape_uri_only {
-       my ($uri) = @_;
-       my @tmp;
-       foreach (split m{/}, $uri) {
-               s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg;
-               push @tmp, $_;
-       }
-       join('/', @tmp);
-}
-
-sub escape_url {
-       my ($url) = @_;
-       if ($url =~ m#^([^:]+)://([^/]*)(.*)$#) {
-               my ($scheme, $domain, $uri) = ($1, $2, escape_uri_only($3));
-               $url = "$scheme://$domain$uri";
-       }
-       $url;
-}
 
 sub cmd_info {
        my $path = canonicalize_path(defined($_[0]) ? $_[0] : ".");
@@ -1508,21 +1474,21 @@ sub cmd_info {
        # canonicalize_path() will return "" to make libsvn 1.5.x happy,
        $path = "." if $path eq "";
 
-       my $full_url = $url . ($fullpath eq "" ? "" : "/$fullpath");
+       my $full_url = canonicalize_url( add_path_to_url( $url, $fullpath ) );
 
        if ($_url) {
-               print escape_url($full_url), "\n";
+               print "$full_url\n";
                return;
        }
 
        my $result = "Path: $path\n";
        $result .= "Name: " . basename($path) . "\n" if $file_type ne "dir";
-       $result .= "URL: " . escape_url($full_url) . "\n";
+       $result .= "URL: $full_url\n";
 
        eval {
                my $repos_root = $gs->repos_root;
                Git::SVN::remove_username($repos_root);
-               $result .= "Repository Root: " . escape_url($repos_root) . "\n";
+               $result .= "Repository Root: " . canonicalize_url($repos_root) . "\n";
        };
        if ($@) {
                $result .= "Repository Root: (offline)\n";
@@ -1669,7 +1635,9 @@ sub post_fetch_checkout {
 
 sub complete_svn_url {
        my ($url, $path) = @_;
-       $path =~ s#/+$##;
+       $path = canonicalize_path($path);
+
+       # If the path is not a URL...
        if ($path !~ m#^[a-z\+]+://#) {
                if (!defined $url || $url !~ m#^[a-z\+]+://#) {
                        fatal("E: '$path' is not a complete URL ",
@@ -1686,7 +1654,7 @@ sub complete_url_ls_init {
                print STDERR "W: $switch not specified\n";
                return;
        }
-       $repo_path =~ s#/+$##;
+       $repo_path = canonicalize_path($repo_path);
        if ($repo_path =~ m#^[a-z\+]+://#) {
                $ra = Git::SVN::Ra->new($repo_path);
                $repo_path = '';
@@ -1697,18 +1665,18 @@ sub complete_url_ls_init {
                              "and a separate URL is not specified");
                }
        }
-       my $url = $ra->{url};
+       my $url = $ra->url;
        my $gs = Git::SVN->init($url, undef, undef, undef, 1);
        my $k = "svn-remote.$gs->{repo_id}.url";
        my $orig_url = eval { command_oneline(qw/config --get/, $k) };
-       if ($orig_url && ($orig_url ne $gs->{url})) {
+       if ($orig_url && ($orig_url ne $gs->url)) {
                die "$k already set: $orig_url\n",
-                   "wanted to set to: $gs->{url}\n";
+                   "wanted to set to: $gs->url\n";
        }
-       command_oneline('config', $k, $gs->{url}) unless $orig_url;
-       my $remote_path = "$gs->{path}/$repo_path";
+       command_oneline('config', $k, $gs->url) unless $orig_url;
+
+       my $remote_path = join_paths( $gs->path, $repo_path );
        $remote_path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
-       $remote_path =~ s#/+#/#g;
        $remote_path =~ s#^/##g;
        $remote_path .= "/*" if $remote_path !~ /\*/;
        my ($n) = ($switch =~ /^--(\w+)/);
index 8478d0c95293b531547084e19b6680cf73187469..acb25394f433bc4043f835be4944b13785d46356 100644 (file)
@@ -23,7 +23,14 @@ package Git::SVN;
     command_output_pipe
     command_close_pipe
 );
-use Git::SVN::Utils qw(fatal can_compress);
+use Git::SVN::Utils qw(
+       fatal
+       can_compress
+       join_paths
+       canonicalize_path
+       canonicalize_url
+       add_path_to_url
+);
 
 my $can_use_yaml;
 BEGIN {
@@ -195,9 +202,9 @@ sub read_all_remotes {
                } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) {
                        $r->{$1}->{svm} = {};
                } elsif (m!^(.+)\.url=\s*(.*)\s*$!) {
-                       $r->{$1}->{url} = $2;
+                       $r->{$1}->{url} = canonicalize_url($2);
                } elsif (m!^(.+)\.pushurl=\s*(.*)\s*$!) {
-                       $r->{$1}->{pushurl} = $2;
+                       $r->{$1}->{pushurl} = canonicalize_url($2);
                } elsif (m!^(.+)\.ignore-refs=\s*(.*)\s*$!) {
                        $r->{$1}->{ignore_refs_regex} = $2;
                } elsif (m!^(.+)\.(branches|tags)=$svn_refspec$!) {
@@ -290,7 +297,7 @@ sub find_existing_remote {
 
 sub init_remote_config {
        my ($self, $url, $no_write) = @_;
-       $url =~ s!/+$!!; # strip trailing slash
+       $url = canonicalize_url($url);
        my $r = read_all_remotes();
        my $existing = find_existing_remote($url, $r);
        if ($existing) {
@@ -314,12 +321,10 @@ sub init_remote_config {
                                print STDERR "Using higher level of URL: ",
                                             "$url => $min_url\n";
                        }
-                       my $old_path = $self->{path};
-                       $self->{path} = $url;
-                       $self->{path} =~ s!^\Q$min_url\E(/|$)!!;
-                       if (length $old_path) {
-                               $self->{path} .= "/$old_path";
-                       }
+                       my $old_path = $self->path;
+                       $url =~ s!^\Q$min_url\E(/|$)!!;
+                       $url = join_paths($url, $old_path);
+                       $self->path($url);
                        $url = $min_url;
                }
        }
@@ -343,18 +348,22 @@ sub init_remote_config {
        unless ($no_write) {
                command_noisy('config',
                              "svn-remote.$self->{repo_id}.url", $url);
-               $self->{path} =~ s{^/}{};
-               $self->{path} =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
+               my $path = $self->path;
+               $path =~ s{^/}{};
+               $path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
+               $self->path($path);
                command_noisy('config', '--add',
                              "svn-remote.$self->{repo_id}.fetch",
-                             "$self->{path}:".$self->refname);
+                             $self->path.":".$self->refname);
        }
-       $self->{url} = $url;
+       $self->url($url);
 }
 
 sub find_by_url { # repos_root and, path are optional
        my ($class, $full_url, $repos_root, $path) = @_;
 
+       $full_url = canonicalize_url($full_url);
+
        return undef unless defined $full_url;
        remove_username($full_url);
        remove_username($repos_root) if defined $repos_root;
@@ -393,6 +402,11 @@ sub find_by_url { # repos_root and, path are optional
                        }
                        $p =~ s#^\Q$z\E(?:/|$)#$prefix# or next;
                }
+
+               # remote fetch paths are not URI escaped.  Decode ours
+               # so they match
+               $p = uri_decode($p);
+
                foreach my $f (keys %$fetch) {
                        next if $f ne $p;
                        return Git::SVN->new($fetch->{$f}, $repo_id, $f);
@@ -435,20 +449,25 @@ sub new {
                }
        }
        my $self = _new($class, $repo_id, $ref_id, $path);
-       if (!defined $self->{path} || !length $self->{path}) {
+       if (!defined $self->path || !length $self->path) {
                my $fetch = command_oneline('config', '--get',
                                            "svn-remote.$repo_id.fetch",
                                            ":$ref_id\$") or
                     die "Failed to read \"svn-remote.$repo_id.fetch\" ",
                         "\":$ref_id\$\" in config\n";
-               ($self->{path}, undef) = split(/\s*:\s*/, $fetch);
+               my($path) = split(/\s*:\s*/, $fetch);
+               $self->path($path);
        }
-       $self->{path} =~ s{/+}{/}g;
-       $self->{path} =~ s{\A/}{};
-       $self->{path} =~ s{/\z}{};
-       $self->{url} = command_oneline('config', '--get',
-                                      "svn-remote.$repo_id.url") or
+       {
+               my $path = $self->path;
+               $path =~ s{\A/}{};
+               $path =~ s{/\z}{};
+               $self->path($path);
+       }
+       my $url = command_oneline('config', '--get',
+                                 "svn-remote.$repo_id.url") or
                   die "Failed to read \"svn-remote.$repo_id.url\" in config\n";
+       $self->url($url);
        $self->{pushurl} = eval { command_oneline('config', '--get',
                                  "svn-remote.$repo_id.pushurl") };
        $self->rebuild;
@@ -552,8 +571,7 @@ sub _set_svm_vars {
                # username is of no interest
                $src =~ s{(^[a-z\+]*://)[^/@]*@}{$1};
 
-               my $replace = $ra->{url};
-               $replace .= "/$path" if length $path;
+               my $replace = add_path_to_url($ra->url, $path);
 
                my $section = "svn-remote.$self->{repo_id}";
                tmp_config("$section.svm-source", $src);
@@ -567,20 +585,21 @@ sub _set_svm_vars {
        }
 
        my $r = $ra->get_latest_revnum;
-       my $path = $self->{path};
+       my $path = $self->path;
        my %tried;
        while (length $path) {
-               unless ($tried{"$self->{url}/$path"}) {
+               my $try = add_path_to_url($self->url, $path);
+               unless ($tried{$try}) {
                        return $ra if $self->read_svm_props($ra, $path, $r);
-                       $tried{"$self->{url}/$path"} = 1;
+                       $tried{$try} = 1;
                }
                $path =~ s#/?[^/]+$##;
        }
        die "Path: '$path' should be ''\n" if $path ne '';
        return $ra if $self->read_svm_props($ra, $path, $r);
-       $tried{"$self->{url}/$path"} = 1;
+       $tried{ add_path_to_url($self->url, $path) } = 1;
 
-       if ($ra->{repos_root} eq $self->{url}) {
+       if ($ra->{repos_root} eq $self->url) {
                die @err, (map { "  $_\n" } keys %tried), "\n";
        }
 
@@ -590,20 +609,21 @@ sub _set_svm_vars {
        $path = $ra->{svn_path};
        $ra = Git::SVN::Ra->new($ra->{repos_root});
        while (length $path) {
-               unless ($tried{"$ra->{url}/$path"}) {
+               my $try = add_path_to_url($ra->url, $path);
+               unless ($tried{$try}) {
                        $ok = $self->read_svm_props($ra, $path, $r);
                        last if $ok;
-                       $tried{"$ra->{url}/$path"} = 1;
+                       $tried{$try} = 1;
                }
                $path =~ s#/?[^/]+$##;
        }
        die "Path: '$path' should be ''\n" if $path ne '';
        $ok ||= $self->read_svm_props($ra, $path, $r);
-       $tried{"$ra->{url}/$path"} = 1;
+       $tried{ add_path_to_url($ra->url, $path) } = 1;
        if (!$ok) {
                die @err, (map { "  $_\n" } keys %tried), "\n";
        }
-       Git::SVN::Ra->new($self->{url});
+       Git::SVN::Ra->new($self->url);
 }
 
 sub svnsync {
@@ -670,7 +690,7 @@ sub ra_uuid {
                if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/i) {
                        $self->{ra_uuid} = $uuid;
                } else {
-                       die "ra_uuid called without URL\n" unless $self->{url};
+                       die "ra_uuid called without URL\n" unless $self->url;
                        $self->{ra_uuid} = $self->ra->get_uuid;
                        tmp_config('--add', $key, $self->{ra_uuid});
                }
@@ -694,7 +714,7 @@ sub repos_root {
 
 sub ra {
        my ($self) = shift;
-       my $ra = Git::SVN::Ra->new($self->{url});
+       my $ra = Git::SVN::Ra->new($self->url);
        $self->_set_repos_root($ra->{repos_root});
        if ($self->use_svm_props && !$self->{svm}) {
                if ($self->no_metadata) {
@@ -728,7 +748,7 @@ sub prop_walk {
        $path =~ s#^/*#/#g;
        my $p = $path;
        # Strip the irrelevant part of the path.
-       $p =~ s#^/+\Q$self->{path}\E(/|$)#/#;
+       $p =~ s#^/+\Q@{[$self->path]}\E(/|$)#/#;
        # Ensure the path is terminated by a `/'.
        $p =~ s#/*$#/#;
 
@@ -749,7 +769,7 @@ sub prop_walk {
 
        foreach (sort keys %$dirent) {
                next if $dirent->{$_}->{kind} != $SVN::Node::dir;
-               $self->prop_walk($self->{path} . $p . $_, $rev, $sub);
+               $self->prop_walk($self->path . $p . $_, $rev, $sub);
        }
 }
 
@@ -919,20 +939,19 @@ sub rewrite_uuid {
 
 sub metadata_url {
        my ($self) = @_;
-       ($self->rewrite_root || $self->{url}) .
-          (length $self->{path} ? '/' . $self->{path} : '');
+       my $url = $self->rewrite_root || $self->url;
+       return canonicalize_url( add_path_to_url( $url, $self->path ) );
 }
 
 sub full_url {
        my ($self) = @_;
-       $self->{url} . (length $self->{path} ? '/' . $self->{path} : '');
+       return canonicalize_url( add_path_to_url( $self->url, $self->path ) );
 }
 
 sub full_pushurl {
        my ($self) = @_;
        if ($self->{pushurl}) {
-               return $self->{pushurl} . (length $self->{path} ? '/' .
-                      $self->{path} : '');
+               return canonicalize_url( add_path_to_url( $self->{pushurl}, $self->path ) );
        } else {
                return $self->full_url;
        }
@@ -1048,20 +1067,20 @@ sub do_git_commit {
 
 sub match_paths {
        my ($self, $paths, $r) = @_;
-       return 1 if $self->{path} eq '';
-       if (my $path = $paths->{"/$self->{path}"}) {
+       return 1 if $self->path eq '';
+       if (my $path = $paths->{"/".$self->path}) {
                return ($path->{action} eq 'D') ? 0 : 1;
        }
-       $self->{path_regex} ||= qr/^\/\Q$self->{path}\E\//;
+       $self->{path_regex} ||= qr{^/\Q@{[$self->path]}\E/};
        if (grep /$self->{path_regex}/, keys %$paths) {
                return 1;
        }
        my $c = '';
-       foreach (split m#/#, $self->{path}) {
+       foreach (split m#/#, $self->path) {
                $c .= "/$_";
                next unless ($paths->{$c} &&
                             ($paths->{$c}->{action} =~ /^[AR]$/));
-               if ($self->ra->check_path($self->{path}, $r) ==
+               if ($self->ra->check_path($self->path, $r) ==
                    $SVN::Node::dir) {
                        return 1;
                }
@@ -1075,14 +1094,14 @@ sub find_parent_branch {
        unless (defined $paths) {
                my $err_handler = $SVN::Error::handler;
                $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs;
-               $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1,
+               $self->ra->get_log([$self->path], $rev, $rev, 0, 1, 1,
                                   sub { $paths = $_[0] });
                $SVN::Error::handler = $err_handler;
        }
        return undef unless defined $paths;
 
        # look for a parent from another branch:
-       my @b_path_components = split m#/#, $self->{path};
+       my @b_path_components = split m#/#, $self->path;
        my @a_path_components;
        my $i;
        while (@b_path_components) {
@@ -1099,8 +1118,8 @@ sub find_parent_branch {
        }
        my $r = $i->{copyfrom_rev};
        my $repos_root = $self->ra->{repos_root};
-       my $url = $self->ra->{url};
-       my $new_url = $url . $branch_from;
+       my $url = $self->ra->url;
+       my $new_url = canonicalize_url( add_path_to_url( $url, $branch_from ) );
        print STDERR  "Found possible branch point: ",
                      "$new_url => ", $self->full_url, ", $r\n"
                      unless $::_q > 1;
@@ -1114,7 +1133,7 @@ sub find_parent_branch {
                        ($base, $head) = parse_revision_argument(0, $r);
                } else {
                        if ($r0 < $r) {
-                               $gs->ra->get_log([$gs->{path}], $r0 + 1, $r, 1,
+                               $gs->ra->get_log([$gs->path], $r0 + 1, $r, 1,
                                        0, 1, sub { $base = $_[1] - 1 });
                        }
                }
@@ -1136,7 +1155,7 @@ sub find_parent_branch {
                        # at the moment), so we can't rely on it
                        $self->{last_rev} = $r0;
                        $self->{last_commit} = $parent;
-                       $ed = Git::SVN::Fetcher->new($self, $gs->{path});
+                       $ed = Git::SVN::Fetcher->new($self, $gs->path);
                        $gs->ra->gs_do_switch($r0, $rev, $gs,
                                              $self->full_url, $ed)
                          or die "SVN connection failed somewhere...\n";
@@ -1235,7 +1254,7 @@ sub mkemptydirs {
                close $fh;
        }
 
-       my $strip = qr/\A\Q$self->{path}\E(?:\/|$)/;
+       my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
        foreach my $d (sort keys %empty_dirs) {
                $d = uri_decode($d);
                $d =~ s/$strip//;
@@ -1429,12 +1448,11 @@ sub find_extra_svk_parents {
        for my $ticket ( @tickets ) {
                my ($uuid, $path, $rev) = split /:/, $ticket;
                if ( $uuid eq $self->ra_uuid ) {
-                       my $url = $self->{url};
-                       my $repos_root = $url;
+                       my $repos_root = $self->url;
                        my $branch_from = $path;
                        $branch_from =~ s{^/}{};
-                       my $gs = $self->other_gs($repos_root."/".$branch_from,
-                                                $url,
+                       my $gs = $self->other_gs(add_path_to_url( $repos_root, $branch_from ),
+                                                $repos_root,
                                                 $branch_from,
                                                 $rev,
                                                 $self->{ref_id});
@@ -1693,7 +1711,7 @@ sub find_extra_svn_parents {
        # are now marked as merge, we can add the tip as a parent.
        my @merges = split "\n", $mergeinfo;
        my @merge_tips;
-       my $url = $self->{url};
+       my $url = $self->url;
        my $uuid = $self->ra_uuid;
        my %ranges;
        for my $merge ( @merges ) {
@@ -1875,8 +1893,9 @@ sub make_log_entry {
                $email ||= "$author\@$uuid";
                $commit_email ||= "$author\@$uuid";
        } elsif ($self->use_svnsync_props) {
-               my $full_url = $self->svnsync->{url};
-               $full_url .= "/$self->{path}" if length $self->{path};
+               my $full_url = canonicalize_url(
+                       add_path_to_url( $self->svnsync->{url}, $self->path )
+               );
                remove_username($full_url);
                my $uuid = $self->svnsync->{uuid};
                $log_entry{metadata} = "$full_url\@$rev $uuid";
@@ -1923,7 +1942,7 @@ sub set_tree {
                        tree_b => $tree,
                        editor_cb => sub {
                               $self->set_tree_cb($log_entry, $tree, @_) },
-                       svn_path => $self->{path} );
+                       svn_path => $self->path );
        if (!Git::SVN::Editor->new(\%ed_opts)->apply_diff) {
                print "No changes\nr$self->{last_rev} = $tree\n";
        }
@@ -2299,10 +2318,39 @@ sub _new {
 
        $_[3] = $path = '' unless (defined $path);
        mkpath([$dir]);
-       bless {
+       my $obj = bless {
                ref_id => $ref_id, dir => $dir, index => "$dir/index",
-               path => $path, config => "$ENV{GIT_DIR}/svn/config",
+               config => "$ENV{GIT_DIR}/svn/config",
                map_root => "$dir/.rev_map", repo_id => $repo_id }, $class;
+
+       # Ensure it gets canonicalized
+       $obj->path($path);
+
+       return $obj;
+}
+
+sub path {
+       my $self = shift;
+
+       if (@_) {
+               my $path = shift;
+               $self->{path} = canonicalize_path($path);
+               return;
+       }
+
+       return $self->{path};
+}
+
+sub url {
+       my $self = shift;
+
+       if (@_) {
+               my $url = shift;
+               $self->{url} = canonicalize_url($url);
+               return;
+       }
+
+       return $self->{url};
 }
 
 # for read-only access of old .rev_db formats
index 76fae9bce04ded09a4bb507995df4aab27ef6ea5..046a7a2f31cf923f634bd8b516b3da38ff989a46 100644 (file)
@@ -83,7 +83,7 @@ sub _mark_empty_symlinks {
        chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
        my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
        local $/ = "\0";
-       my $pfx = defined($switch_path) ? $switch_path : $git_svn->{path};
+       my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
        $pfx .= '/' if length($pfx);
        while (<$ls>) {
                chomp;
index 75d74298ea5e39ed846254ac4d95f88d0be6ed7b..30daf354655f0b4cc02d08e7ba5af43133a09b95 100644 (file)
@@ -177,14 +177,14 @@ sub minimize_connections {
                my $ra = Git::SVN::Ra->new($url);
 
                # skip existing cases where we already connect to the root
-               if (($ra->{url} eq $ra->{repos_root}) ||
+               if (($ra->url eq $ra->{repos_root}) ||
                    ($ra->{repos_root} eq $repo_id)) {
-                       $root_repos->{$ra->{url}} = $repo_id;
+                       $root_repos->{$ra->url} = $repo_id;
                        next;
                }
 
                my $root_ra = Git::SVN::Ra->new($ra->{repos_root});
-               my $root_path = $ra->{url};
+               my $root_path = $ra->url;
                $root_path =~ s#^\Q$ra->{repos_root}\E(/|$)##;
                foreach my $path (keys %$fetch) {
                        my $ref_id = $fetch->{$path};
index 23ff43e86b4f08fb38b8d9ca90a6c77319ae8f63..90ec30bfff25c5cb5cd3c171fdc4a81c46e0f156 100644 (file)
@@ -3,6 +3,12 @@ package Git::SVN::Ra;
 use strict;
 use warnings;
 use SVN::Client;
+use Git::SVN::Utils qw(
+       canonicalize_url
+       canonicalize_path
+       add_path_to_url
+);
+
 use SVN::Ra;
 BEGIN {
        @ISA = qw(SVN::Ra);
@@ -62,29 +68,11 @@ ()
        \@rv;
 }
 
-sub escape_uri_only {
-       my ($uri) = @_;
-       my @tmp;
-       foreach (split m{/}, $uri) {
-               s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg;
-               push @tmp, $_;
-       }
-       join('/', @tmp);
-}
-
-sub escape_url {
-       my ($url) = @_;
-       if ($url =~ m#^(https?)://([^/]+)(.*)$#) {
-               my ($scheme, $domain, $uri) = ($1, $2, escape_uri_only($3));
-               $url = "$scheme://$domain$uri";
-       }
-       $url;
-}
 
 sub new {
        my ($class, $url) = @_;
-       $url =~ s!/+$!!;
-       return $RA if ($RA && $RA->{url} eq $url);
+       $url = canonicalize_url($url);
+       return $RA if ($RA && $RA->url eq $url);
 
        ::_req_svn();
 
@@ -115,17 +103,34 @@ sub new {
                        $Git::SVN::Prompt::_no_auth_cache = 1;
                }
        } # no warnings 'once'
-       my $self = SVN::Ra->new(url => escape_url($url), auth => $baton,
+
+       my $self = SVN::Ra->new(url => $url, auth => $baton,
                              config => $config,
                              pool => SVN::Pool->new,
                              auth_provider_callbacks => $callbacks);
-       $self->{url} = $url;
+       $RA = bless $self, $class;
+
+       # Make sure its canonicalized
+       $self->url($url);
        $self->{svn_path} = $url;
        $self->{repos_root} = $self->get_repos_root;
        $self->{svn_path} =~ s#^\Q$self->{repos_root}\E(/|$)##;
        $self->{cache} = { check_path => { r => 0, data => {} },
                           get_dir => { r => 0, data => {} } };
-       $RA = bless $self, $class;
+
+       return $RA;
+}
+
+sub url {
+       my $self = shift;
+
+       if (@_) {
+               my $url = shift;
+               $self->{url} = canonicalize_url($url);
+               return;
+       }
+
+       return $self->{url};
 }
 
 sub check_path {
@@ -195,6 +200,7 @@ sub get_log {
                                qw/copyfrom_path copyfrom_rev action/;
                        if ($s{'copyfrom_path'}) {
                                $s{'copyfrom_path'} =~ s/$prefix_regex//;
+                               $s{'copyfrom_path'} = canonicalize_path($s{'copyfrom_path'});
                        }
                        $_[0]{$p} = \%s;
                }
@@ -246,7 +252,7 @@ sub get_commit_editor {
 sub gs_do_update {
        my ($self, $rev_a, $rev_b, $gs, $editor) = @_;
        my $new = ($rev_a == $rev_b);
-       my $path = $gs->{path};
+       my $path = $gs->path;
 
        if ($new && -e $gs->{index}) {
                unlink $gs->{index} or die
@@ -282,30 +288,33 @@ sub gs_do_update {
 # svn_ra_reparent didn't work before 1.4)
 sub gs_do_switch {
        my ($self, $rev_a, $rev_b, $gs, $url_b, $editor) = @_;
-       my $path = $gs->{path};
+       my $path = $gs->path;
        my $pool = SVN::Pool->new;
 
-       my $full_url = $self->{url};
-       my $old_url = $full_url;
-       $full_url .= '/' . $path if length $path;
+       my $old_url = $self->url;
+       my $full_url = add_path_to_url( $self->url, $path );
        my ($ra, $reparented);
 
        if ($old_url =~ m#^svn(\+ssh)?://# ||
            ($full_url =~ m#^https?://# &&
-            escape_url($full_url) ne $full_url)) {
+            canonicalize_url($full_url) ne $full_url)) {
                $_[0] = undef;
                $self = undef;
                $RA = undef;
                $ra = Git::SVN::Ra->new($full_url);
                $ra_invalid = 1;
        } elsif ($old_url ne $full_url) {
-               SVN::_Ra::svn_ra_reparent($self->{session}, $full_url, $pool);
-               $self->{url} = $full_url;
+               SVN::_Ra::svn_ra_reparent(
+                       $self->{session},
+                       canonicalize_url($full_url),
+                       $pool
+               );
+               $self->url($full_url);
                $reparented = 1;
        }
 
        $ra ||= $self;
-       $url_b = escape_url($url_b);
+       $url_b = canonicalize_url($url_b);
        my $reporter = $ra->do_switch($rev_b, '', 1, $url_b, $editor, $pool);
        my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : ();
        $reporter->set_path('', $rev_a, 0, @lock, $pool);
@@ -313,7 +322,7 @@ sub gs_do_switch {
 
        if ($reparented) {
                SVN::_Ra::svn_ra_reparent($self->{session}, $old_url, $pool);
-               $self->{url} = $old_url;
+               $self->url($old_url);
        }
 
        $pool->clear;
@@ -326,7 +335,7 @@ sub longest_common_path {
        my $common_max = scalar @$gsv;
 
        foreach my $gs (@$gsv) {
-               my @tmp = split m#/#, $gs->{path};
+               my @tmp = split m#/#, $gs->path;
                my $p = '';
                foreach (@tmp) {
                        $p .= length($p) ? "/$_" : $_;
@@ -362,7 +371,7 @@ sub gs_fetch_loop_common {
        my $inc = $_log_window_size;
        my ($min, $max) = ($base, $head < $base + $inc ? $head : $base + $inc);
        my $longest_path = longest_common_path($gsv, $globs);
-       my $ra_url = $self->{url};
+       my $ra_url = $self->url;
        my $find_trailing_edge;
        while (1) {
                my %revs;
@@ -508,7 +517,7 @@ sub match_globs {
                                 ($self->check_path($p, $r) !=
                                  $SVN::Node::dir));
                        next unless $p =~ /$g->{path}->{regex}/;
-                       $exists->{$p} = Git::SVN->init($self->{url}, $p, undef,
+                       $exists->{$p} = Git::SVN->init($self->url, $p, undef,
                                         $g->{ref}->full_path($de), 1);
                }
        }
@@ -532,7 +541,7 @@ sub match_globs {
                        next if ($self->check_path($pathname, $r) !=
                                 $SVN::Node::dir);
                        $exists->{$pathname} = Git::SVN->init(
-                                             $self->{url}, $pathname, undef,
+                                             $self->url, $pathname, undef,
                                              $g->{ref}->full_path($p), 1);
                }
                my $c = '';
@@ -548,19 +557,20 @@ sub match_globs {
 
 sub minimize_url {
        my ($self) = @_;
-       return $self->{url} if ($self->{url} eq $self->{repos_root});
+       return $self->url if ($self->url eq $self->{repos_root});
        my $url = $self->{repos_root};
        my @components = split(m!/!, $self->{svn_path});
        my $c = '';
        do {
-               $url .= "/$c" if length $c;
+               $url = add_path_to_url($url, $c);
                eval {
                        my $ra = (ref $self)->new($url);
                        my $latest = $ra->get_latest_revnum;
                        $ra->get_log("", $latest, 0, 1, 0, 1, sub {});
                };
        } while ($@ && ($c = shift @components));
-       $url;
+
+       return canonicalize_url($url);
 }
 
 sub can_do_switch {
@@ -568,7 +578,7 @@ sub can_do_switch {
        unless (defined $can_do_switch) {
                my $pool = SVN::Pool->new;
                my $rep = eval {
-                       $self->do_switch(1, '', 0, $self->{url},
+                       $self->do_switch(1, '', 0, $self->url,
                                         SVN::Delta::Editor->new, $pool);
                };
                if ($@) {
index 496006bc7b3b9492f2747f689a8cdda411eb5806..4bb4dde89a3821ad7714c299b429f751cc98ebcc 100644 (file)
@@ -3,9 +3,18 @@ package Git::SVN::Utils;
 use strict;
 use warnings;
 
+use SVN::Core;
+
 use base qw(Exporter);
 
-our @EXPORT_OK = qw(fatal can_compress);
+our @EXPORT_OK = qw(
+       fatal
+       can_compress
+       canonicalize_path
+       canonicalize_url
+       join_paths
+       add_path_to_url
+);
 
 
 =head1 NAME
@@ -56,4 +65,169 @@ sub can_compress {
 }
 
 
+=head3 canonicalize_path
+
+    my $canonicalized_path = canonicalize_path($path);
+
+Converts $path into a canonical form which is safe to pass to the SVN
+API as a file path.
+
+=cut
+
+# Collapse "name/.." pairs in $path, e.g. turn foo/../bar into bar.
+#
+# Only a whole ".." path segment cancels the segment in front of it, and a
+# "." or ".." segment is never the one being cancelled, so "foo/..bar" and
+# "../../x" come back unchanged.  "." segments sitting between a name and
+# its ".." are dropped together with the pair, so "a/./.." collapses to "".
+# A ".." with nothing in front of it to cancel is kept as is.
+sub _collapse_dotdot {
+       my $path = shift;
+
+       # A real directory name: one whole segment that is not "." or "..".
+       my $name = qr{(?!\.\.?(?:/|\z))[^/]+};
+       # The ".." that cancels it, optionally preceded by "." segments.
+       my $up = qr{(?:/+\.)*/+\.\.};
+
+       # "/name/.." in the middle or at the end; its leading slash goes too.
+       1 while $path =~ s{/$name$up(?=/|\z)}{};
+       # "name/../" at the very start; its trailing slash goes too.
+       1 while $path =~ s{\A$name$up/}{};
+       # "name/.." making up the whole remaining path.
+       1 while $path =~ s{\A$name$up\z}{};
+
+       return $path;
+}
+
+
+# Canonicalize $path for use with the SVN API.  "name/.." pairs are
+# collapsed in Perl first, then the result is handed to whichever
+# canonicalizer the SVN bindings provide.  When the bindings provide none,
+# or the call yields undef, the pure-Perl canonicalizer is used instead.
+sub canonicalize_path {
+       my ($path) = @_;
+
+       # Prefer the 1.7 function, then fall back to the 1.6 one.
+       my $svn_canonicalize;
+       if ( defined &SVN::_Core::svn_dirent_canonicalize ) {
+               $svn_canonicalize = \&SVN::_Core::svn_dirent_canonicalize;
+       }
+       elsif ( defined &SVN::_Core::svn_path_canonicalize ) {
+               $svn_canonicalize = \&SVN::_Core::svn_path_canonicalize;
+       }
+
+       if ( $svn_canonicalize ) {
+               $path = _collapse_dotdot($path);
+               my $canonical = $svn_canonicalize->($path);
+
+               # The 1.6 function can return undef on
+               # subversion-perl-1.4.2-2.el5 (CentOS 5.2)
+               return $canonical if defined $canonical;
+       }
+
+       # Either no SVN canonicalizer exists or it gave us nothing usable,
+       # so do the work ourselves.
+       return _canonicalize_path_ourselves($path);
+}
+
+
+# Pure-Perl path canonicalizer.
+#
+# Squeezes repeated slashes, drops "." segments, collapses "name/.." pairs
+# through _collapse_dotdot(), and strips the leading and the trailing slash.
+# A path that reduces to "." comes back as the empty string.
+sub _canonicalize_path_ourselves {
+       my ($path) = @_;
+       my $dot_slash_added = 0;
+       # Give a relative path a temporary "./" prefix so that its first
+       # segment is preceded by a slash like every other segment.
+       if (substr($path, 0, 1) ne "/") {
+               $path = "./" . $path;
+               $dot_slash_added = 1;
+       }
+       $path =~ s#/+#/#g;
+       # Drop "." segments.  The slash after the segment is only looked at,
+       # not consumed, so that it can also start the next match; consuming
+       # it would leave every second segment of "a/././b" behind.
+       $path =~ s#/\.(?=/|$)##g;
+       $path = _collapse_dotdot($path);
+       $path =~ s#/$##g;
+       # Take the temporary prefix off again.
+       $path =~ s#^\./## if $dot_slash_added;
+       $path =~ s#^/##;
+       $path =~ s#^\.$##;
+       return $path;
+}
+
+
+=head3 canonicalize_url
+
+    my $canonicalized_url = canonicalize_url($url);
+
+Converts $url into a canonical form which is safe to pass to the SVN
+API as a URL.
+
+=cut
+
+# Canonicalize $url for use with the SVN API, using the SVN bindings when
+# they offer a URL canonicalizer and our own implementation otherwise.
+sub canonicalize_url {
+       my ($url) = @_;
+
+       # The 1.7 way to do it
+       return SVN::_Core::svn_uri_canonicalize($url)
+               if defined &SVN::_Core::svn_uri_canonicalize;
+
+       # There wasn't a 1.6 way to do it, so we do it ourselves.
+       return _canonicalize_url_ourselves($url);
+}
+
+
+# Percent-escape the path portion of a URL, one segment at a time.
+#
+# The path is split on runs of slashes.  Within a segment, any character
+# other than a word character, "~", ".", "%", "+" or "-" is replaced by its
+# %XX escape, and so is a "%" that is not followed by two hex digits.  The
+# segments are then joined with single slashes.
+sub _canonicalize_url_path {
+       my ($uri_path) = @_;
+
+       my @escaped = map {
+               my $segment = $_;
+               $segment =~ s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg;
+               $segment;
+       } split m{/+}, $uri_path;
+
+       return join('/', @escaped);
+}
+
+# Home-grown URL canonicalizer.  A URL of the form scheme://host/path gets
+# its path run through canonicalize_path() and then percent-escaped by
+# _canonicalize_url_path(); the scheme and host are passed through as they
+# are.  Anything that does not look like such a URL is returned untouched.
+sub _canonicalize_url_ourselves {
+       my ($url) = @_;
+
+       return $url unless $url =~ m#^([^:]+)://([^/]*)(.*)$#;
+
+       my ($scheme, $domain, $raw_path) = ($1, $2, $3);
+       my $uri = _canonicalize_url_path(canonicalize_path($raw_path));
+
+       return "$scheme://$domain$uri";
+}
+
+
+=head3 join_paths
+
+    my $new_path = join_paths(@paths);
+
+Appends @paths together into a single path.  Any empty paths are ignored.
+
+=cut
+
+# Join the given paths with single slashes.  Undefined and empty paths are
+# skipped.  Slashes are trimmed only where two paths meet, so the leading
+# slashes of the first path and the trailing slashes of the last one
+# survive.  A single remaining path is returned untouched, and no paths at
+# all give the empty string.
+sub join_paths {
+       my @parts = grep { defined($_) && length($_) } @_;
+
+       return '' unless @parts;
+       return $parts[0] if @parts == 1;
+
+       for my $i (0 .. $#parts) {
+               # Every path but the first loses its leading slashes ...
+               $parts[$i] =~ s{^/+}{} if $i > 0;
+               # ... and every path but the last its trailing ones.
+               $parts[$i] =~ s{/+$}{} if $i < $#parts;
+       }
+
+       return join '/', @parts;
+}
+
+
+=head3 add_path_to_url
+
+    my $new_url = add_path_to_url($url, $path);
+
+Appends $path onto the $url.  If $path is empty, $url is returned unchanged.
+
+=cut
+
+# Append $path to $url with exactly one slash between them.  An undefined
+# or empty $path leaves $url as it is.  Every "%" in $path is escaped as
+# "%25" so that it cannot be mistaken for a URI escape later.
+sub add_path_to_url {
+       my($url, $path) = @_;
+
+       return $url unless defined($path) && length($path);
+
+       # Trim the slashes at the seam so we don't
+       # wind up with http://x.com///path
+       (my $base = $url)  =~ s{/+$}{};
+       (my $tail = $path) =~ s{^/+}{};
+
+       # A literal % in the path must not look like a URI escape.
+       $tail =~ s{%}{%25}g;
+
+       return "$base/$tail";
+}
+
 1;
diff --git a/t/Git-SVN/Utils/add_path_to_url.t b/t/Git-SVN/Utils/add_path_to_url.t
new file mode 100644 (file)
index 0000000..bfbd878
--- /dev/null
@@ -0,0 +1,27 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use Test::More 'no_plan';
+
+use Git::SVN::Utils qw(
+       add_path_to_url
+);
+
+# A reference cannot be a hash key, so we use an array.
+# The list alternates between an array ref of arguments and the URL
+# add_path_to_url() is expected to return for them.
+my @tests = (
+       ["http://x.com", "bar"]                 => 'http://x.com/bar',
+       ["http://x.com", ""]                    => 'http://x.com',
+       ["http://x.com/foo/", undef]            => 'http://x.com/foo/',
+       ["http://x.com/foo/", "/bar/baz/"]      => 'http://x.com/foo/bar/baz/',
+       ["http://x.com", 'per%cent']            => 'http://x.com/per%25cent',
+);
+
+# Consume the list two elements at a time: arguments, then expectation.
+while(@tests) {
+       my($have, $want) = splice @tests, 0, 2;
+
+       # Build a readable test name; undefined arguments are shown as 'undef'.
+       my $args = join ", ", map { qq['$_'] } map { defined($_) ? $_ : 'undef' } @$have;
+       my $name = "add_path_to_url($args) eq $want";
+       is add_path_to_url(@$have), $want, $name;
+}
diff --git a/t/Git-SVN/Utils/canonicalize_url.t b/t/Git-SVN/Utils/canonicalize_url.t
new file mode 100644 (file)
index 0000000..05795ab
--- /dev/null
@@ -0,0 +1,26 @@
+#!/usr/bin/env perl
+
+# Test our own home rolled URL canonicalizer.  Test the private one
+# directly because we can't predict what the SVN API is going to do.
+
+use strict;
+use warnings;
+
+use Test::More 'no_plan';
+
+use Git::SVN::Utils;
+# Call the private implementation through a code ref; it is not exported.
+my $canonicalize_url = \&Git::SVN::Utils::_canonicalize_url_ourselves;
+
+# Input URL => the canonical URL expected for it.
+my %tests = (
+       "http://x.com"                  => "http://x.com",
+       "http://x.com/"                 => "http://x.com",
+       "http://x.com/foo/bar"          => "http://x.com/foo/bar",
+       "http://x.com//foo//bar//"      => "http://x.com/foo/bar",
+       "http://x.com/  /%/"            => "http://x.com/%20%20/%25",
+);
+
+# One assertion per table entry, in whatever order the hash yields them.
+for my $arg (keys %tests) {
+       my $want = $tests{$arg};
+
+       is $canonicalize_url->($arg), $want, "canonicalize_url('$arg') => $want";
+}
diff --git a/t/Git-SVN/Utils/collapse_dotdot.t b/t/Git-SVN/Utils/collapse_dotdot.t
new file mode 100644 (file)
index 0000000..1da1cce
--- /dev/null
@@ -0,0 +1,23 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use Test::More 'no_plan';
+
+use Git::SVN::Utils;
+# Call the private helper through a code ref; it is not exported.
+my $collapse_dotdot = \&Git::SVN::Utils::_collapse_dotdot;
+
+# Input path => the path expected once "name/.." pairs are collapsed.
+my %tests = (
+       "foo/bar/baz"                   => "foo/bar/baz",
+       ".."                            => "..",
+       "foo/.."                        => "",
+       "/foo/bar/../../baz"            => "/baz",
+       "deeply/.././deeply/nested"     => "./deeply/nested",
+);
+
+# One assertion per table entry, in whatever order the hash yields them.
+for my $arg (keys %tests) {
+       my $want = $tests{$arg};
+
+       is $collapse_dotdot->($arg), $want, "_collapse_dotdot('$arg') => $want";
+}
diff --git a/t/Git-SVN/Utils/join_paths.t b/t/Git-SVN/Utils/join_paths.t
new file mode 100644 (file)
index 0000000..d4488e7
--- /dev/null
@@ -0,0 +1,32 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use Test::More 'no_plan';
+
+use Git::SVN::Utils qw(
+       join_paths
+);
+
+# A reference cannot be a hash key, so we use an array.
+# The list alternates between an array ref of paths and the joined path
+# join_paths() is expected to return for them.
+my @tests = (
+       []                                      => '',
+       ["/x.com", "bar"]                       => '/x.com/bar',
+       ["x.com", ""]                           => 'x.com',
+       ["/x.com/foo/", undef, "bar"]           => '/x.com/foo/bar',
+       ["x.com/foo/", "/bar/baz/"]             => 'x.com/foo/bar/baz/',
+       ["foo", "bar"]                          => 'foo/bar',
+       ["/foo/bar", "baz", "/biff"]            => '/foo/bar/baz/biff',
+       ["", undef, "."]                        => '.',
+       []                                      => '',
+
+);
+
+# Consume the list two elements at a time: paths, then expectation.
+while(@tests) {
+       my($have, $want) = splice @tests, 0, 2;
+
+       # Build a readable test name; undefined paths are shown as 'undef'.
+       my $args = join ", ", map { qq['$_'] } map { defined($_) ? $_ : 'undef' } @$have;
+       my $name = "join_paths($args) eq '$want'";
+       is join_paths(@$have), $want, $name;
+}
index 289fc313fb737ac7895580fd8407cff758b085e7..ee73013eed0b1c6410319aa3d7e4e57ea242a594 100755 (executable)
@@ -27,15 +27,17 @@ test_expect_success 'setup old-looking metadata' '
 head=`git rev-parse --verify refs/heads/git-svn-HEAD^0`
 test_expect_success 'git-svn-HEAD is a real HEAD' "test -n '$head'"
 
+svnrepo_escaped=`echo $svnrepo | sed 's/ /%20/'`
+
 test_expect_success 'initialize old-style (v0) git svn layout' '
        mkdir -p "$GIT_DIR"/git-svn/info "$GIT_DIR"/svn/info &&
        echo "$svnrepo" > "$GIT_DIR"/git-svn/info/url &&
        echo "$svnrepo" > "$GIT_DIR"/svn/info/url &&
        git svn migrate &&
-       ! test -d "$GIT_DIR"/git svn &&
+       ! test -d "$GIT_DIR"/git-svn &&
        git rev-parse --verify refs/${remotes_git_svn}^0 &&
        git rev-parse --verify refs/remotes/svn^0 &&
-       test "$(git config --get svn-remote.svn.url)" = "$svnrepo" &&
+       test "$(git config --get svn-remote.svn.url)" = "$svnrepo_escaped" &&
        test `git config --get svn-remote.svn.fetch` = \
              ":refs/${remotes_git_svn}"
        '
index 63fc982c8cdbd9c19eb06bba58ad5e86da5dd03e..193d3cabddec0c4ed63edeade9b3fb93bef790bb 100755 (executable)
@@ -32,6 +32,11 @@ test_expect_success 'setup svnrepo' '
        start_httpd
        '
 
+# SVN 1.7 will truncate "not-a%40{0]" to just "not-a".
+# Look at what SVN wound up naming the branch and use that.
+# Be sure to escape the @ if it shows up.
+non_reflog=`svn_cmd ls "$svnrepo/pr ject/branches" | grep not-a | sed 's/\///' | sed 's/@/%40/'`
+
 test_expect_success 'test clone with funky branch names' '
        git svn clone -s "$svnrepo/pr ject" project &&
        (
@@ -42,7 +47,7 @@ test_expect_success 'test clone with funky branch names' '
                git rev-parse "refs/remotes/%2Eleading_dot" &&
                git rev-parse "refs/remotes/trailing_dot%2E" &&
                git rev-parse "refs/remotes/trailing_dotlock%2Elock" &&
-               git rev-parse "refs/remotes/not-a%40{0}reflog"
+               git rev-parse "refs/remotes/$non_reflog"
        )
        '