canonicalize_path
canonicalize_url
join_paths
+ add_path_to_url
);
}
+sub _canonicalize_url_path {
+ my ($uri_path) = @_;
+
+ my @parts;
+ foreach my $part (split m{/+}, $uri_path) {
+ $part =~ s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg;
+ push @parts, $part;
+ }
+
+ return join('/', @parts);
+}
+
sub _canonicalize_url_ourselves {
my ($url) = @_;
- $url =~ s#^([^:]+://[^/]*/)(.*)$#$1 . canonicalize_path($2)#e;
- return $url;
+ if ($url =~ m#^([^:]+)://([^/]*)(.*)$#) {
+ my ($scheme, $domain, $uri) = ($1, $2, _canonicalize_url_path(canonicalize_path($3)));
+ $url = "$scheme://$domain$uri";
+ }
+ $url;
}
return $new_path .= "/$last_path";
}
+
+=head3 add_path_to_url
+
+ my $new_url = add_path_to_url($url, $path);
+
+Appends $path onto the $url. If $path is empty, $url is returned unchanged.
+
+=cut
+
+sub add_path_to_url {
+ my($url, $path) = @_;
+
+ return $url if !defined $path or !length $path;
+
+ # Strip trailing and leading slashes so we don't
+ # wind up with http://x.com///path
+ $url =~ s{/+$}{};
+ $path =~ s{^/+}{};
+
+ # If a path has a % in it, URI escape it so it's not
+ # mistaken for a URI escape later.
+ $path =~ s{%}{%25}g;
+
+ return join '/', $url, $path;
+}
+
1;