perl / Git / SVN.pmon commit Merge branch 'lv/add-doc-working-tree' (465c30a)
   1package Git::SVN;
   2use strict;
   3use warnings;
   4use Fcntl qw/:DEFAULT :seek/;
   5use constant rev_map_fmt => 'NH40';
   6use vars qw/$_no_metadata
   7            $_repack $_repack_flags $_use_svm_props $_head
   8            $_use_svnsync_props $no_reuse_existing
   9            $_use_log_author $_add_author_from $_localtime/;
  10use Carp qw/croak/;
  11use File::Path qw/mkpath/;
  12use IPC::Open3;
  13use Memoize;  # core since 5.8.0, Jul 2002
  14use POSIX qw(:signal_h);
  15use Time::Local;
  16
  17use Git qw(
  18    command
  19    command_oneline
  20    command_noisy
  21    command_output_pipe
  22    command_close_pipe
  23    get_tz_offset
  24);
  25use Git::SVN::Utils qw(
  26        fatal
  27        can_compress
  28        join_paths
  29        canonicalize_path
  30        canonicalize_url
  31        add_path_to_url
  32);
  33
  34my $memo_backend;
  35our $_follow_parent  = 1;
  36our $_minimize_url   = 'unset';
  37our $default_repo_id = 'svn';
  38our $default_ref_id  = $ENV{GIT_SVN_ID} || 'git-svn';
  39
  40my ($_gc_nr, $_gc_period);
  41
  42# properties that we do not log:
  43my %SKIP_PROP;
  44BEGIN {
  45        %SKIP_PROP = map { $_ => 1 } qw/svn:wc:ra_dav:version-url
  46                                        svn:special svn:executable
  47                                        svn:entry:committed-rev
  48                                        svn:entry:last-author
  49                                        svn:entry:uuid
  50                                        svn:entry:committed-date/;
  51
  52        # some options are read globally, but can be overridden locally
  53        # per [svn-remote "..."] section.  Command-line options will *NOT*
  54        # override options set in an [svn-remote "..."] section
  55        no strict 'refs';
  56        for my $option (qw/follow_parent no_metadata use_svm_props
  57                           use_svnsync_props/) {
  58                my $key = $option;
  59                $key =~ tr/_//d;
  60                my $prop = "-$option";
  61                *$option = sub {
  62                        my ($self) = @_;
  63                        return $self->{$prop} if exists $self->{$prop};
  64                        my $k = "svn-remote.$self->{repo_id}.$key";
  65                        eval { command_oneline(qw/config --get/, $k) };
  66                        if ($@) {
  67                                $self->{$prop} = ${"Git::SVN::_$option"};
  68                        } else {
  69                                my $v = command_oneline(qw/config --bool/,$k);
  70                                $self->{$prop} = $v eq 'false' ? 0 : 1;
  71                        }
  72                        return $self->{$prop};
  73                }
  74        }
  75}
  76
  77
  78my (%LOCKFILES, %INDEX_FILES);
  79END {
  80        unlink keys %LOCKFILES if %LOCKFILES;
  81        unlink keys %INDEX_FILES if %INDEX_FILES;
  82}
  83
  84sub resolve_local_globs {
  85        my ($url, $fetch, $glob_spec) = @_;
  86        return unless defined $glob_spec;
  87        my $ref = $glob_spec->{ref};
  88        my $path = $glob_spec->{path};
  89        foreach (command(qw#for-each-ref --format=%(refname) refs/#)) {
  90                next unless m#^$ref->{regex}$#;
  91                my $p = $1;
  92                my $pathname = desanitize_refname($path->full_path($p));
  93                my $refname = desanitize_refname($ref->full_path($p));
  94                if (my $existing = $fetch->{$pathname}) {
  95                        if ($existing ne $refname) {
  96                                die "Refspec conflict:\n",
  97                                    "existing: $existing\n",
  98                                    " globbed: $refname\n";
  99                        }
 100                        my $u = (::cmt_metadata("$refname"))[0];
 101                        $u =~ s!^\Q$url\E(/|$)!! or die
 102                          "$refname: '$url' not found in '$u'\n";
 103                        if ($pathname ne $u) {
 104                                warn "W: Refspec glob conflict ",
 105                                     "(ref: $refname):\n",
 106                                     "expected path: $pathname\n",
 107                                     "    real path: $u\n",
 108                                     "Continuing ahead with $u\n";
 109                                next;
 110                        }
 111                } else {
 112                        $fetch->{$pathname} = $refname;
 113                }
 114        }
 115}
 116
 117sub parse_revision_argument {
 118        my ($base, $head) = @_;
 119        if (!defined $::_revision || $::_revision eq 'BASE:HEAD') {
 120                return ($base, $head);
 121        }
 122        return ($1, $2) if ($::_revision =~ /^(\d+):(\d+)$/);
 123        return ($::_revision, $::_revision) if ($::_revision =~ /^\d+$/);
 124        return ($head, $head) if ($::_revision eq 'HEAD');
 125        return ($base, $1) if ($::_revision =~ /^BASE:(\d+)$/);
 126        return ($1, $head) if ($::_revision =~ /^(\d+):HEAD$/);
 127        die "revision argument: $::_revision not understood by git-svn\n";
 128}
 129
 130sub fetch_all {
 131        my ($repo_id, $remotes) = @_;
 132        if (ref $repo_id) {
 133                my $gs = $repo_id;
 134                $repo_id = undef;
 135                $repo_id = $gs->{repo_id};
 136        }
 137        $remotes ||= read_all_remotes();
 138        my $remote = $remotes->{$repo_id} or
 139                     die "[svn-remote \"$repo_id\"] unknown\n";
 140        my $fetch = $remote->{fetch};
 141        my $url = $remote->{url} or die "svn-remote.$repo_id.url not defined\n";
 142        my (@gs, @globs);
 143        my $ra = Git::SVN::Ra->new($url);
 144        my $uuid = $ra->get_uuid;
 145        my $head = $ra->get_latest_revnum;
 146
 147        # ignore errors, $head revision may not even exist anymore
 148        eval { $ra->get_log("", $head, 0, 1, 0, 1, sub { $head = $_[1] }) };
 149        warn "W: $@\n" if $@;
 150
 151        my $base = defined $fetch ? $head : 0;
 152
 153        # read the max revs for wildcard expansion (branches/*, tags/*)
 154        foreach my $t (qw/branches tags/) {
 155                defined $remote->{$t} or next;
 156                push @globs, @{$remote->{$t}};
 157
 158                my $max_rev = eval { tmp_config(qw/--int --get/,
 159                                         "svn-remote.$repo_id.${t}-maxRev") };
 160                if (defined $max_rev && ($max_rev < $base)) {
 161                        $base = $max_rev;
 162                } elsif (!defined $max_rev) {
 163                        $base = 0;
 164                }
 165        }
 166
 167        if ($fetch) {
 168                foreach my $p (sort keys %$fetch) {
 169                        my $gs = Git::SVN->new($fetch->{$p}, $repo_id, $p);
 170                        my $lr = $gs->rev_map_max;
 171                        if (defined $lr) {
 172                                $base = $lr if ($lr < $base);
 173                        }
 174                        push @gs, $gs;
 175                }
 176        }
 177
 178        ($base, $head) = parse_revision_argument($base, $head);
 179        $ra->gs_fetch_loop_common($base, $head, \@gs, \@globs);
 180}
 181
 182sub read_all_remotes {
 183        my $r = {};
 184        my $use_svm_props = eval { command_oneline(qw/config --bool
 185            svn.useSvmProps/) };
 186        $use_svm_props = $use_svm_props eq 'true' if $use_svm_props;
 187        my $svn_refspec = qr{\s*(.*?)\s*:\s*(.+?)\s*};
 188        foreach (grep { s/^svn-remote\.// } command(qw/config -l/)) {
 189                if (m!^(.+)\.fetch=$svn_refspec$!) {
 190                        my ($remote, $local_ref, $remote_ref) = ($1, $2, $3);
 191                        die("svn-remote.$remote: remote ref '$remote_ref' "
 192                            . "must start with 'refs/'\n")
 193                                unless $remote_ref =~ m{^refs/};
 194                        $local_ref = uri_decode($local_ref);
 195                        $r->{$remote}->{fetch}->{$local_ref} = $remote_ref;
 196                        $r->{$remote}->{svm} = {} if $use_svm_props;
 197                } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) {
 198                        $r->{$1}->{svm} = {};
 199                } elsif (m!^(.+)\.url=\s*(.*)\s*$!) {
 200                        $r->{$1}->{url} = canonicalize_url($2);
 201                } elsif (m!^(.+)\.pushurl=\s*(.*)\s*$!) {
 202                        $r->{$1}->{pushurl} = canonicalize_url($2);
 203                } elsif (m!^(.+)\.ignore-refs=\s*(.*)\s*$!) {
 204                        $r->{$1}->{ignore_refs_regex} = $2;
 205                } elsif (m!^(.+)\.(branches|tags)=$svn_refspec$!) {
 206                        my ($remote, $t, $local_ref, $remote_ref) =
 207                                                             ($1, $2, $3, $4);
 208                        die("svn-remote.$remote: remote ref '$remote_ref' ($t) "
 209                            . "must start with 'refs/'\n")
 210                                unless $remote_ref =~ m{^refs/};
 211                        $local_ref = uri_decode($local_ref);
 212
 213                        require Git::SVN::GlobSpec;
 214                        my $rs = {
 215                            t => $t,
 216                            remote => $remote,
 217                            path => Git::SVN::GlobSpec->new($local_ref, 1),
 218                            ref => Git::SVN::GlobSpec->new($remote_ref, 0) };
 219                        if (length($rs->{ref}->{right}) != 0) {
 220                                die "The '*' glob character must be the last ",
 221                                    "character of '$remote_ref'\n";
 222                        }
 223                        push @{ $r->{$remote}->{$t} }, $rs;
 224                }
 225        }
 226
 227        map {
 228                if (defined $r->{$_}->{svm}) {
 229                        my $svm;
 230                        eval {
 231                                my $section = "svn-remote.$_";
 232                                $svm = {
 233                                        source => tmp_config('--get',
 234                                            "$section.svm-source"),
 235                                        replace => tmp_config('--get',
 236                                            "$section.svm-replace"),
 237                                }
 238                        };
 239                        $r->{$_}->{svm} = $svm;
 240                }
 241        } keys %$r;
 242
 243        foreach my $remote (keys %$r) {
 244                foreach ( grep { defined $_ }
 245                          map { $r->{$remote}->{$_} } qw(branches tags) ) {
 246                        foreach my $rs ( @$_ ) {
 247                                $rs->{ignore_refs_regex} =
 248                                    $r->{$remote}->{ignore_refs_regex};
 249                        }
 250                }
 251        }
 252
 253        $r;
 254}
 255
 256sub init_vars {
 257        $_gc_nr = $_gc_period = 1000;
 258        if (defined $_repack || defined $_repack_flags) {
 259               warn "Repack options are obsolete; they have no effect.\n";
 260        }
 261}
 262
 263sub verify_remotes_sanity {
 264        return unless -d $ENV{GIT_DIR};
 265        my %seen;
 266        foreach (command(qw/config -l/)) {
 267                if (m!^svn-remote\.(?:.+)\.fetch=.*:refs/remotes/(\S+)\s*$!) {
 268                        if ($seen{$1}) {
 269                                die "Remote ref refs/remote/$1 is tracked by",
 270                                    "\n  \"$_\"\nand\n  \"$seen{$1}\"\n",
 271                                    "Please resolve this ambiguity in ",
 272                                    "your git configuration file before ",
 273                                    "continuing\n";
 274                        }
 275                        $seen{$1} = $_;
 276                }
 277        }
 278}
 279
 280sub find_existing_remote {
 281        my ($url, $remotes) = @_;
 282        return undef if $no_reuse_existing;
 283        my $existing;
 284        foreach my $repo_id (keys %$remotes) {
 285                my $u = $remotes->{$repo_id}->{url} or next;
 286                next if $u ne $url;
 287                $existing = $repo_id;
 288                last;
 289        }
 290        $existing;
 291}
 292
 293sub init_remote_config {
 294        my ($self, $url, $no_write) = @_;
 295        $url = canonicalize_url($url);
 296        my $r = read_all_remotes();
 297        my $existing = find_existing_remote($url, $r);
 298        if ($existing) {
 299                unless ($no_write) {
 300                        print STDERR "Using existing ",
 301                                     "[svn-remote \"$existing\"]\n";
 302                }
 303                $self->{repo_id} = $existing;
 304        } elsif ($_minimize_url) {
 305                my $min_url = Git::SVN::Ra->new($url)->minimize_url;
 306                $existing = find_existing_remote($min_url, $r);
 307                if ($existing) {
 308                        unless ($no_write) {
 309                                print STDERR "Using existing ",
 310                                             "[svn-remote \"$existing\"]\n";
 311                        }
 312                        $self->{repo_id} = $existing;
 313                }
 314                if ($min_url ne $url) {
 315                        unless ($no_write) {
 316                                print STDERR "Using higher level of URL: ",
 317                                             "$url => $min_url\n";
 318                        }
 319                        my $old_path = $self->path;
 320                        $url =~ s!^\Q$min_url\E(/|$)!!;
 321                        $url = join_paths($url, $old_path);
 322                        $self->path($url);
 323                        $url = $min_url;
 324                }
 325        }
 326        my $orig_url;
 327        if (!$existing) {
 328                # verify that we aren't overwriting anything:
 329                $orig_url = eval {
 330                        command_oneline('config', '--get',
 331                                        "svn-remote.$self->{repo_id}.url")
 332                };
 333                if ($orig_url && ($orig_url ne $url)) {
 334                        die "svn-remote.$self->{repo_id}.url already set: ",
 335                            "$orig_url\nwanted to set to: $url\n";
 336                }
 337        }
 338        my ($xrepo_id, $xpath) = find_ref($self->refname);
 339        if (!$no_write && defined $xpath) {
 340                die "svn-remote.$xrepo_id.fetch already set to track ",
 341                    "$xpath:", $self->refname, "\n";
 342        }
 343        unless ($no_write) {
 344                command_noisy('config',
 345                              "svn-remote.$self->{repo_id}.url", $url);
 346                my $path = $self->path;
 347                $path =~ s{^/}{};
 348                $path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
 349                $self->path($path);
 350                command_noisy('config', '--add',
 351                              "svn-remote.$self->{repo_id}.fetch",
 352                              $self->path.":".$self->refname);
 353        }
 354        $self->url($url);
 355}
 356
 357sub find_by_url { # repos_root and, path are optional
 358        my ($class, $full_url, $repos_root, $path) = @_;
 359
 360        $full_url = canonicalize_url($full_url);
 361
 362        return undef unless defined $full_url;
 363        remove_username($full_url);
 364        remove_username($repos_root) if defined $repos_root;
 365        my $remotes = read_all_remotes();
 366        if (defined $full_url && defined $repos_root && !defined $path) {
 367                $path = $full_url;
 368                $path =~ s#^\Q$repos_root\E(?:/|$)##;
 369        }
 370        foreach my $repo_id (keys %$remotes) {
 371                my $u = $remotes->{$repo_id}->{url} or next;
 372                remove_username($u);
 373                next if defined $repos_root && $repos_root ne $u;
 374
 375                my $fetch = $remotes->{$repo_id}->{fetch} || {};
 376                foreach my $t (qw/branches tags/) {
 377                        foreach my $globspec (@{$remotes->{$repo_id}->{$t}}) {
 378                                resolve_local_globs($u, $fetch, $globspec);
 379                        }
 380                }
 381                my $p = $path;
 382                my $rwr = rewrite_root({repo_id => $repo_id});
 383                my $svm = $remotes->{$repo_id}->{svm}
 384                        if defined $remotes->{$repo_id}->{svm};
 385                unless (defined $p) {
 386                        $p = $full_url;
 387                        my $z = $u;
 388                        my $prefix = '';
 389                        if ($rwr) {
 390                                $z = $rwr;
 391                                remove_username($z);
 392                        } elsif (defined $svm) {
 393                                $z = $svm->{source};
 394                                $prefix = $svm->{replace};
 395                                $prefix =~ s#^\Q$u\E(?:/|$)##;
 396                                $prefix =~ s#/$##;
 397                        }
 398                        $p =~ s#^\Q$z\E(?:/|$)#$prefix# or next;
 399                }
 400
 401                # remote fetch paths are not URI escaped.  Decode ours
 402                # so they match
 403                $p = uri_decode($p);
 404
 405                foreach my $f (keys %$fetch) {
 406                        next if $f ne $p;
 407                        return Git::SVN->new($fetch->{$f}, $repo_id, $f);
 408                }
 409        }
 410        undef;
 411}
 412
 413sub init {
 414        my ($class, $url, $path, $repo_id, $ref_id, $no_write) = @_;
 415        my $self = _new($class, $repo_id, $ref_id, $path);
 416        if (defined $url) {
 417                $self->init_remote_config($url, $no_write);
 418        }
 419        $self;
 420}
 421
 422sub find_ref {
 423        my ($ref_id) = @_;
 424        foreach (command(qw/config -l/)) {
 425                next unless m!^svn-remote\.(.+)\.fetch=
 426                              \s*(.*?)\s*:\s*(.+?)\s*$!x;
 427                my ($repo_id, $path, $ref) = ($1, $2, $3);
 428                if ($ref eq $ref_id) {
 429                        $path = '' if ($path =~ m#^\./?#);
 430                        return ($repo_id, $path);
 431                }
 432        }
 433        (undef, undef, undef);
 434}
 435
 436sub new {
 437        my ($class, $ref_id, $repo_id, $path) = @_;
 438        if (defined $ref_id && !defined $repo_id && !defined $path) {
 439                ($repo_id, $path) = find_ref($ref_id);
 440                if (!defined $repo_id) {
 441                        die "Could not find a \"svn-remote.*.fetch\" key ",
 442                            "in the repository configuration matching: ",
 443                            "$ref_id\n";
 444                }
 445        }
 446        my $self = _new($class, $repo_id, $ref_id, $path);
 447        if (!defined $self->path || !length $self->path) {
 448                my $fetch = command_oneline('config', '--get',
 449                                            "svn-remote.$repo_id.fetch",
 450                                            ":$ref_id\$") or
 451                     die "Failed to read \"svn-remote.$repo_id.fetch\" ",
 452                         "\":$ref_id\$\" in config\n";
 453                my($path) = split(/\s*:\s*/, $fetch);
 454                $self->path($path);
 455        }
 456        {
 457                my $path = $self->path;
 458                $path =~ s{\A/}{};
 459                $path =~ s{/\z}{};
 460                $self->path($path);
 461        }
 462        my $url = command_oneline('config', '--get',
 463                                  "svn-remote.$repo_id.url") or
 464                  die "Failed to read \"svn-remote.$repo_id.url\" in config\n";
 465        $self->url($url);
 466        $self->{pushurl} = eval { command_oneline('config', '--get',
 467                                  "svn-remote.$repo_id.pushurl") };
 468        $self->rebuild;
 469        $self;
 470}
 471
 472sub refname {
 473        my ($refname) = $_[0]->{ref_id} ;
 474
 475        # It cannot end with a slash /, we'll throw up on this because
 476        # SVN can't have directories with a slash in their name, either:
 477        if ($refname =~ m{/$}) {
 478                die "ref: '$refname' ends with a trailing slash; this is ",
 479                    "not permitted by git or Subversion\n";
 480        }
 481
 482        # It cannot have ASCII control character space, tilde ~, caret ^,
 483        # colon :, question-mark ?, asterisk *, space, or open bracket [
 484        # anywhere.
 485        #
 486        # Additionally, % must be escaped because it is used for escaping
 487        # and we want our escaped refname to be reversible
 488        $refname =~ s{([ \%~\^:\?\*\[\t])}{sprintf('%%%02X',ord($1))}eg;
 489
 490        # no slash-separated component can begin with a dot .
 491        # /.* becomes /%2E*
 492        $refname =~ s{/\.}{/%2E}g;
 493
 494        # It cannot have two consecutive dots .. anywhere
 495        # .. becomes %2E%2E
 496        $refname =~ s{\.\.}{%2E%2E}g;
 497
 498        # trailing dots and .lock are not allowed
 499        # .$ becomes %2E and .lock becomes %2Elock
 500        $refname =~ s{\.(?=$|lock$)}{%2E};
 501
 502        # the sequence @{ is used to access the reflog
 503        # @{ becomes %40{
 504        $refname =~ s{\@\{}{%40\{}g;
 505
 506        return $refname;
 507}
 508
 509sub desanitize_refname {
 510        my ($refname) = @_;
 511        $refname =~ s{%(?:([0-9A-F]{2}))}{chr hex($1)}eg;
 512        return $refname;
 513}
 514
 515sub svm_uuid {
 516        my ($self) = @_;
 517        return $self->{svm}->{uuid} if $self->svm;
 518        $self->ra;
 519        unless ($self->{svm}) {
 520                die "SVM UUID not cached, and reading remotely failed\n";
 521        }
 522        $self->{svm}->{uuid};
 523}
 524
 525sub svm {
 526        my ($self) = @_;
 527        return $self->{svm} if $self->{svm};
 528        my $svm;
 529        # see if we have it in our config, first:
 530        eval {
 531                my $section = "svn-remote.$self->{repo_id}";
 532                $svm = {
 533                  source => tmp_config('--get', "$section.svm-source"),
 534                  uuid => tmp_config('--get', "$section.svm-uuid"),
 535                  replace => tmp_config('--get', "$section.svm-replace"),
 536                }
 537        };
 538        if ($svm && $svm->{source} && $svm->{uuid} && $svm->{replace}) {
 539                $self->{svm} = $svm;
 540        }
 541        $self->{svm};
 542}
 543
 544sub _set_svm_vars {
 545        my ($self, $ra) = @_;
 546        return $ra if $self->svm;
 547
 548        my @err = ( "useSvmProps set, but failed to read SVM properties\n",
 549                    "(svm:source, svm:uuid) ",
 550                    "from the following URLs:\n" );
 551        sub read_svm_props {
 552                my ($self, $ra, $path, $r) = @_;
 553                my $props = ($ra->get_dir($path, $r))[2];
 554                my $src = $props->{'svm:source'};
 555                my $uuid = $props->{'svm:uuid'};
 556                return undef if (!$src || !$uuid);
 557
 558                chomp($src, $uuid);
 559
 560                $uuid =~ m{^[0-9a-f\-]{30,}$}i
 561                    or die "doesn't look right - svm:uuid is '$uuid'\n";
 562
 563                # the '!' is used to mark the repos_root!/relative/path
 564                $src =~ s{/?!/?}{/};
 565                $src =~ s{/+$}{}; # no trailing slashes please
 566                # username is of no interest
 567                $src =~ s{(^[a-z\+]*://)[^/@]*@}{$1};
 568
 569                my $replace = add_path_to_url($ra->url, $path);
 570
 571                my $section = "svn-remote.$self->{repo_id}";
 572                tmp_config("$section.svm-source", $src);
 573                tmp_config("$section.svm-replace", $replace);
 574                tmp_config("$section.svm-uuid", $uuid);
 575                $self->{svm} = {
 576                        source => $src,
 577                        uuid => $uuid,
 578                        replace => $replace
 579                };
 580        }
 581
 582        my $r = $ra->get_latest_revnum;
 583        my $path = $self->path;
 584        my %tried;
 585        while (length $path) {
 586                my $try = add_path_to_url($self->url, $path);
 587                unless ($tried{$try}) {
 588                        return $ra if $self->read_svm_props($ra, $path, $r);
 589                        $tried{$try} = 1;
 590                }
 591                $path =~ s#/?[^/]+$##;
 592        }
 593        die "Path: '$path' should be ''\n" if $path ne '';
 594        return $ra if $self->read_svm_props($ra, $path, $r);
 595        $tried{ add_path_to_url($self->url, $path) } = 1;
 596
 597        if ($ra->{repos_root} eq $self->url) {
 598                die @err, (map { "  $_\n" } keys %tried), "\n";
 599        }
 600
 601        # nope, make sure we're connected to the repository root:
 602        my $ok;
 603        my @tried_b;
 604        $path = $ra->{svn_path};
 605        $ra = Git::SVN::Ra->new($ra->{repos_root});
 606        while (length $path) {
 607                my $try = add_path_to_url($ra->url, $path);
 608                unless ($tried{$try}) {
 609                        $ok = $self->read_svm_props($ra, $path, $r);
 610                        last if $ok;
 611                        $tried{$try} = 1;
 612                }
 613                $path =~ s#/?[^/]+$##;
 614        }
 615        die "Path: '$path' should be ''\n" if $path ne '';
 616        $ok ||= $self->read_svm_props($ra, $path, $r);
 617        $tried{ add_path_to_url($ra->url, $path) } = 1;
 618        if (!$ok) {
 619                die @err, (map { "  $_\n" } keys %tried), "\n";
 620        }
 621        Git::SVN::Ra->new($self->url);
 622}
 623
 624sub svnsync {
 625        my ($self) = @_;
 626        return $self->{svnsync} if $self->{svnsync};
 627
 628        if ($self->no_metadata) {
 629                die "Can't have both 'noMetadata' and ",
 630                    "'useSvnsyncProps' options set!\n";
 631        }
 632        if ($self->rewrite_root) {
 633                die "Can't have both 'useSvnsyncProps' and 'rewriteRoot' ",
 634                    "options set!\n";
 635        }
 636        if ($self->rewrite_uuid) {
 637                die "Can't have both 'useSvnsyncProps' and 'rewriteUUID' ",
 638                    "options set!\n";
 639        }
 640
 641        my $svnsync;
 642        # see if we have it in our config, first:
 643        eval {
 644                my $section = "svn-remote.$self->{repo_id}";
 645
 646                my $url = tmp_config('--get', "$section.svnsync-url");
 647                ($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
 648                   die "doesn't look right - svn:sync-from-url is '$url'\n";
 649
 650                my $uuid = tmp_config('--get', "$section.svnsync-uuid");
 651                ($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
 652                   die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
 653
 654                $svnsync = { url => $url, uuid => $uuid }
 655        };
 656        if ($svnsync && $svnsync->{url} && $svnsync->{uuid}) {
 657                return $self->{svnsync} = $svnsync;
 658        }
 659
 660        my $err = "useSvnsyncProps set, but failed to read " .
 661                  "svnsync property: svn:sync-from-";
 662        my $rp = $self->ra->rev_proplist(0);
 663
 664        my $url = $rp->{'svn:sync-from-url'} or die $err . "url\n";
 665        ($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
 666                   die "doesn't look right - svn:sync-from-url is '$url'\n";
 667
 668        my $uuid = $rp->{'svn:sync-from-uuid'} or die $err . "uuid\n";
 669        ($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
 670                   die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
 671
 672        my $section = "svn-remote.$self->{repo_id}";
 673        tmp_config('--add', "$section.svnsync-uuid", $uuid);
 674        tmp_config('--add', "$section.svnsync-url", $url);
 675        return $self->{svnsync} = { url => $url, uuid => $uuid };
 676}
 677
 678# this allows us to memoize our SVN::Ra UUID locally and avoid a
 679# remote lookup (useful for 'git svn log').
 680sub ra_uuid {
 681        my ($self) = @_;
 682        unless ($self->{ra_uuid}) {
 683                my $key = "svn-remote.$self->{repo_id}.uuid";
 684                my $uuid = eval { tmp_config('--get', $key) };
 685                if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/i) {
 686                        $self->{ra_uuid} = $uuid;
 687                } else {
 688                        die "ra_uuid called without URL\n" unless $self->url;
 689                        $self->{ra_uuid} = $self->ra->get_uuid;
 690                        tmp_config('--add', $key, $self->{ra_uuid});
 691                }
 692        }
 693        $self->{ra_uuid};
 694}
 695
 696sub _set_repos_root {
 697        my ($self, $repos_root) = @_;
 698        my $k = "svn-remote.$self->{repo_id}.reposRoot";
 699        $repos_root ||= $self->ra->{repos_root};
 700        tmp_config($k, $repos_root);
 701        $repos_root;
 702}
 703
 704sub repos_root {
 705        my ($self) = @_;
 706        my $k = "svn-remote.$self->{repo_id}.reposRoot";
 707        eval { tmp_config('--get', $k) } || $self->_set_repos_root;
 708}
 709
 710sub ra {
 711        my ($self) = shift;
 712        my $ra = Git::SVN::Ra->new($self->url);
 713        $self->_set_repos_root($ra->{repos_root});
 714        if ($self->use_svm_props && !$self->{svm}) {
 715                if ($self->no_metadata) {
 716                        die "Can't have both 'noMetadata' and ",
 717                            "'useSvmProps' options set!\n";
 718                } elsif ($self->use_svnsync_props) {
 719                        die "Can't have both 'useSvnsyncProps' and ",
 720                            "'useSvmProps' options set!\n";
 721                }
 722                $ra = $self->_set_svm_vars($ra);
 723                $self->{-want_revprops} = 1;
 724        }
 725        $ra;
 726}
 727
 728# prop_walk(PATH, REV, SUB)
 729# -------------------------
 730# Recursively traverse PATH at revision REV and invoke SUB for each
 731# directory that contains a SVN property.  SUB will be invoked as
 732# follows:  &SUB(gs, path, props);  where `gs' is this instance of
 733# Git::SVN, `path' the path to the directory where the properties
 734# `props' were found.  The `path' will be relative to point of checkout,
 735# that is, if url://repo/trunk is the current Git branch, and that
 736# directory contains a sub-directory `d', SUB will be invoked with `/d/'
 737# as `path' (note the trailing `/').
 738sub prop_walk {
 739        my ($self, $path, $rev, $sub) = @_;
 740
 741        $path =~ s#^/##;
 742        my ($dirent, undef, $props) = $self->ra->get_dir($path, $rev);
 743        $path =~ s#^/*#/#g;
 744        my $p = $path;
 745        # Strip the irrelevant part of the path.
 746        $p =~ s#^/+\Q@{[$self->path]}\E(/|$)#/#;
 747        # Ensure the path is terminated by a `/'.
 748        $p =~ s#/*$#/#;
 749
 750        # The properties contain all the internal SVN stuff nobody
 751        # (usually) cares about.
 752        my $interesting_props = 0;
 753        foreach (keys %{$props}) {
 754                # If it doesn't start with `svn:', it must be a
 755                # user-defined property.
 756                ++$interesting_props and next if $_ !~ /^svn:/;
 757                # FIXME: Fragile, if SVN adds new public properties,
 758                # this needs to be updated.
 759                ++$interesting_props if /^svn:(?:ignore|keywords|executable
 760                                                 |eol-style|mime-type
 761                                                 |externals|needs-lock)$/x;
 762        }
 763        &$sub($self, $p, $props) if $interesting_props;
 764
 765        foreach (sort keys %$dirent) {
 766                next if $dirent->{$_}->{kind} != $SVN::Node::dir;
 767                $self->prop_walk($self->path . $p . $_, $rev, $sub);
 768        }
 769}
 770
 771sub last_rev { ($_[0]->last_rev_commit)[0] }
 772sub last_commit { ($_[0]->last_rev_commit)[1] }
 773
 774# returns the newest SVN revision number and newest commit SHA1
 775sub last_rev_commit {
 776        my ($self) = @_;
 777        if (defined $self->{last_rev} && defined $self->{last_commit}) {
 778                return ($self->{last_rev}, $self->{last_commit});
 779        }
 780        my $c = ::verify_ref($self->refname.'^0');
 781        if ($c && !$self->use_svm_props && !$self->no_metadata) {
 782                my $rev = (::cmt_metadata($c))[1];
 783                if (defined $rev) {
 784                        ($self->{last_rev}, $self->{last_commit}) = ($rev, $c);
 785                        return ($rev, $c);
 786                }
 787        }
 788        my $map_path = $self->map_path;
 789        unless (-e $map_path) {
 790                ($self->{last_rev}, $self->{last_commit}) = (undef, undef);
 791                return (undef, undef);
 792        }
 793        my ($rev, $commit) = $self->rev_map_max(1);
 794        ($self->{last_rev}, $self->{last_commit}) = ($rev, $commit);
 795        return ($rev, $commit);
 796}
 797
 798sub get_fetch_range {
 799        my ($self, $min, $max) = @_;
 800        $max ||= $self->ra->get_latest_revnum;
 801        $min ||= $self->rev_map_max;
 802        (++$min, $max);
 803}
 804
 805sub tmp_config {
 806        my (@args) = @_;
 807        my $old_def_config = "$ENV{GIT_DIR}/svn/config";
 808        my $config = "$ENV{GIT_DIR}/svn/.metadata";
 809        if (! -f $config && -f $old_def_config) {
 810                rename $old_def_config, $config or
 811                       die "Failed rename $old_def_config => $config: $!\n";
 812        }
 813        my $old_config = $ENV{GIT_CONFIG};
 814        $ENV{GIT_CONFIG} = $config;
 815        $@ = undef;
 816        my @ret = eval {
 817                unless (-f $config) {
 818                        mkfile($config);
 819                        open my $fh, '>', $config or
 820                            die "Can't open $config: $!\n";
 821                        print $fh "; This file is used internally by ",
 822                                  "git-svn\n" or die
 823                                  "Couldn't write to $config: $!\n";
 824                        print $fh "; You should not have to edit it\n" or
 825                              die "Couldn't write to $config: $!\n";
 826                        close $fh or die "Couldn't close $config: $!\n";
 827                }
 828                command('config', @args);
 829        };
 830        my $err = $@;
 831        if (defined $old_config) {
 832                $ENV{GIT_CONFIG} = $old_config;
 833        } else {
 834                delete $ENV{GIT_CONFIG};
 835        }
 836        die $err if $err;
 837        wantarray ? @ret : $ret[0];
 838}
 839
 840sub tmp_index_do {
 841        my ($self, $sub) = @_;
 842        my $old_index = $ENV{GIT_INDEX_FILE};
 843        $ENV{GIT_INDEX_FILE} = $self->{index};
 844        $@ = undef;
 845        my @ret = eval {
 846                my ($dir, $base) = ($self->{index} =~ m#^(.*?)/?([^/]+)$#);
 847                mkpath([$dir]) unless -d $dir;
 848                &$sub;
 849        };
 850        my $err = $@;
 851        if (defined $old_index) {
 852                $ENV{GIT_INDEX_FILE} = $old_index;
 853        } else {
 854                delete $ENV{GIT_INDEX_FILE};
 855        }
 856        die $err if $err;
 857        wantarray ? @ret : $ret[0];
 858}
 859
 860sub assert_index_clean {
 861        my ($self, $treeish) = @_;
 862
 863        $self->tmp_index_do(sub {
 864                command_noisy('read-tree', $treeish) unless -e $self->{index};
 865                my $x = command_oneline('write-tree');
 866                my ($y) = (command(qw/cat-file commit/, $treeish) =~
 867                           /^tree ($::sha1)/mo);
 868                return if $y eq $x;
 869
 870                warn "Index mismatch: $y != $x\nrereading $treeish\n";
 871                unlink $self->{index} or die "unlink $self->{index}: $!\n";
 872                command_noisy('read-tree', $treeish);
 873                $x = command_oneline('write-tree');
 874                if ($y ne $x) {
 875                        fatal "trees ($treeish) $y != $x\n",
 876                              "Something is seriously wrong...";
 877                }
 878        });
 879}
 880
 881sub get_commit_parents {
 882        my ($self, $log_entry) = @_;
 883        my (%seen, @ret, @tmp);
 884        # legacy support for 'set-tree'; this is only used by set_tree_cb:
 885        if (my $ip = $self->{inject_parents}) {
 886                if (my $commit = delete $ip->{$log_entry->{revision}}) {
 887                        push @tmp, $commit;
 888                }
 889        }
 890        if (my $cur = ::verify_ref($self->refname.'^0')) {
 891                push @tmp, $cur;
 892        }
 893        if (my $ipd = $self->{inject_parents_dcommit}) {
 894                if (my $commit = delete $ipd->{$log_entry->{revision}}) {
 895                        push @tmp, @$commit;
 896                }
 897        }
 898        push @tmp, $_ foreach (@{$log_entry->{parents}}, @tmp);
 899        while (my $p = shift @tmp) {
 900                next if $seen{$p};
 901                $seen{$p} = 1;
 902                push @ret, $p;
 903        }
 904        @ret;
 905}
 906
 907sub rewrite_root {
 908        my ($self) = @_;
 909        return $self->{-rewrite_root} if exists $self->{-rewrite_root};
 910        my $k = "svn-remote.$self->{repo_id}.rewriteRoot";
 911        my $rwr = eval { command_oneline(qw/config --get/, $k) };
 912        if ($rwr) {
 913                $rwr =~ s#/+$##;
 914                if ($rwr !~ m#^[a-z\+]+://#) {
 915                        die "$rwr is not a valid URL (key: $k)\n";
 916                }
 917        }
 918        $self->{-rewrite_root} = $rwr;
 919}
 920
 921sub rewrite_uuid {
 922        my ($self) = @_;
 923        return $self->{-rewrite_uuid} if exists $self->{-rewrite_uuid};
 924        my $k = "svn-remote.$self->{repo_id}.rewriteUUID";
 925        my $rwid = eval { command_oneline(qw/config --get/, $k) };
 926        if ($rwid) {
 927                $rwid =~ s#/+$##;
 928                if ($rwid !~ m#^[a-f0-9]{8}-(?:[a-f0-9]{4}-){3}[a-f0-9]{12}$#) {
 929                        die "$rwid is not a valid UUID (key: $k)\n";
 930                }
 931        }
 932        $self->{-rewrite_uuid} = $rwid;
 933}
 934
 935sub metadata_url {
 936        my ($self) = @_;
 937        my $url = $self->rewrite_root || $self->url;
 938        return canonicalize_url( add_path_to_url( $url, $self->path ) );
 939}
 940
 941sub full_url {
 942        my ($self) = @_;
 943        return canonicalize_url( add_path_to_url( $self->url, $self->path ) );
 944}
 945
 946sub full_pushurl {
 947        my ($self) = @_;
 948        if ($self->{pushurl}) {
 949                return canonicalize_url( add_path_to_url( $self->{pushurl}, $self->path ) );
 950        } else {
 951                return $self->full_url;
 952        }
 953}
 954
 955sub set_commit_header_env {
 956        my ($log_entry) = @_;
 957        my %env;
 958        foreach my $ned (qw/NAME EMAIL DATE/) {
 959                foreach my $ac (qw/AUTHOR COMMITTER/) {
 960                        $env{"GIT_${ac}_${ned}"} = $ENV{"GIT_${ac}_${ned}"};
 961                }
 962        }
 963
 964        $ENV{GIT_AUTHOR_NAME} = $log_entry->{name};
 965        $ENV{GIT_AUTHOR_EMAIL} = $log_entry->{email};
 966        $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_entry->{date};
 967
 968        $ENV{GIT_COMMITTER_NAME} = (defined $log_entry->{commit_name})
 969                                                ? $log_entry->{commit_name}
 970                                                : $log_entry->{name};
 971        $ENV{GIT_COMMITTER_EMAIL} = (defined $log_entry->{commit_email})
 972                                                ? $log_entry->{commit_email}
 973                                                : $log_entry->{email};
 974        \%env;
 975}
 976
 977sub restore_commit_header_env {
 978        my ($env) = @_;
 979        foreach my $ned (qw/NAME EMAIL DATE/) {
 980                foreach my $ac (qw/AUTHOR COMMITTER/) {
 981                        my $k = "GIT_${ac}_${ned}";
 982                        if (defined $env->{$k}) {
 983                                $ENV{$k} = $env->{$k};
 984                        } else {
 985                                delete $ENV{$k};
 986                        }
 987                }
 988        }
 989}
 990
 991sub gc {
 992        command_noisy('gc', '--auto');
 993};
 994
 995sub do_git_commit {
 996        my ($self, $log_entry) = @_;
 997        my $lr = $self->last_rev;
 998        if (defined $lr && $lr >= $log_entry->{revision}) {
 999                die "Last fetched revision of ", $self->refname,
1000                    " was r$lr, but we are about to fetch: ",
1001                    "r$log_entry->{revision}!\n";
1002        }
1003        if (my $c = $self->rev_map_get($log_entry->{revision})) {
1004                croak "$log_entry->{revision} = $c already exists! ",
1005                      "Why are we refetching it?\n";
1006        }
1007        my $old_env = set_commit_header_env($log_entry);
1008        my $tree = $log_entry->{tree};
1009        if (!defined $tree) {
1010                $tree = $self->tmp_index_do(sub {
1011                                            command_oneline('write-tree') });
1012        }
1013        die "Tree is not a valid sha1: $tree\n" if $tree !~ /^$::sha1$/o;
1014
1015        my @exec = ('git', 'commit-tree', $tree);
1016        foreach ($self->get_commit_parents($log_entry)) {
1017                push @exec, '-p', $_;
1018        }
1019        defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @exec))
1020                                                                   or croak $!;
1021        binmode $msg_fh;
1022
1023        # we always get UTF-8 from SVN, but we may want our commits in
1024        # a different encoding.
1025        if (my $enc = Git::config('i18n.commitencoding')) {
1026                require Encode;
1027                Encode::from_to($log_entry->{log}, 'UTF-8', $enc);
1028        }
1029        print $msg_fh $log_entry->{log} or croak $!;
1030        restore_commit_header_env($old_env);
1031        unless ($self->no_metadata) {
1032                print $msg_fh "\ngit-svn-id: $log_entry->{metadata}\n"
1033                              or croak $!;
1034        }
1035        $msg_fh->flush == 0 or croak $!;
1036        close $msg_fh or croak $!;
1037        chomp(my $commit = do { local $/; <$out_fh> });
1038        close $out_fh or croak $!;
1039        waitpid $pid, 0;
1040        croak $? if $?;
1041        if ($commit !~ /^$::sha1$/o) {
1042                die "Failed to commit, invalid sha1: $commit\n";
1043        }
1044
1045        $self->rev_map_set($log_entry->{revision}, $commit, 1);
1046
1047        $self->{last_rev} = $log_entry->{revision};
1048        $self->{last_commit} = $commit;
1049        print "r$log_entry->{revision}" unless $::_q > 1;
1050        if (defined $log_entry->{svm_revision}) {
1051                 print " (\@$log_entry->{svm_revision})" unless $::_q > 1;
1052                 $self->rev_map_set($log_entry->{svm_revision}, $commit,
1053                                   0, $self->svm_uuid);
1054        }
1055        print " = $commit ($self->{ref_id})\n" unless $::_q > 1;
1056        if (--$_gc_nr == 0) {
1057                $_gc_nr = $_gc_period;
1058                gc();
1059        }
1060        return $commit;
1061}
1062
1063sub match_paths {
1064        my ($self, $paths, $r) = @_;
1065        return 1 if $self->path eq '';
1066        if (my $path = $paths->{"/".$self->path}) {
1067                return ($path->{action} eq 'D') ? 0 : 1;
1068        }
1069        $self->{path_regex} ||= qr{^/\Q@{[$self->path]}\E/};
1070        if (grep /$self->{path_regex}/, keys %$paths) {
1071                return 1;
1072        }
1073        my $c = '';
1074        foreach (split m#/#, $self->path) {
1075                $c .= "/$_";
1076                next unless ($paths->{$c} &&
1077                             ($paths->{$c}->{action} =~ /^[AR]$/));
1078                if ($self->ra->check_path($self->path, $r) ==
1079                    $SVN::Node::dir) {
1080                        return 1;
1081                }
1082        }
1083        return 0;
1084}
1085
1086sub find_parent_branch {
1087        my ($self, $paths, $rev) = @_;
1088        return undef unless $self->follow_parent;
1089        unless (defined $paths) {
1090                my $err_handler = $SVN::Error::handler;
1091                $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs;
1092                $self->ra->get_log([$self->path], $rev, $rev, 0, 1, 1,
1093                                   sub { $paths = $_[0] });
1094                $SVN::Error::handler = $err_handler;
1095        }
1096        return undef unless defined $paths;
1097
1098        # look for a parent from another branch:
1099        my @b_path_components = split m#/#, $self->path;
1100        my @a_path_components;
1101        my $i;
1102        while (@b_path_components) {
1103                $i = $paths->{'/'.join('/', @b_path_components)};
1104                last if $i && defined $i->{copyfrom_path};
1105                unshift(@a_path_components, pop(@b_path_components));
1106        }
1107        return undef unless defined $i && defined $i->{copyfrom_path};
1108        my $branch_from = $i->{copyfrom_path};
1109        if (@a_path_components) {
1110                print STDERR "branch_from: $branch_from => ";
1111                $branch_from .= '/'.join('/', @a_path_components);
1112                print STDERR $branch_from, "\n";
1113        }
1114        my $r = $i->{copyfrom_rev};
1115        my $repos_root = $self->ra->{repos_root};
1116        my $url = $self->ra->url;
1117        my $new_url = canonicalize_url( add_path_to_url( $url, $branch_from ) );
1118        print STDERR  "Found possible branch point: ",
1119                      "$new_url => ", $self->full_url, ", $r\n"
1120                      unless $::_q > 1;
1121        $branch_from =~ s#^/##;
1122        my $gs = $self->other_gs($new_url, $url,
1123                                 $branch_from, $r, $self->{ref_id});
1124        my ($r0, $parent) = $gs->find_rev_before($r, 1);
1125        {
1126                my ($base, $head);
1127                if (!defined $r0 || !defined $parent) {
1128                        ($base, $head) = parse_revision_argument(0, $r);
1129                } else {
1130                        if ($r0 < $r) {
1131                                $gs->ra->get_log([$gs->path], $r0 + 1, $r, 1,
1132                                        0, 1, sub { $base = $_[1] - 1 });
1133                        }
1134                }
1135                if (defined $base && $base <= $r) {
1136                        $gs->fetch($base, $r);
1137                }
1138                ($r0, $parent) = $gs->find_rev_before($r, 1);
1139        }
1140        if (defined $r0 && defined $parent) {
1141                print STDERR "Found branch parent: ($self->{ref_id}) $parent\n"
1142                             unless $::_q > 1;
1143                my $ed;
1144                if ($self->ra->can_do_switch) {
1145                        $self->assert_index_clean($parent);
1146                        print STDERR "Following parent with do_switch\n"
1147                                     unless $::_q > 1;
1148                        # do_switch works with svn/trunk >= r22312, but that
1149                        # is not included with SVN 1.4.3 (the latest version
1150                        # at the moment), so we can't rely on it
1151                        $self->{last_rev} = $r0;
1152                        $self->{last_commit} = $parent;
1153                        $ed = Git::SVN::Fetcher->new($self, $gs->path);
1154                        $gs->ra->gs_do_switch($r0, $rev, $gs,
1155                                              $self->full_url, $ed)
1156                          or die "SVN connection failed somewhere...\n";
1157                } elsif ($self->ra->trees_match($new_url, $r0,
1158                                                $self->full_url, $rev)) {
1159                        print STDERR "Trees match:\n",
1160                                     "  $new_url\@$r0\n",
1161                                     "  ${\$self->full_url}\@$rev\n",
1162                                     "Following parent with no changes\n"
1163                                     unless $::_q > 1;
1164                        $self->tmp_index_do(sub {
1165                            command_noisy('read-tree', $parent);
1166                        });
1167                        $self->{last_commit} = $parent;
1168                } else {
1169                        print STDERR "Following parent with do_update\n"
1170                                     unless $::_q > 1;
1171                        $ed = Git::SVN::Fetcher->new($self);
1172                        $self->ra->gs_do_update($rev, $rev, $self, $ed)
1173                          or die "SVN connection failed somewhere...\n";
1174                }
1175                print STDERR "Successfully followed parent\n" unless $::_q > 1;
1176                return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
1177        }
1178        return undef;
1179}
1180
1181sub do_fetch {
1182        my ($self, $paths, $rev) = @_;
1183        my $ed;
1184        my ($last_rev, @parents);
1185        if (my $lc = $self->last_commit) {
1186                # we can have a branch that was deleted, then re-added
1187                # under the same name but copied from another path, in
1188                # which case we'll have multiple parents (we don't
1189                # want to break the original ref or lose copypath info):
1190                if (my $log_entry = $self->find_parent_branch($paths, $rev)) {
1191                        push @{$log_entry->{parents}}, $lc;
1192                        return $log_entry;
1193                }
1194                $ed = Git::SVN::Fetcher->new($self);
1195                $last_rev = $self->{last_rev};
1196                $ed->{c} = $lc;
1197                @parents = ($lc);
1198        } else {
1199                $last_rev = $rev;
1200                if (my $log_entry = $self->find_parent_branch($paths, $rev)) {
1201                        return $log_entry;
1202                }
1203                $ed = Git::SVN::Fetcher->new($self);
1204        }
1205        unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
1206                die "SVN connection failed somewhere...\n";
1207        }
1208        $self->make_log_entry($rev, \@parents, $ed, $last_rev, $self->path);
1209}
1210
1211sub mkemptydirs {
1212        my ($self, $r) = @_;
1213
1214        # add/remove/collect a paths table
1215        #
1216        # Paths are split into a tree of nodes, stored as a hash of hashes.
1217        #
1218        # Each node contains a 'path' entry for the path (if any) associated
1219        # with that node and a 'children' entry for any nodes under that
1220        # location.
1221        #
1222        # Removing a path requires a hash lookup for each component then
1223        # dropping that node (and anything under it), which is substantially
1224        # faster than a grep slice into a single hash of paths for large
1225        # numbers of paths.
1226        #
1227        # For a large (200K) number of empty_dir directives this reduces
1228        # scanning time to 3 seconds vs 10 minutes for grep+delete on a single
1229        # hash of paths.
1230        sub add_path {
1231                my ($paths_table, $path) = @_;
1232                my $node_ref;
1233
1234                foreach my $x (split('/', $path)) {
1235                        if (!exists($paths_table->{$x})) {
1236                                $paths_table->{$x} = { children => {} };
1237                        }
1238
1239                        $node_ref = $paths_table->{$x};
1240                        $paths_table = $paths_table->{$x}->{children};
1241                }
1242
1243                $node_ref->{path} = $path;
1244        }
1245
1246        sub remove_path {
1247                my ($paths_table, $path) = @_;
1248                my $nodes_ref;
1249                my $node_name;
1250
1251                foreach my $x (split('/', $path)) {
1252                        if (!exists($paths_table->{$x})) {
1253                                return;
1254                        }
1255
1256                        $nodes_ref = $paths_table;
1257                        $node_name = $x;
1258
1259                        $paths_table = $paths_table->{$x}->{children};
1260                }
1261
1262                delete($nodes_ref->{$node_name});
1263        }
1264
1265        sub collect_paths {
1266                my ($paths_table, $paths_ref) = @_;
1267
1268                foreach my $v (values %$paths_table) {
1269                        my $p = $v->{path};
1270                        my $c = $v->{children};
1271
1272                        collect_paths($c, $paths_ref);
1273
1274                        if (defined($p)) {
1275                                push(@$paths_ref, $p);
1276                        }
1277                }
1278        }
1279
1280        sub scan {
1281                my ($r, $paths_table, $line) = @_;
1282                if (defined $r && $line =~ /^r(\d+)$/) {
1283                        return 0 if $1 > $r;
1284                } elsif ($line =~ /^  \+empty_dir: (.+)$/) {
1285                        add_path($paths_table, $1);
1286                } elsif ($line =~ /^  \-empty_dir: (.+)$/) {
1287                        remove_path($paths_table, $1);
1288                }
1289                1; # continue
1290        };
1291
1292        my @empty_dirs;
1293        my %paths_table;
1294
1295        my $gz_file = "$self->{dir}/unhandled.log.gz";
1296        if (-f $gz_file) {
1297                if (!can_compress()) {
1298                        warn "Compress::Zlib could not be found; ",
1299                             "empty directories in $gz_file will not be read\n";
1300                } else {
1301                        my $gz = Compress::Zlib::gzopen($gz_file, "rb") or
1302                                die "Unable to open $gz_file: $!\n";
1303                        my $line;
1304                        while ($gz->gzreadline($line) > 0) {
1305                                scan($r, \%paths_table, $line) or last;
1306                        }
1307                        $gz->gzclose;
1308                }
1309        }
1310
1311        if (open my $fh, '<', "$self->{dir}/unhandled.log") {
1312                binmode $fh or croak "binmode: $!";
1313                while (<$fh>) {
1314                        scan($r, \%paths_table, $_) or last;
1315                }
1316                close $fh;
1317        }
1318
1319        collect_paths(\%paths_table, \@empty_dirs);
1320        my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
1321        foreach my $d (sort @empty_dirs) {
1322                $d = uri_decode($d);
1323                $d =~ s/$strip//;
1324                next unless length($d);
1325                next if -d $d;
1326                if (-e $d) {
1327                        warn "$d exists but is not a directory\n";
1328                } else {
1329                        print "creating empty directory: $d\n";
1330                        mkpath([$d]);
1331                }
1332        }
1333}
1334
1335sub get_untracked {
1336        my ($self, $ed) = @_;
1337        my @out;
1338        my $h = $ed->{empty};
1339        foreach (sort keys %$h) {
1340                my $act = $h->{$_} ? '+empty_dir' : '-empty_dir';
1341                push @out, "  $act: " . uri_encode($_);
1342                warn "W: $act: $_\n";
1343        }
1344        foreach my $t (qw/dir_prop file_prop/) {
1345                $h = $ed->{$t} or next;
1346                foreach my $path (sort keys %$h) {
1347                        my $ppath = $path eq '' ? '.' : $path;
1348                        foreach my $prop (sort keys %{$h->{$path}}) {
1349                                next if $SKIP_PROP{$prop};
1350                                my $v = $h->{$path}->{$prop};
1351                                my $t_ppath_prop = "$t: " .
1352                                                    uri_encode($ppath) . ' ' .
1353                                                    uri_encode($prop);
1354                                if (defined $v) {
1355                                        push @out, "  +$t_ppath_prop " .
1356                                                   uri_encode($v);
1357                                } else {
1358                                        push @out, "  -$t_ppath_prop";
1359                                }
1360                        }
1361                }
1362        }
1363        foreach my $t (qw/absent_file absent_directory/) {
1364                $h = $ed->{$t} or next;
1365                foreach my $parent (sort keys %$h) {
1366                        foreach my $path (sort @{$h->{$parent}}) {
1367                                push @out, "  $t: " .
1368                                           uri_encode("$parent/$path");
1369                                warn "W: $t: $parent/$path ",
1370                                     "Insufficient permissions?\n";
1371                        }
1372                }
1373        }
1374        \@out;
1375}
1376
1377# parse_svn_date(DATE)
1378# --------------------
1379# Given a date (in UTC) from Subversion, return a string in the format
1380# "<TZ Offset> <local date/time>" that Git will use.
1381#
1382# By default the parsed date will be in UTC; if $Git::SVN::_localtime
1383# is true we'll convert it to the local timezone instead.
1384sub parse_svn_date {
1385        my $date = shift || return '+0000 1970-01-01 00:00:00';
1386        my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
1387                                            (\d\d?)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
1388                                         croak "Unable to parse date: $date\n";
1389        my $parsed_date;    # Set next.
1390
1391        if ($Git::SVN::_localtime) {
1392                # Translate the Subversion datetime to an epoch time.
1393                # Begin by switching ourselves to $date's timezone, UTC.
1394                my $old_env_TZ = $ENV{TZ};
1395                $ENV{TZ} = 'UTC';
1396
1397                my $epoch_in_UTC =
1398                    Time::Local::timelocal($S, $M, $H, $d, $m - 1, $Y - 1900);
1399
1400                # Determine our local timezone (including DST) at the
1401                # time of $epoch_in_UTC.  $Git::SVN::Log::TZ stored the
1402                # value of TZ, if any, at the time we were run.
1403                if (defined $Git::SVN::Log::TZ) {
1404                        $ENV{TZ} = $Git::SVN::Log::TZ;
1405                } else {
1406                        delete $ENV{TZ};
1407                }
1408
1409                my $our_TZ = get_tz_offset();
1410
1411                # This converts $epoch_in_UTC into our local timezone.
1412                my ($sec, $min, $hour, $mday, $mon, $year,
1413                    $wday, $yday, $isdst) = localtime($epoch_in_UTC);
1414
1415                $parsed_date = sprintf('%s %04d-%02d-%02d %02d:%02d:%02d',
1416                                       $our_TZ, $year + 1900, $mon + 1,
1417                                       $mday, $hour, $min, $sec);
1418
1419                # Reset us to the timezone in effect when we entered
1420                # this routine.
1421                if (defined $old_env_TZ) {
1422                        $ENV{TZ} = $old_env_TZ;
1423                } else {
1424                        delete $ENV{TZ};
1425                }
1426        } else {
1427                $parsed_date = "+0000 $Y-$m-$d $H:$M:$S";
1428        }
1429
1430        return $parsed_date;
1431}
1432
1433sub other_gs {
1434        my ($self, $new_url, $url,
1435            $branch_from, $r, $old_ref_id) = @_;
1436        my $gs = Git::SVN->find_by_url($new_url, $url, $branch_from);
1437        unless ($gs) {
1438                my $ref_id = $old_ref_id;
1439                $ref_id =~ s/\@\d+-*$//;
1440                $ref_id .= "\@$r";
1441                # just grow a tail if we're not unique enough :x
1442                $ref_id .= '-' while find_ref($ref_id);
1443                my ($u, $p, $repo_id) = ($new_url, '', $ref_id);
1444                if ($u =~ s#^\Q$url\E(/|$)##) {
1445                        $p = $u;
1446                        $u = $url;
1447                        $repo_id = $self->{repo_id};
1448                }
1449                while (1) {
1450                        # It is possible to tag two different subdirectories at
1451                        # the same revision.  If the url for an existing ref
1452                        # does not match, we must either find a ref with a
1453                        # matching url or create a new ref by growing a tail.
1454                        $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1);
1455                        my (undef, $max_commit) = $gs->rev_map_max(1);
1456                        last if (!$max_commit);
1457                        my ($url) = ::cmt_metadata($max_commit);
1458                        last if ($url eq $gs->metadata_url);
1459                        $ref_id .= '-';
1460                }
1461                print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1;
1462        }
1463        $gs
1464}
1465
1466sub call_authors_prog {
1467        my ($orig_author) = @_;
1468        $orig_author = command_oneline('rev-parse', '--sq-quote', $orig_author);
1469        my $author = `$::_authors_prog $orig_author`;
1470        if ($? != 0) {
1471                die "$::_authors_prog failed with exit code $?\n"
1472        }
1473        if ($author =~ /^\s*(.+?)\s*<(.*)>\s*$/) {
1474                my ($name, $email) = ($1, $2);
1475                $email = undef if length $2 == 0;
1476                return [$name, $email];
1477        } else {
1478                die "Author: $orig_author: $::_authors_prog returned "
1479                        . "invalid author format: $author\n";
1480        }
1481}
1482
1483sub check_author {
1484        my ($author) = @_;
1485        if (!defined $author || length $author == 0) {
1486                $author = '(no author)';
1487        }
1488        if (!defined $::users{$author}) {
1489                if (defined $::_authors_prog) {
1490                        $::users{$author} = call_authors_prog($author);
1491                } elsif (defined $::_authors) {
1492                        die "Author: $author not defined in $::_authors file\n";
1493                }
1494        }
1495        $author;
1496}
1497
1498sub find_extra_svk_parents {
1499        my ($self, $tickets, $parents) = @_;
1500        # aha!  svk:merge property changed...
1501        my @tickets = split "\n", $tickets;
1502        my @known_parents;
1503        for my $ticket ( @tickets ) {
1504                my ($uuid, $path, $rev) = split /:/, $ticket;
1505                if ( $uuid eq $self->ra_uuid ) {
1506                        my $repos_root = $self->url;
1507                        my $branch_from = $path;
1508                        $branch_from =~ s{^/}{};
1509                        my $gs = $self->other_gs(add_path_to_url( $repos_root, $branch_from ),
1510                                                 $repos_root,
1511                                                 $branch_from,
1512                                                 $rev,
1513                                                 $self->{ref_id});
1514                        if ( my $commit = $gs->rev_map_get($rev, $uuid) ) {
1515                                # wahey!  we found it, but it might be
1516                                # an old one (!)
1517                                push @known_parents, [ $rev, $commit ];
1518                        }
1519                }
1520        }
1521        # Ordering matters; highest-numbered commit merge tickets
1522        # first, as they may account for later merge ticket additions
1523        # or changes.
1524        @known_parents = map {$_->[1]} sort {$b->[0] <=> $a->[0]} @known_parents;
1525        for my $parent ( @known_parents ) {
1526                my @cmd = ('rev-list', $parent, map { "^$_" } @$parents );
1527                my ($msg_fh, $ctx) = command_output_pipe(@cmd);
1528                my $new;
1529                while ( <$msg_fh> ) {
1530                        $new=1;last;
1531                }
1532                command_close_pipe($msg_fh, $ctx);
1533                if ( $new ) {
1534                        print STDERR
1535                            "Found merge parent (svk:merge ticket): $parent\n";
1536                        push @$parents, $parent;
1537                }
1538        }
1539}
1540
1541sub lookup_svn_merge {
1542        my $uuid = shift;
1543        my $url = shift;
1544        my $source = shift;
1545        my $revs = shift;
1546
1547        my $path = $source;
1548        $path =~ s{^/}{};
1549        my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
1550        if ( !$gs ) {
1551                warn "Couldn't find revmap for $url$source\n";
1552                return;
1553        }
1554        my @ranges = split ",", $revs;
1555        my ($tip, $tip_commit);
1556        my @merged_commit_ranges;
1557        # find the tip
1558        for my $range ( @ranges ) {
1559                if ($range =~ /[*]$/) {
1560                        warn "W: Ignoring partial merge in svn:mergeinfo "
1561                                ."dirprop: $source:$range\n";
1562                        next;
1563                }
1564                my ($bottom, $top) = split "-", $range;
1565                $top ||= $bottom;
1566                my $bottom_commit = $gs->find_rev_after( $bottom, 1, $top );
1567                my $top_commit = $gs->find_rev_before( $top, 1, $bottom );
1568
1569                unless ($top_commit and $bottom_commit) {
1570                        warn "W: unknown path/rev in svn:mergeinfo "
1571                                ."dirprop: $source:$range\n";
1572                        next;
1573                }
1574
1575                if (scalar(command('rev-parse', "$bottom_commit^@"))) {
1576                        push @merged_commit_ranges,
1577                             "$bottom_commit^..$top_commit";
1578                } else {
1579                        push @merged_commit_ranges, "$top_commit";
1580                }
1581
1582                if ( !defined $tip or $top > $tip ) {
1583                        $tip = $top;
1584                        $tip_commit = $top_commit;
1585                }
1586        }
1587        return ($tip_commit, @merged_commit_ranges);
1588}
1589
1590sub _rev_list {
1591        my ($msg_fh, $ctx) = command_output_pipe(
1592                "rev-list", @_,
1593               );
1594        my @rv;
1595        while ( <$msg_fh> ) {
1596                chomp;
1597                push @rv, $_;
1598        }
1599        command_close_pipe($msg_fh, $ctx);
1600        @rv;
1601}
1602
1603sub check_cherry_pick2 {
1604        my $base = shift;
1605        my $tip = shift;
1606        my $parents = shift;
1607        my @ranges = @_;
1608        my %commits = map { $_ => 1 }
1609                _rev_list("--no-merges", $tip, "--not", $base, @$parents, "--");
1610        for my $range ( @ranges ) {
1611                delete @commits{_rev_list($range, "--")};
1612        }
1613        for my $commit (keys %commits) {
1614                if (has_no_changes($commit)) {
1615                        delete $commits{$commit};
1616                }
1617        }
1618        my @k = (keys %commits);
1619        return (scalar @k, $k[0]);
1620}
1621
1622sub has_no_changes {
1623        my $commit = shift;
1624
1625        my @revs = split / /, command_oneline(
1626                qw(rev-list --parents -1 -m), $commit);
1627
1628        # Commits with no parents, e.g. the start of a partial branch,
1629        # have changes by definition.
1630        return 1 if (@revs < 2);
1631
1632        # Commits with multiple parents, e.g a merge, have no changes
1633        # by definition.
1634        return 0 if (@revs > 2);
1635
1636        return (command_oneline("rev-parse", "$commit^{tree}") eq
1637                command_oneline("rev-parse", "$commit~1^{tree}"));
1638}
1639
1640sub tie_for_persistent_memoization {
1641        my $hash = shift;
1642        my $path = shift;
1643
1644        unless ($memo_backend) {
1645                if (eval { require Git::SVN::Memoize::YAML; 1}) {
1646                        $memo_backend = 1;
1647                } else {
1648                        require Memoize::Storable;
1649                        $memo_backend = -1;
1650                }
1651        }
1652
1653        if ($memo_backend > 0) {
1654                tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml";
1655        } else {
1656                tie %$hash => 'Memoize::Storable', "$path.db", 'nstore';
1657        }
1658}
1659
1660# The GIT_DIR environment variable is not always set until after the command
1661# line arguments are processed, so we can't memoize in a BEGIN block.
1662{
1663        my $memoized = 0;
1664
1665        sub memoize_svn_mergeinfo_functions {
1666                return if $memoized;
1667                $memoized = 1;
1668
1669                my $cache_path = "$ENV{GIT_DIR}/svn/.caches/";
1670                mkpath([$cache_path]) unless -d $cache_path;
1671
1672                my %lookup_svn_merge_cache;
1673                my %check_cherry_pick2_cache;
1674                my %has_no_changes_cache;
1675
1676                tie_for_persistent_memoization(\%lookup_svn_merge_cache,
1677                    "$cache_path/lookup_svn_merge");
1678                memoize 'lookup_svn_merge',
1679                        SCALAR_CACHE => 'FAULT',
1680                        LIST_CACHE => ['HASH' => \%lookup_svn_merge_cache],
1681                ;
1682
1683                tie_for_persistent_memoization(\%check_cherry_pick2_cache,
1684                    "$cache_path/check_cherry_pick2");
1685                memoize 'check_cherry_pick2',
1686                        SCALAR_CACHE => 'FAULT',
1687                        LIST_CACHE => ['HASH' => \%check_cherry_pick2_cache],
1688                ;
1689
1690                tie_for_persistent_memoization(\%has_no_changes_cache,
1691                    "$cache_path/has_no_changes");
1692                memoize 'has_no_changes',
1693                        SCALAR_CACHE => ['HASH' => \%has_no_changes_cache],
1694                        LIST_CACHE => 'FAULT',
1695                ;
1696        }
1697
1698        sub unmemoize_svn_mergeinfo_functions {
1699                return if not $memoized;
1700                $memoized = 0;
1701
1702                Memoize::unmemoize 'lookup_svn_merge';
1703                Memoize::unmemoize 'check_cherry_pick2';
1704                Memoize::unmemoize 'has_no_changes';
1705        }
1706
1707        sub clear_memoized_mergeinfo_caches {
1708                die "Only call this method in non-memoized context" if ($memoized);
1709
1710                my $cache_path = "$ENV{GIT_DIR}/svn/.caches/";
1711                return unless -d $cache_path;
1712
1713                for my $cache_file (("$cache_path/lookup_svn_merge",
1714                                     "$cache_path/check_cherry_pick", # old
1715                                     "$cache_path/check_cherry_pick2",
1716                                     "$cache_path/has_no_changes")) {
1717                        for my $suffix (qw(yaml db)) {
1718                                my $file = "$cache_file.$suffix";
1719                                next unless -e $file;
1720                                unlink($file) or die "unlink($file) failed: $!\n";
1721                        }
1722                }
1723        }
1724
1725
1726        Memoize::memoize 'Git::SVN::repos_root';
1727}
1728
1729END {
1730        # Force cache writeout explicitly instead of waiting for
1731        # global destruction to avoid segfault in Storable:
1732        # http://rt.cpan.org/Public/Bug/Display.html?id=36087
1733        unmemoize_svn_mergeinfo_functions();
1734}
1735
1736sub parents_exclude {
1737        my $parents = shift;
1738        my @commits = @_;
1739        return unless @commits;
1740
1741        my @excluded;
1742        my $excluded;
1743        do {
1744                my @cmd = ('rev-list', "-1", @commits, "--not", @$parents );
1745                $excluded = command_oneline(@cmd);
1746                if ( $excluded ) {
1747                        my @new;
1748                        my $found;
1749                        for my $commit ( @commits ) {
1750                                if ( $commit eq $excluded ) {
1751                                        push @excluded, $commit;
1752                                        $found++;
1753                                }
1754                                else {
1755                                        push @new, $commit;
1756                                }
1757                        }
1758                        die "saw commit '$excluded' in rev-list output, "
1759                                ."but we didn't ask for that commit (wanted: @commits --not @$parents)"
1760                                        unless $found;
1761                        @commits = @new;
1762                }
1763        }
1764                while ($excluded and @commits);
1765
1766        return @excluded;
1767}
1768
1769# Compute what's new in svn:mergeinfo.
1770sub mergeinfo_changes {
1771        my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
1772        my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
1773        my $old_minfo = {};
1774
1775        my $ra = $self->ra;
1776        # Give up if $old_path isn't in the repo.
1777        # This is probably a merge on a subtree.
1778        if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
1779                warn "W: ignoring svn:mergeinfo on $old_path, ",
1780                        "directory didn't exist in r$old_rev\n";
1781                return {};
1782        }
1783        my (undef, undef, $props) = $ra->get_dir($old_path, $old_rev);
1784        if (defined $props->{"svn:mergeinfo"}) {
1785                my %omi = map {split ":", $_ } split "\n",
1786                        $props->{"svn:mergeinfo"};
1787                $old_minfo = \%omi;
1788        }
1789
1790        my %changes = ();
1791        foreach my $p (keys %minfo) {
1792                my $a = $old_minfo->{$p} || "";
1793                my $b = $minfo{$p};
1794                # Omit merged branches whose ranges lists are unchanged.
1795                next if $a eq $b;
1796                # Remove any common range list prefix.
1797                ($a ^ $b) =~ /^[\0]*/;
1798                my $common_prefix = rindex $b, ",", $+[0] - 1;
1799                $changes{$p} = substr $b, $common_prefix + 1;
1800        }
1801        print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
1802                scalar(keys %minfo), " sources, ",
1803                scalar(keys %changes), " changed\n";
1804
1805        return \%changes;
1806}
1807
1808# note: this function should only be called if the various dirprops
1809# have actually changed
1810sub find_extra_svn_parents {
1811        my ($self, $mergeinfo, $parents) = @_;
1812        # aha!  svk:merge property changed...
1813
1814        memoize_svn_mergeinfo_functions();
1815
1816        # We first search for merged tips which are not in our
1817        # history.  Then, we figure out which git revisions are in
1818        # that tip, but not this revision.  If all of those revisions
1819        # are now marked as merge, we can add the tip as a parent.
1820        my @merges = sort keys %$mergeinfo;
1821        my @merge_tips;
1822        my $url = $self->url;
1823        my $uuid = $self->ra_uuid;
1824        my @all_ranges;
1825        for my $merge ( @merges ) {
1826                my ($tip_commit, @ranges) =
1827                        lookup_svn_merge( $uuid, $url,
1828                                          $merge, $mergeinfo->{$merge} );
1829                unless (!$tip_commit or
1830                                grep { $_ eq $tip_commit } @$parents ) {
1831                        push @merge_tips, $tip_commit;
1832                        push @all_ranges, @ranges;
1833                } else {
1834                        push @merge_tips, undef;
1835                }
1836        }
1837
1838        my %excluded = map { $_ => 1 }
1839                parents_exclude($parents, grep { defined } @merge_tips);
1840
1841        # check merge tips for new parents
1842        my @new_parents;
1843        for my $merge_tip ( @merge_tips ) {
1844                my $merge = shift @merges;
1845                next unless $merge_tip and $excluded{$merge_tip};
1846                my $spec = "$merge:$mergeinfo->{$merge}";
1847
1848                # check out 'new' tips
1849                my $merge_base;
1850                eval {
1851                        $merge_base = command_oneline(
1852                                "merge-base",
1853                                @$parents, $merge_tip,
1854                        );
1855                };
1856                if ($@) {
1857                        die "An error occurred during merge-base"
1858                                unless $@->isa("Git::Error::Command");
1859
1860                        warn "W: Cannot find common ancestor between ".
1861                             "@$parents and $merge_tip. Ignoring merge info.\n";
1862                        next;
1863                }
1864
1865                # double check that there are no missing non-merge commits
1866                my ($ninc, $ifirst) = check_cherry_pick2(
1867                        $merge_base, $merge_tip,
1868                        $parents,
1869                        @all_ranges,
1870                       );
1871
1872                if ($ninc) {
1873                        warn "W: svn cherry-pick ignored ($spec) - missing " .
1874                                "$ninc commit(s) (eg $ifirst)\n";
1875                } else {
1876                        warn "Found merge parent ($spec): ", $merge_tip, "\n";
1877                        push @new_parents, $merge_tip;
1878                }
1879        }
1880
1881        # cater for merges which merge commits from multiple branches
1882        if ( @new_parents > 1 ) {
1883                for ( my $i = 0; $i <= $#new_parents; $i++ ) {
1884                        for ( my $j = 0; $j <= $#new_parents; $j++ ) {
1885                                next if $i == $j;
1886                                next unless $new_parents[$i];
1887                                next unless $new_parents[$j];
1888                                my $revs = command_oneline(
1889                                        "rev-list", "-1",
1890                                        "$new_parents[$i]..$new_parents[$j]",
1891                                       );
1892                                if ( !$revs ) {
1893                                        undef($new_parents[$j]);
1894                                }
1895                        }
1896                }
1897        }
1898        push @$parents, grep { defined } @new_parents;
1899}
1900
1901sub make_log_entry {
1902        my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
1903        my $untracked = $self->get_untracked($ed);
1904
1905        my @parents = @$parents;
1906        my $props = $ed->{dir_prop}{$self->path};
1907        if ( $props->{"svk:merge"} ) {
1908                $self->find_extra_svk_parents($props->{"svk:merge"}, \@parents);
1909        }
1910        if ( $props->{"svn:mergeinfo"} ) {
1911                my $mi_changes = $self->mergeinfo_changes
1912                        ($parent_path, $parent_rev,
1913                         $self->path, $rev,
1914                         $props->{"svn:mergeinfo"});
1915                $self->find_extra_svn_parents($mi_changes, \@parents);
1916        }
1917
1918        open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!;
1919        print $un "r$rev\n" or croak $!;
1920        print $un $_, "\n" foreach @$untracked;
1921        my %log_entry = ( parents => \@parents, revision => $rev,
1922                          log => '');
1923
1924        my $headrev;
1925        my $logged = delete $self->{logged_rev_props};
1926        if (!$logged || $self->{-want_revprops}) {
1927                my $rp = $self->ra->rev_proplist($rev);
1928                foreach (sort keys %$rp) {
1929                        my $v = $rp->{$_};
1930                        if (/^svn:(author|date|log)$/) {
1931                                $log_entry{$1} = $v;
1932                        } elsif ($_ eq 'svm:headrev') {
1933                                $headrev = $v;
1934                        } else {
1935                                print $un "  rev_prop: ", uri_encode($_), ' ',
1936                                          uri_encode($v), "\n";
1937                        }
1938                }
1939        } else {
1940                map { $log_entry{$_} = $logged->{$_} } keys %$logged;
1941        }
1942        close $un or croak $!;
1943
1944        $log_entry{date} = parse_svn_date($log_entry{date});
1945        $log_entry{log} .= "\n";
1946        my $author = $log_entry{author} = check_author($log_entry{author});
1947        my ($name, $email) = defined $::users{$author} ? @{$::users{$author}}
1948                                                       : ($author, undef);
1949
1950        my ($commit_name, $commit_email) = ($name, $email);
1951        if ($_use_log_author) {
1952                my $name_field;
1953                if ($log_entry{log} =~ /From:\s+(.*\S)\s*\n/i) {
1954                        $name_field = $1;
1955                } elsif ($log_entry{log} =~ /Signed-off-by:\s+(.*\S)\s*\n/i) {
1956                        $name_field = $1;
1957                }
1958                if (!defined $name_field) {
1959                        if (!defined $email) {
1960                                $email = $name;
1961                        }
1962                } elsif ($name_field =~ /(.*?)\s+<(.*)>/) {
1963                        ($name, $email) = ($1, $2);
1964                } elsif ($name_field =~ /(.*)@/) {
1965                        ($name, $email) = ($1, $name_field);
1966                } else {
1967                        ($name, $email) = ($name_field, $name_field);
1968                }
1969        }
1970        if (defined $headrev && $self->use_svm_props) {
1971                if ($self->rewrite_root) {
1972                        die "Can't have both 'useSvmProps' and 'rewriteRoot' ",
1973                            "options set!\n";
1974                }
1975                if ($self->rewrite_uuid) {
1976                        die "Can't have both 'useSvmProps' and 'rewriteUUID' ",
1977                            "options set!\n";
1978                }
1979                my ($uuid, $r) = $headrev =~ m{^([a-f\d\-]{30,}):(\d+)$}i;
1980                # we don't want "SVM: initializing mirror for junk" ...
1981                return undef if $r == 0;
1982                my $svm = $self->svm;
1983                if ($uuid ne $svm->{uuid}) {
1984                        die "UUID mismatch on SVM path:\n",
1985                            "expected: $svm->{uuid}\n",
1986                            "     got: $uuid\n";
1987                }
1988                my $full_url = $self->full_url;
1989                $full_url =~ s#^\Q$svm->{replace}\E(/|$)#$svm->{source}$1# or
1990                             die "Failed to replace '$svm->{replace}' with ",
1991                                 "'$svm->{source}' in $full_url\n";
1992                # throw away username for storing in records
1993                remove_username($full_url);
1994                $log_entry{metadata} = "$full_url\@$r $uuid";
1995                $log_entry{svm_revision} = $r;
1996                $email ||= "$author\@$uuid";
1997                $commit_email ||= "$author\@$uuid";
1998        } elsif ($self->use_svnsync_props) {
1999                my $full_url = canonicalize_url(
2000                        add_path_to_url( $self->svnsync->{url}, $self->path )
2001                );
2002                remove_username($full_url);
2003                my $uuid = $self->svnsync->{uuid};
2004                $log_entry{metadata} = "$full_url\@$rev $uuid";
2005                $email ||= "$author\@$uuid";
2006                $commit_email ||= "$author\@$uuid";
2007        } else {
2008                my $url = $self->metadata_url;
2009                remove_username($url);
2010                my $uuid = $self->rewrite_uuid || $self->ra->get_uuid;
2011                $log_entry{metadata} = "$url\@$rev " . $uuid;
2012                $email ||= "$author\@" . $uuid;
2013                $commit_email ||= "$author\@" . $uuid;
2014        }
2015        $log_entry{name} = $name;
2016        $log_entry{email} = $email;
2017        $log_entry{commit_name} = $commit_name;
2018        $log_entry{commit_email} = $commit_email;
2019        \%log_entry;
2020}
2021
2022sub fetch {
2023        my ($self, $min_rev, $max_rev, @parents) = @_;
2024        my ($last_rev, $last_commit) = $self->last_rev_commit;
2025        my ($base, $head) = $self->get_fetch_range($min_rev, $max_rev);
2026        $self->ra->gs_fetch_loop_common($base, $head, [$self]);
2027}
2028
2029sub set_tree_cb {
2030        my ($self, $log_entry, $tree, $rev, $date, $author) = @_;
2031        $self->{inject_parents} = { $rev => $tree };
2032        $self->fetch(undef, undef);
2033}
2034
2035sub set_tree {
2036        my ($self, $tree) = (shift, shift);
2037        my $log_entry = ::get_commit_entry($tree);
2038        unless ($self->{last_rev}) {
2039                fatal("Must have an existing revision to commit");
2040        }
2041        my %ed_opts = ( r => $self->{last_rev},
2042                        log => $log_entry->{log},
2043                        ra => $self->ra,
2044                        tree_a => $self->{last_commit},
2045                        tree_b => $tree,
2046                        editor_cb => sub {
2047                               $self->set_tree_cb($log_entry, $tree, @_) },
2048                        svn_path => $self->path );
2049        if (!Git::SVN::Editor->new(\%ed_opts)->apply_diff) {
2050                print "No changes\nr$self->{last_rev} = $tree\n";
2051        }
2052}
2053
2054sub rebuild_from_rev_db {
2055        my ($self, $path) = @_;
2056        my $r = -1;
2057        open my $fh, '<', $path or croak "open: $!";
2058        binmode $fh or croak "binmode: $!";
2059        while (<$fh>) {
2060                length($_) == 41 or croak "inconsistent size in ($_) != 41";
2061                chomp($_);
2062                ++$r;
2063                next if $_ eq ('0' x 40);
2064                $self->rev_map_set($r, $_);
2065                print "r$r = $_\n";
2066        }
2067        close $fh or croak "close: $!";
2068        unlink $path or croak "unlink: $!";
2069}
2070
2071#define a global associate map to record rebuild status
2072my %rebuild_status;
2073#define a global associate map to record rebuild verify status
2074my %rebuild_verify_status;
2075
2076sub rebuild {
2077        my ($self) = @_;
2078        my $map_path = $self->map_path;
2079        my $partial = (-e $map_path && ! -z $map_path);
2080        my $verify_key = $self->refname.'^0';
2081        if (!$rebuild_verify_status{$verify_key}) {
2082                my $verify_result = ::verify_ref($verify_key);
2083                if ($verify_result) {
2084                        $rebuild_verify_status{$verify_key} = 1;
2085                }
2086        }
2087        if (!$rebuild_verify_status{$verify_key}) {
2088                return;
2089        }
2090        if (!$partial && ($self->use_svm_props || $self->no_metadata)) {
2091                my $rev_db = $self->rev_db_path;
2092                $self->rebuild_from_rev_db($rev_db);
2093                if ($self->use_svm_props) {
2094                        my $svm_rev_db = $self->rev_db_path($self->svm_uuid);
2095                        $self->rebuild_from_rev_db($svm_rev_db);
2096                }
2097                $self->unlink_rev_db_symlink;
2098                return;
2099        }
2100        print "Rebuilding $map_path ...\n" if (!$partial);
2101        my ($base_rev, $head) = ($partial ? $self->rev_map_max_norebuild(1) :
2102                (undef, undef));
2103        my $key_value = ($head ? "$head.." : "") . $self->refname;
2104        if (exists $rebuild_status{$key_value}) {
2105                print "Done rebuilding $map_path\n" if (!$partial || !$head);
2106                my $rev_db_path = $self->rev_db_path;
2107                if (-f $self->rev_db_path) {
2108                        unlink $self->rev_db_path or croak "unlink: $!";
2109                }
2110                $self->unlink_rev_db_symlink;
2111                return;
2112        }
2113        my ($log, $ctx) =
2114                command_output_pipe(qw/rev-list --pretty=raw --reverse/,
2115                                $key_value,
2116                                '--');
2117        $rebuild_status{$key_value} = 1;
2118        my $metadata_url = $self->metadata_url;
2119        remove_username($metadata_url);
2120        my $svn_uuid = $self->rewrite_uuid || $self->ra_uuid;
2121        my $c;
2122        while (<$log>) {
2123                if ( m{^commit ($::sha1)$} ) {
2124                        $c = $1;
2125                        next;
2126                }
2127                next unless s{^\s*(git-svn-id:)}{$1};
2128                my ($url, $rev, $uuid) = ::extract_metadata($_);
2129                remove_username($url);
2130
2131                # ignore merges (from set-tree)
2132                next if (!defined $rev || !$uuid);
2133
2134                # if we merged or otherwise started elsewhere, this is
2135                # how we break out of it
2136                if (($uuid ne $svn_uuid) ||
2137                    ($metadata_url && $url && ($url ne $metadata_url))) {
2138                        next;
2139                }
2140                if ($partial && $head) {
2141                        print "Partial-rebuilding $map_path ...\n";
2142                        print "Currently at $base_rev = $head\n";
2143                        $head = undef;
2144                }
2145
2146                $self->rev_map_set($rev, $c);
2147                print "r$rev = $c\n";
2148        }
2149        command_close_pipe($log, $ctx);
2150        print "Done rebuilding $map_path\n" if (!$partial || !$head);
2151        my $rev_db_path = $self->rev_db_path;
2152        if (-f $self->rev_db_path) {
2153                unlink $self->rev_db_path or croak "unlink: $!";
2154        }
2155        $self->unlink_rev_db_symlink;
2156}
2157
2158# rev_map:
2159# Tie::File seems to be prone to offset errors if revisions get sparse,
2160# it's not that fast, either.  Tie::File is also not in Perl 5.6.  So
2161# one of my favorite modules is out :<  Next up would be one of the DBM
2162# modules, but I'm not sure which is most portable...
2163#
2164# This is the replacement for the rev_db format, which was too big
2165# and inefficient for large repositories with a lot of sparse history
2166# (mainly tags)
2167#
2168# The format is this:
2169#   - 24 bytes for every record,
2170#     * 4 bytes for the integer representing an SVN revision number
2171#     * 20 bytes representing the sha1 of a git commit
2172#   - No empty padding records like the old format
2173#     (except the last record, which can be overwritten)
2174#   - new records are written append-only since SVN revision numbers
2175#     increase monotonically
2176#   - lookups on SVN revision number are done via a binary search
2177#   - Piping the file to xxd -c24 is a good way of dumping it for
2178#     viewing or editing (piped back through xxd -r), should the need
2179#     ever arise.
2180#   - The last record can be padding revision with an all-zero sha1
2181#     This is used to optimize fetch performance when using multiple
2182#     "fetch" directives in .git/config
2183#
2184# These files are disposable unless noMetadata or useSvmProps is set
2185
2186sub _rev_map_set {
2187        my ($fh, $rev, $commit) = @_;
2188
2189        binmode $fh or croak "binmode: $!";
2190        my $size = (stat($fh))[7];
2191        ($size % 24) == 0 or croak "inconsistent size: $size";
2192
2193        my $wr_offset = 0;
2194        if ($size > 0) {
2195                sysseek($fh, -24, SEEK_END) or croak "seek: $!";
2196                my $read = sysread($fh, my $buf, 24) or croak "read: $!";
2197                $read == 24 or croak "read only $read bytes (!= 24)";
2198                my ($last_rev, $last_commit) = unpack(rev_map_fmt, $buf);
2199                if ($last_commit eq ('0' x40)) {
2200                        if ($size >= 48) {
2201                                sysseek($fh, -48, SEEK_END) or croak "seek: $!";
2202                                $read = sysread($fh, $buf, 24) or
2203                                    croak "read: $!";
2204                                $read == 24 or
2205                                    croak "read only $read bytes (!= 24)";
2206                                ($last_rev, $last_commit) =
2207                                    unpack(rev_map_fmt, $buf);
2208                                if ($last_commit eq ('0' x40)) {
2209                                        croak "inconsistent .rev_map\n";
2210                                }
2211                        }
2212                        if ($last_rev >= $rev) {
2213                                croak "last_rev is higher!: $last_rev >= $rev";
2214                        }
2215                        $wr_offset = -24;
2216                }
2217        }
2218        sysseek($fh, $wr_offset, SEEK_END) or croak "seek: $!";
2219        syswrite($fh, pack(rev_map_fmt, $rev, $commit), 24) == 24 or
2220          croak "write: $!";
2221}
2222
2223sub _rev_map_reset {
2224        my ($fh, $rev, $commit) = @_;
2225        my $c = _rev_map_get($fh, $rev);
2226        $c eq $commit or die "_rev_map_reset(@_) commit $c does not match!\n";
2227        my $offset = sysseek($fh, 0, SEEK_CUR) or croak "seek: $!";
2228        truncate $fh, $offset or croak "truncate: $!";
2229}
2230
2231sub mkfile {
2232        my ($path) = @_;
2233        unless (-e $path) {
2234                my ($dir, $base) = ($path =~ m#^(.*?)/?([^/]+)$#);
2235                mkpath([$dir]) unless -d $dir;
2236                open my $fh, '>>', $path or die "Couldn't create $path: $!\n";
2237                close $fh or die "Couldn't close (create) $path: $!\n";
2238        }
2239}
2240
2241sub rev_map_set {
2242        my ($self, $rev, $commit, $update_ref, $uuid) = @_;
2243        defined $commit or die "missing arg3\n";
2244        length $commit == 40 or die "arg3 must be a full SHA1 hexsum\n";
2245        my $db = $self->map_path($uuid);
2246        my $db_lock = "$db.lock";
2247        my $sigmask;
2248        $update_ref ||= 0;
2249        if ($update_ref) {
2250                $sigmask = POSIX::SigSet->new();
2251                my $signew = POSIX::SigSet->new(SIGINT, SIGHUP, SIGTERM,
2252                        SIGALRM, SIGUSR1, SIGUSR2);
2253                sigprocmask(SIG_BLOCK, $signew, $sigmask) or
2254                        croak "Can't block signals: $!";
2255        }
2256        mkfile($db);
2257
2258        $LOCKFILES{$db_lock} = 1;
2259        my $sync;
2260        # both of these options make our .rev_db file very, very important
2261        # and we can't afford to lose it because rebuild() won't work
2262        if ($self->use_svm_props || $self->no_metadata) {
2263                require File::Copy;
2264                $sync = 1;
2265                File::Copy::copy($db, $db_lock) or die "rev_map_set(@_): ",
2266                                           "Failed to copy: ",
2267                                           "$db => $db_lock ($!)\n";
2268        } else {
2269                rename $db, $db_lock or die "rev_map_set(@_): ",
2270                                            "Failed to rename: ",
2271                                            "$db => $db_lock ($!)\n";
2272        }
2273
2274        sysopen(my $fh, $db_lock, O_RDWR | O_CREAT)
2275             or croak "Couldn't open $db_lock: $!\n";
2276        if ($update_ref eq 'reset') {
2277                clear_memoized_mergeinfo_caches();
2278                _rev_map_reset($fh, $rev, $commit);
2279        } else {
2280                _rev_map_set($fh, $rev, $commit);
2281        }
2282
2283        if ($sync) {
2284                $fh->flush or die "Couldn't flush $db_lock: $!\n";
2285                $fh->sync or die "Couldn't sync $db_lock: $!\n";
2286        }
2287        close $fh or croak $!;
2288        if ($update_ref) {
2289                $_head = $self;
2290                my $note = "";
2291                $note = " ($update_ref)" if ($update_ref !~ /^\d*$/);
2292                command_noisy('update-ref', '-m', "r$rev$note",
2293                              $self->refname, $commit);
2294        }
2295        rename $db_lock, $db or die "rev_map_set(@_): ", "Failed to rename: ",
2296                                    "$db_lock => $db ($!)\n";
2297        delete $LOCKFILES{$db_lock};
2298        if ($update_ref) {
2299                sigprocmask(SIG_SETMASK, $sigmask) or
2300                        croak "Can't restore signal mask: $!";
2301        }
2302}
2303
2304# If want_commit, this will return an array of (rev, commit) where
2305# commit _must_ be a valid commit in the archive.
2306# Otherwise, it'll return the max revision (whether or not the
2307# commit is valid or just a 0x40 placeholder).
2308sub rev_map_max {
2309        my ($self, $want_commit) = @_;
2310        $self->rebuild;
2311        my ($r, $c) = $self->rev_map_max_norebuild($want_commit);
2312        $want_commit ? ($r, $c) : $r;
2313}
2314
2315sub rev_map_max_norebuild {
2316        my ($self, $want_commit) = @_;
2317        my $map_path = $self->map_path;
2318        stat $map_path or return $want_commit ? (0, undef) : 0;
2319        sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
2320        binmode $fh or croak "binmode: $!";
2321        my $size = (stat($fh))[7];
2322        ($size % 24) == 0 or croak "inconsistent size: $size";
2323
2324        if ($size == 0) {
2325                close $fh or croak "close: $!";
2326                return $want_commit ? (0, undef) : 0;
2327        }
2328
2329        sysseek($fh, -24, SEEK_END) or croak "seek: $!";
2330        sysread($fh, my $buf, 24) == 24 or croak "read: $!";
2331        my ($r, $c) = unpack(rev_map_fmt, $buf);
2332        if ($want_commit && $c eq ('0' x40)) {
2333                if ($size < 48) {
2334                        return $want_commit ? (0, undef) : 0;
2335                }
2336                sysseek($fh, -48, SEEK_END) or croak "seek: $!";
2337                sysread($fh, $buf, 24) == 24 or croak "read: $!";
2338                ($r, $c) = unpack(rev_map_fmt, $buf);
2339                if ($c eq ('0'x40)) {
2340                        croak "Penultimate record is all-zeroes in $map_path";
2341                }
2342        }
2343        close $fh or croak "close: $!";
2344        $want_commit ? ($r, $c) : $r;
2345}
2346
2347sub rev_map_get {
2348        my ($self, $rev, $uuid) = @_;
2349        my $map_path = $self->map_path($uuid);
2350        return undef unless -e $map_path;
2351
2352        sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
2353        my $c = _rev_map_get($fh, $rev);
2354        close($fh) or croak "close: $!";
2355        $c
2356}
2357
2358sub _rev_map_get {
2359        my ($fh, $rev) = @_;
2360
2361        binmode $fh or croak "binmode: $!";
2362        my $size = (stat($fh))[7];
2363        ($size % 24) == 0 or croak "inconsistent size: $size";
2364
2365        if ($size == 0) {
2366                return undef;
2367        }
2368
2369        my ($l, $u) = (0, $size - 24);
2370        my ($r, $c, $buf);
2371
2372        while ($l <= $u) {
2373                my $i = int(($l/24 + $u/24) / 2) * 24;
2374                sysseek($fh, $i, SEEK_SET) or croak "seek: $!";
2375                sysread($fh, my $buf, 24) == 24 or croak "read: $!";
2376                my ($r, $c) = unpack(rev_map_fmt, $buf);
2377
2378                if ($r < $rev) {
2379                        $l = $i + 24;
2380                } elsif ($r > $rev) {
2381                        $u = $i - 24;
2382                } else { # $r == $rev
2383                        return $c eq ('0' x 40) ? undef : $c;
2384                }
2385        }
2386        undef;
2387}
2388
2389# Finds the first svn revision that exists on (if $eq_ok is true) or
2390# before $rev for the current branch.  It will not search any lower
2391# than $min_rev.  Returns the git commit hash and svn revision number
2392# if found, else (undef, undef).
2393sub find_rev_before {
2394        my ($self, $rev, $eq_ok, $min_rev) = @_;
2395        --$rev unless $eq_ok;
2396        $min_rev ||= 1;
2397        my $max_rev = $self->rev_map_max;
2398        $rev = $max_rev if ($rev > $max_rev);
2399        while ($rev >= $min_rev) {
2400                if (my $c = $self->rev_map_get($rev)) {
2401                        return ($rev, $c);
2402                }
2403                --$rev;
2404        }
2405        return (undef, undef);
2406}
2407
2408# Finds the first svn revision that exists on (if $eq_ok is true) or
2409# after $rev for the current branch.  It will not search any higher
2410# than $max_rev.  Returns the git commit hash and svn revision number
2411# if found, else (undef, undef).
2412sub find_rev_after {
2413        my ($self, $rev, $eq_ok, $max_rev) = @_;
2414        ++$rev unless $eq_ok;
2415        $max_rev ||= $self->rev_map_max;
2416        while ($rev <= $max_rev) {
2417                if (my $c = $self->rev_map_get($rev)) {
2418                        return ($rev, $c);
2419                }
2420                ++$rev;
2421        }
2422        return (undef, undef);
2423}
2424
2425sub _new {
2426        my ($class, $repo_id, $ref_id, $path) = @_;
2427        unless (defined $repo_id && length $repo_id) {
2428                $repo_id = $default_repo_id;
2429        }
2430        unless (defined $ref_id && length $ref_id) {
2431                # Access the prefix option from the git-svn main program if it's loaded.
2432                my $prefix = defined &::opt_prefix ? ::opt_prefix() : "";
2433                $_[2] = $ref_id =
2434                             "refs/remotes/$prefix$default_ref_id";
2435        }
2436        $_[1] = $repo_id;
2437        my $dir = "$ENV{GIT_DIR}/svn/$ref_id";
2438
2439        # Older repos imported by us used $GIT_DIR/svn/foo instead of
2440        # $GIT_DIR/svn/refs/remotes/foo when tracking refs/remotes/foo
2441        if ($ref_id =~ m{^refs/remotes/(.+)}) {
2442                my $old_dir = "$ENV{GIT_DIR}/svn/$1";
2443                if (-d $old_dir && ! -d $dir) {
2444                        $dir = $old_dir;
2445                }
2446        }
2447
2448        $_[3] = $path = '' unless (defined $path);
2449        mkpath([$dir]);
2450        my $obj = bless {
2451                ref_id => $ref_id, dir => $dir, index => "$dir/index",
2452                config => "$ENV{GIT_DIR}/svn/config",
2453                map_root => "$dir/.rev_map", repo_id => $repo_id }, $class;
2454
2455        # Ensure it gets canonicalized
2456        $obj->path($path);
2457
2458        return $obj;
2459}
2460
2461sub path {
2462        my $self = shift;
2463
2464        if (@_) {
2465                my $path = shift;
2466                $self->{_path} = canonicalize_path($path);
2467                return;
2468        }
2469
2470        return $self->{_path};
2471}
2472
2473sub url {
2474        my $self = shift;
2475
2476        if (@_) {
2477                my $url = shift;
2478                $self->{url} = canonicalize_url($url);
2479                return;
2480        }
2481
2482        return $self->{url};
2483}
2484
2485# for read-only access of old .rev_db formats
2486sub unlink_rev_db_symlink {
2487        my ($self) = @_;
2488        my $link = $self->rev_db_path;
2489        $link =~ s/\.[\w-]+$// or croak "missing UUID at the end of $link";
2490        if (-l $link) {
2491                unlink $link or croak "unlink: $link failed!";
2492        }
2493}
2494
2495sub rev_db_path {
2496        my ($self, $uuid) = @_;
2497        my $db_path = $self->map_path($uuid);
2498        $db_path =~ s{/\.rev_map\.}{/\.rev_db\.}
2499            or croak "map_path: $db_path does not contain '/.rev_map.' !";
2500        $db_path;
2501}
2502
2503# the new replacement for .rev_db
2504sub map_path {
2505        my ($self, $uuid) = @_;
2506        $uuid ||= $self->ra_uuid;
2507        "$self->{map_root}.$uuid";
2508}
2509
2510sub uri_encode {
2511        my ($f) = @_;
2512        $f =~ s#([^a-zA-Z0-9\*!\:_\./\-])#sprintf("%%%02X",ord($1))#eg;
2513        $f
2514}
2515
2516sub uri_decode {
2517        my ($f) = @_;
2518        $f =~ s#%([0-9a-fA-F]{2})#chr(hex($1))#eg;
2519        $f
2520}
2521
2522sub remove_username {
2523        $_[0] =~ s{^([^:]*://)[^@]+@}{$1};
2524}
2525
25261;