c71c041986857bcc9871e1f2af8edc3deaffb4f8
   1package Git::SVN;
   2use strict;
   3use warnings;
   4use Fcntl qw/:DEFAULT :seek/;
   5use constant rev_map_fmt => 'NH40';
   6use vars qw/$default_repo_id $default_ref_id $_no_metadata $_follow_parent
   7            $_repack $_repack_flags $_use_svm_props $_head
   8            $_use_svnsync_props $no_reuse_existing $_minimize_url
   9            $_use_log_author $_add_author_from $_localtime/;
  10use Carp qw/croak/;
  11use File::Path qw/mkpath/;
  12use File::Copy qw/copy/;
  13use IPC::Open3;
  14use Time::Local;
  15use Memoize;  # core since 5.8.0, Jul 2002
  16use Memoize::Storable;
  17use POSIX qw(:signal_h);
  18
  19use Git qw(
  20    command
  21    command_oneline
  22    command_noisy
  23    command_output_pipe
  24    command_close_pipe
  25);
  26use Git::SVN::Utils qw(fatal can_compress);
  27
  28my $can_use_yaml;
  29BEGIN {
  30        $can_use_yaml = eval { require Git::SVN::Memoize::YAML; 1};
  31}
  32
  33my ($_gc_nr, $_gc_period);
  34
  35# properties that we do not log:
  36my %SKIP_PROP;
  37BEGIN {
  38        %SKIP_PROP = map { $_ => 1 } qw/svn:wc:ra_dav:version-url
  39                                        svn:special svn:executable
  40                                        svn:entry:committed-rev
  41                                        svn:entry:last-author
  42                                        svn:entry:uuid
  43                                        svn:entry:committed-date/;
  44
  45        # some options are read globally, but can be overridden locally
  46        # per [svn-remote "..."] section.  Command-line options will *NOT*
  47        # override options set in an [svn-remote "..."] section
  48        no strict 'refs';
  49        for my $option (qw/follow_parent no_metadata use_svm_props
  50                           use_svnsync_props/) {
  51                my $key = $option;
  52                $key =~ tr/_//d;
  53                my $prop = "-$option";
  54                *$option = sub {
  55                        my ($self) = @_;
  56                        return $self->{$prop} if exists $self->{$prop};
  57                        my $k = "svn-remote.$self->{repo_id}.$key";
  58                        eval { command_oneline(qw/config --get/, $k) };
  59                        if ($@) {
  60                                $self->{$prop} = ${"Git::SVN::_$option"};
  61                        } else {
  62                                my $v = command_oneline(qw/config --bool/,$k);
  63                                $self->{$prop} = $v eq 'false' ? 0 : 1;
  64                        }
  65                        return $self->{$prop};
  66                }
  67        }
  68}
  69
  70
  71my (%LOCKFILES, %INDEX_FILES);
  72END {
  73        unlink keys %LOCKFILES if %LOCKFILES;
  74        unlink keys %INDEX_FILES if %INDEX_FILES;
  75}
  76
  77sub resolve_local_globs {
  78        my ($url, $fetch, $glob_spec) = @_;
  79        return unless defined $glob_spec;
  80        my $ref = $glob_spec->{ref};
  81        my $path = $glob_spec->{path};
  82        foreach (command(qw#for-each-ref --format=%(refname) refs/#)) {
  83                next unless m#^$ref->{regex}$#;
  84                my $p = $1;
  85                my $pathname = desanitize_refname($path->full_path($p));
  86                my $refname = desanitize_refname($ref->full_path($p));
  87                if (my $existing = $fetch->{$pathname}) {
  88                        if ($existing ne $refname) {
  89                                die "Refspec conflict:\n",
  90                                    "existing: $existing\n",
  91                                    " globbed: $refname\n";
  92                        }
  93                        my $u = (::cmt_metadata("$refname"))[0];
  94                        $u =~ s!^\Q$url\E(/|$)!! or die
  95                          "$refname: '$url' not found in '$u'\n";
  96                        if ($pathname ne $u) {
  97                                warn "W: Refspec glob conflict ",
  98                                     "(ref: $refname):\n",
  99                                     "expected path: $pathname\n",
 100                                     "    real path: $u\n",
 101                                     "Continuing ahead with $u\n";
 102                                next;
 103                        }
 104                } else {
 105                        $fetch->{$pathname} = $refname;
 106                }
 107        }
 108}
 109
 110sub parse_revision_argument {
 111        my ($base, $head) = @_;
 112        if (!defined $::_revision || $::_revision eq 'BASE:HEAD') {
 113                return ($base, $head);
 114        }
 115        return ($1, $2) if ($::_revision =~ /^(\d+):(\d+)$/);
 116        return ($::_revision, $::_revision) if ($::_revision =~ /^\d+$/);
 117        return ($head, $head) if ($::_revision eq 'HEAD');
 118        return ($base, $1) if ($::_revision =~ /^BASE:(\d+)$/);
 119        return ($1, $head) if ($::_revision =~ /^(\d+):HEAD$/);
 120        die "revision argument: $::_revision not understood by git-svn\n";
 121}
 122
 123sub fetch_all {
 124        my ($repo_id, $remotes) = @_;
 125        if (ref $repo_id) {
 126                my $gs = $repo_id;
 127                $repo_id = undef;
 128                $repo_id = $gs->{repo_id};
 129        }
 130        $remotes ||= read_all_remotes();
 131        my $remote = $remotes->{$repo_id} or
 132                     die "[svn-remote \"$repo_id\"] unknown\n";
 133        my $fetch = $remote->{fetch};
 134        my $url = $remote->{url} or die "svn-remote.$repo_id.url not defined\n";
 135        my (@gs, @globs);
 136        my $ra = Git::SVN::Ra->new($url);
 137        my $uuid = $ra->get_uuid;
 138        my $head = $ra->get_latest_revnum;
 139
 140        # ignore errors, $head revision may not even exist anymore
 141        eval { $ra->get_log("", $head, 0, 1, 0, 1, sub { $head = $_[1] }) };
 142        warn "W: $@\n" if $@;
 143
 144        my $base = defined $fetch ? $head : 0;
 145
 146        # read the max revs for wildcard expansion (branches/*, tags/*)
 147        foreach my $t (qw/branches tags/) {
 148                defined $remote->{$t} or next;
 149                push @globs, @{$remote->{$t}};
 150
 151                my $max_rev = eval { tmp_config(qw/--int --get/,
 152                                         "svn-remote.$repo_id.${t}-maxRev") };
 153                if (defined $max_rev && ($max_rev < $base)) {
 154                        $base = $max_rev;
 155                } elsif (!defined $max_rev) {
 156                        $base = 0;
 157                }
 158        }
 159
 160        if ($fetch) {
 161                foreach my $p (sort keys %$fetch) {
 162                        my $gs = Git::SVN->new($fetch->{$p}, $repo_id, $p);
 163                        my $lr = $gs->rev_map_max;
 164                        if (defined $lr) {
 165                                $base = $lr if ($lr < $base);
 166                        }
 167                        push @gs, $gs;
 168                }
 169        }
 170
 171        ($base, $head) = parse_revision_argument($base, $head);
 172        $ra->gs_fetch_loop_common($base, $head, \@gs, \@globs);
 173}
 174
 175sub read_all_remotes {
 176        my $r = {};
 177        my $use_svm_props = eval { command_oneline(qw/config --bool
 178            svn.useSvmProps/) };
 179        $use_svm_props = $use_svm_props eq 'true' if $use_svm_props;
 180        my $svn_refspec = qr{\s*(.*?)\s*:\s*(.+?)\s*};
 181        foreach (grep { s/^svn-remote\.// } command(qw/config -l/)) {
 182                if (m!^(.+)\.fetch=$svn_refspec$!) {
 183                        my ($remote, $local_ref, $remote_ref) = ($1, $2, $3);
 184                        die("svn-remote.$remote: remote ref '$remote_ref' "
 185                            . "must start with 'refs/'\n")
 186                                unless $remote_ref =~ m{^refs/};
 187                        $local_ref = uri_decode($local_ref);
 188                        $r->{$remote}->{fetch}->{$local_ref} = $remote_ref;
 189                        $r->{$remote}->{svm} = {} if $use_svm_props;
 190                } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) {
 191                        $r->{$1}->{svm} = {};
 192                } elsif (m!^(.+)\.url=\s*(.*)\s*$!) {
 193                        $r->{$1}->{url} = $2;
 194                } elsif (m!^(.+)\.pushurl=\s*(.*)\s*$!) {
 195                        $r->{$1}->{pushurl} = $2;
 196                } elsif (m!^(.+)\.ignore-refs=\s*(.*)\s*$!) {
 197                        $r->{$1}->{ignore_refs_regex} = $2;
 198                } elsif (m!^(.+)\.(branches|tags)=$svn_refspec$!) {
 199                        my ($remote, $t, $local_ref, $remote_ref) =
 200                                                             ($1, $2, $3, $4);
 201                        die("svn-remote.$remote: remote ref '$remote_ref' ($t) "
 202                            . "must start with 'refs/'\n")
 203                                unless $remote_ref =~ m{^refs/};
 204                        $local_ref = uri_decode($local_ref);
 205                        my $rs = {
 206                            t => $t,
 207                            remote => $remote,
 208                            path => Git::SVN::GlobSpec->new($local_ref, 1),
 209                            ref => Git::SVN::GlobSpec->new($remote_ref, 0) };
 210                        if (length($rs->{ref}->{right}) != 0) {
 211                                die "The '*' glob character must be the last ",
 212                                    "character of '$remote_ref'\n";
 213                        }
 214                        push @{ $r->{$remote}->{$t} }, $rs;
 215                }
 216        }
 217
 218        map {
 219                if (defined $r->{$_}->{svm}) {
 220                        my $svm;
 221                        eval {
 222                                my $section = "svn-remote.$_";
 223                                $svm = {
 224                                        source => tmp_config('--get',
 225                                            "$section.svm-source"),
 226                                        replace => tmp_config('--get',
 227                                            "$section.svm-replace"),
 228                                }
 229                        };
 230                        $r->{$_}->{svm} = $svm;
 231                }
 232        } keys %$r;
 233
 234        foreach my $remote (keys %$r) {
 235                foreach ( grep { defined $_ }
 236                          map { $r->{$remote}->{$_} } qw(branches tags) ) {
 237                        foreach my $rs ( @$_ ) {
 238                                $rs->{ignore_refs_regex} =
 239                                    $r->{$remote}->{ignore_refs_regex};
 240                        }
 241                }
 242        }
 243
 244        $r;
 245}
 246
 247sub init_vars {
 248        $_gc_nr = $_gc_period = 1000;
 249        if (defined $_repack || defined $_repack_flags) {
 250               warn "Repack options are obsolete; they have no effect.\n";
 251        }
 252}
 253
 254sub verify_remotes_sanity {
 255        return unless -d $ENV{GIT_DIR};
 256        my %seen;
 257        foreach (command(qw/config -l/)) {
 258                if (m!^svn-remote\.(?:.+)\.fetch=.*:refs/remotes/(\S+)\s*$!) {
 259                        if ($seen{$1}) {
 260                                die "Remote ref refs/remote/$1 is tracked by",
 261                                    "\n  \"$_\"\nand\n  \"$seen{$1}\"\n",
 262                                    "Please resolve this ambiguity in ",
 263                                    "your git configuration file before ",
 264                                    "continuing\n";
 265                        }
 266                        $seen{$1} = $_;
 267                }
 268        }
 269}
 270
 271sub find_existing_remote {
 272        my ($url, $remotes) = @_;
 273        return undef if $no_reuse_existing;
 274        my $existing;
 275        foreach my $repo_id (keys %$remotes) {
 276                my $u = $remotes->{$repo_id}->{url} or next;
 277                next if $u ne $url;
 278                $existing = $repo_id;
 279                last;
 280        }
 281        $existing;
 282}
 283
 284sub init_remote_config {
 285        my ($self, $url, $no_write) = @_;
 286        $url =~ s!/+$!!; # strip trailing slash
 287        my $r = read_all_remotes();
 288        my $existing = find_existing_remote($url, $r);
 289        if ($existing) {
 290                unless ($no_write) {
 291                        print STDERR "Using existing ",
 292                                     "[svn-remote \"$existing\"]\n";
 293                }
 294                $self->{repo_id} = $existing;
 295        } elsif ($_minimize_url) {
 296                my $min_url = Git::SVN::Ra->new($url)->minimize_url;
 297                $existing = find_existing_remote($min_url, $r);
 298                if ($existing) {
 299                        unless ($no_write) {
 300                                print STDERR "Using existing ",
 301                                             "[svn-remote \"$existing\"]\n";
 302                        }
 303                        $self->{repo_id} = $existing;
 304                }
 305                if ($min_url ne $url) {
 306                        unless ($no_write) {
 307                                print STDERR "Using higher level of URL: ",
 308                                             "$url => $min_url\n";
 309                        }
 310                        my $old_path = $self->{path};
 311                        $self->{path} = $url;
 312                        $self->{path} =~ s!^\Q$min_url\E(/|$)!!;
 313                        if (length $old_path) {
 314                                $self->{path} .= "/$old_path";
 315                        }
 316                        $url = $min_url;
 317                }
 318        }
 319        my $orig_url;
 320        if (!$existing) {
 321                # verify that we aren't overwriting anything:
 322                $orig_url = eval {
 323                        command_oneline('config', '--get',
 324                                        "svn-remote.$self->{repo_id}.url")
 325                };
 326                if ($orig_url && ($orig_url ne $url)) {
 327                        die "svn-remote.$self->{repo_id}.url already set: ",
 328                            "$orig_url\nwanted to set to: $url\n";
 329                }
 330        }
 331        my ($xrepo_id, $xpath) = find_ref($self->refname);
 332        if (!$no_write && defined $xpath) {
 333                die "svn-remote.$xrepo_id.fetch already set to track ",
 334                    "$xpath:", $self->refname, "\n";
 335        }
 336        unless ($no_write) {
 337                command_noisy('config',
 338                              "svn-remote.$self->{repo_id}.url", $url);
 339                $self->{path} =~ s{^/}{};
 340                $self->{path} =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
 341                command_noisy('config', '--add',
 342                              "svn-remote.$self->{repo_id}.fetch",
 343                              "$self->{path}:".$self->refname);
 344        }
 345        $self->{url} = $url;
 346}
 347
 348sub find_by_url { # repos_root and, path are optional
 349        my ($class, $full_url, $repos_root, $path) = @_;
 350
 351        return undef unless defined $full_url;
 352        remove_username($full_url);
 353        remove_username($repos_root) if defined $repos_root;
 354        my $remotes = read_all_remotes();
 355        if (defined $full_url && defined $repos_root && !defined $path) {
 356                $path = $full_url;
 357                $path =~ s#^\Q$repos_root\E(?:/|$)##;
 358        }
 359        foreach my $repo_id (keys %$remotes) {
 360                my $u = $remotes->{$repo_id}->{url} or next;
 361                remove_username($u);
 362                next if defined $repos_root && $repos_root ne $u;
 363
 364                my $fetch = $remotes->{$repo_id}->{fetch} || {};
 365                foreach my $t (qw/branches tags/) {
 366                        foreach my $globspec (@{$remotes->{$repo_id}->{$t}}) {
 367                                resolve_local_globs($u, $fetch, $globspec);
 368                        }
 369                }
 370                my $p = $path;
 371                my $rwr = rewrite_root({repo_id => $repo_id});
 372                my $svm = $remotes->{$repo_id}->{svm}
 373                        if defined $remotes->{$repo_id}->{svm};
 374                unless (defined $p) {
 375                        $p = $full_url;
 376                        my $z = $u;
 377                        my $prefix = '';
 378                        if ($rwr) {
 379                                $z = $rwr;
 380                                remove_username($z);
 381                        } elsif (defined $svm) {
 382                                $z = $svm->{source};
 383                                $prefix = $svm->{replace};
 384                                $prefix =~ s#^\Q$u\E(?:/|$)##;
 385                                $prefix =~ s#/$##;
 386                        }
 387                        $p =~ s#^\Q$z\E(?:/|$)#$prefix# or next;
 388                }
 389                foreach my $f (keys %$fetch) {
 390                        next if $f ne $p;
 391                        return Git::SVN->new($fetch->{$f}, $repo_id, $f);
 392                }
 393        }
 394        undef;
 395}
 396
 397sub init {
 398        my ($class, $url, $path, $repo_id, $ref_id, $no_write) = @_;
 399        my $self = _new($class, $repo_id, $ref_id, $path);
 400        if (defined $url) {
 401                $self->init_remote_config($url, $no_write);
 402        }
 403        $self;
 404}
 405
 406sub find_ref {
 407        my ($ref_id) = @_;
 408        foreach (command(qw/config -l/)) {
 409                next unless m!^svn-remote\.(.+)\.fetch=
 410                              \s*(.*?)\s*:\s*(.+?)\s*$!x;
 411                my ($repo_id, $path, $ref) = ($1, $2, $3);
 412                if ($ref eq $ref_id) {
 413                        $path = '' if ($path =~ m#^\./?#);
 414                        return ($repo_id, $path);
 415                }
 416        }
 417        (undef, undef, undef);
 418}
 419
 420sub new {
 421        my ($class, $ref_id, $repo_id, $path) = @_;
 422        if (defined $ref_id && !defined $repo_id && !defined $path) {
 423                ($repo_id, $path) = find_ref($ref_id);
 424                if (!defined $repo_id) {
 425                        die "Could not find a \"svn-remote.*.fetch\" key ",
 426                            "in the repository configuration matching: ",
 427                            "$ref_id\n";
 428                }
 429        }
 430        my $self = _new($class, $repo_id, $ref_id, $path);
 431        if (!defined $self->{path} || !length $self->{path}) {
 432                my $fetch = command_oneline('config', '--get',
 433                                            "svn-remote.$repo_id.fetch",
 434                                            ":$ref_id\$") or
 435                     die "Failed to read \"svn-remote.$repo_id.fetch\" ",
 436                         "\":$ref_id\$\" in config\n";
 437                ($self->{path}, undef) = split(/\s*:\s*/, $fetch);
 438        }
 439        $self->{path} =~ s{/+}{/}g;
 440        $self->{path} =~ s{\A/}{};
 441        $self->{path} =~ s{/\z}{};
 442        $self->{url} = command_oneline('config', '--get',
 443                                       "svn-remote.$repo_id.url") or
 444                  die "Failed to read \"svn-remote.$repo_id.url\" in config\n";
 445        $self->{pushurl} = eval { command_oneline('config', '--get',
 446                                  "svn-remote.$repo_id.pushurl") };
 447        $self->rebuild;
 448        $self;
 449}
 450
 451sub refname {
 452        my ($refname) = $_[0]->{ref_id} ;
 453
 454        # It cannot end with a slash /, we'll throw up on this because
 455        # SVN can't have directories with a slash in their name, either:
 456        if ($refname =~ m{/$}) {
 457                die "ref: '$refname' ends with a trailing slash, this is ",
 458                    "not permitted by git nor Subversion\n";
 459        }
 460
 461        # It cannot have ASCII control character space, tilde ~, caret ^,
 462        # colon :, question-mark ?, asterisk *, space, or open bracket [
 463        # anywhere.
 464        #
 465        # Additionally, % must be escaped because it is used for escaping
 466        # and we want our escaped refname to be reversible
 467        $refname =~ s{([ \%~\^:\?\*\[\t])}{uc sprintf('%%%02x',ord($1))}eg;
 468
 469        # no slash-separated component can begin with a dot .
 470        # /.* becomes /%2E*
 471        $refname =~ s{/\.}{/%2E}g;
 472
 473        # It cannot have two consecutive dots .. anywhere
 474        # .. becomes %2E%2E
 475        $refname =~ s{\.\.}{%2E%2E}g;
 476
 477        # trailing dots and .lock are not allowed
 478        # .$ becomes %2E and .lock becomes %2Elock
 479        $refname =~ s{\.(?=$|lock$)}{%2E};
 480
 481        # the sequence @{ is used to access the reflog
 482        # @{ becomes %40{
 483        $refname =~ s{\@\{}{%40\{}g;
 484
 485        return $refname;
 486}
 487
 488sub desanitize_refname {
 489        my ($refname) = @_;
 490        $refname =~ s{%(?:([0-9A-F]{2}))}{chr hex($1)}eg;
 491        return $refname;
 492}
 493
 494sub svm_uuid {
 495        my ($self) = @_;
 496        return $self->{svm}->{uuid} if $self->svm;
 497        $self->ra;
 498        unless ($self->{svm}) {
 499                die "SVM UUID not cached, and reading remotely failed\n";
 500        }
 501        $self->{svm}->{uuid};
 502}
 503
 504sub svm {
 505        my ($self) = @_;
 506        return $self->{svm} if $self->{svm};
 507        my $svm;
 508        # see if we have it in our config, first:
 509        eval {
 510                my $section = "svn-remote.$self->{repo_id}";
 511                $svm = {
 512                  source => tmp_config('--get', "$section.svm-source"),
 513                  uuid => tmp_config('--get', "$section.svm-uuid"),
 514                  replace => tmp_config('--get', "$section.svm-replace"),
 515                }
 516        };
 517        if ($svm && $svm->{source} && $svm->{uuid} && $svm->{replace}) {
 518                $self->{svm} = $svm;
 519        }
 520        $self->{svm};
 521}
 522
 523sub _set_svm_vars {
 524        my ($self, $ra) = @_;
 525        return $ra if $self->svm;
 526
 527        my @err = ( "useSvmProps set, but failed to read SVM properties\n",
 528                    "(svm:source, svm:uuid) ",
 529                    "from the following URLs:\n" );
 530        sub read_svm_props {
 531                my ($self, $ra, $path, $r) = @_;
 532                my $props = ($ra->get_dir($path, $r))[2];
 533                my $src = $props->{'svm:source'};
 534                my $uuid = $props->{'svm:uuid'};
 535                return undef if (!$src || !$uuid);
 536
 537                chomp($src, $uuid);
 538
 539                $uuid =~ m{^[0-9a-f\-]{30,}$}i
 540                    or die "doesn't look right - svm:uuid is '$uuid'\n";
 541
 542                # the '!' is used to mark the repos_root!/relative/path
 543                $src =~ s{/?!/?}{/};
 544                $src =~ s{/+$}{}; # no trailing slashes please
 545                # username is of no interest
 546                $src =~ s{(^[a-z\+]*://)[^/@]*@}{$1};
 547
 548                my $replace = $ra->{url};
 549                $replace .= "/$path" if length $path;
 550
 551                my $section = "svn-remote.$self->{repo_id}";
 552                tmp_config("$section.svm-source", $src);
 553                tmp_config("$section.svm-replace", $replace);
 554                tmp_config("$section.svm-uuid", $uuid);
 555                $self->{svm} = {
 556                        source => $src,
 557                        uuid => $uuid,
 558                        replace => $replace
 559                };
 560        }
 561
 562        my $r = $ra->get_latest_revnum;
 563        my $path = $self->{path};
 564        my %tried;
 565        while (length $path) {
 566                unless ($tried{"$self->{url}/$path"}) {
 567                        return $ra if $self->read_svm_props($ra, $path, $r);
 568                        $tried{"$self->{url}/$path"} = 1;
 569                }
 570                $path =~ s#/?[^/]+$##;
 571        }
 572        die "Path: '$path' should be ''\n" if $path ne '';
 573        return $ra if $self->read_svm_props($ra, $path, $r);
 574        $tried{"$self->{url}/$path"} = 1;
 575
 576        if ($ra->{repos_root} eq $self->{url}) {
 577                die @err, (map { "  $_\n" } keys %tried), "\n";
 578        }
 579
 580        # nope, make sure we're connected to the repository root:
 581        my $ok;
 582        my @tried_b;
 583        $path = $ra->{svn_path};
 584        $ra = Git::SVN::Ra->new($ra->{repos_root});
 585        while (length $path) {
 586                unless ($tried{"$ra->{url}/$path"}) {
 587                        $ok = $self->read_svm_props($ra, $path, $r);
 588                        last if $ok;
 589                        $tried{"$ra->{url}/$path"} = 1;
 590                }
 591                $path =~ s#/?[^/]+$##;
 592        }
 593        die "Path: '$path' should be ''\n" if $path ne '';
 594        $ok ||= $self->read_svm_props($ra, $path, $r);
 595        $tried{"$ra->{url}/$path"} = 1;
 596        if (!$ok) {
 597                die @err, (map { "  $_\n" } keys %tried), "\n";
 598        }
 599        Git::SVN::Ra->new($self->{url});
 600}
 601
 602sub svnsync {
 603        my ($self) = @_;
 604        return $self->{svnsync} if $self->{svnsync};
 605
 606        if ($self->no_metadata) {
 607                die "Can't have both 'noMetadata' and ",
 608                    "'useSvnsyncProps' options set!\n";
 609        }
 610        if ($self->rewrite_root) {
 611                die "Can't have both 'useSvnsyncProps' and 'rewriteRoot' ",
 612                    "options set!\n";
 613        }
 614        if ($self->rewrite_uuid) {
 615                die "Can't have both 'useSvnsyncProps' and 'rewriteUUID' ",
 616                    "options set!\n";
 617        }
 618
 619        my $svnsync;
 620        # see if we have it in our config, first:
 621        eval {
 622                my $section = "svn-remote.$self->{repo_id}";
 623
 624                my $url = tmp_config('--get', "$section.svnsync-url");
 625                ($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
 626                   die "doesn't look right - svn:sync-from-url is '$url'\n";
 627
 628                my $uuid = tmp_config('--get', "$section.svnsync-uuid");
 629                ($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
 630                   die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
 631
 632                $svnsync = { url => $url, uuid => $uuid }
 633        };
 634        if ($svnsync && $svnsync->{url} && $svnsync->{uuid}) {
 635                return $self->{svnsync} = $svnsync;
 636        }
 637
 638        my $err = "useSvnsyncProps set, but failed to read " .
 639                  "svnsync property: svn:sync-from-";
 640        my $rp = $self->ra->rev_proplist(0);
 641
 642        my $url = $rp->{'svn:sync-from-url'} or die $err . "url\n";
 643        ($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
 644                   die "doesn't look right - svn:sync-from-url is '$url'\n";
 645
 646        my $uuid = $rp->{'svn:sync-from-uuid'} or die $err . "uuid\n";
 647        ($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
 648                   die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
 649
 650        my $section = "svn-remote.$self->{repo_id}";
 651        tmp_config('--add', "$section.svnsync-uuid", $uuid);
 652        tmp_config('--add', "$section.svnsync-url", $url);
 653        return $self->{svnsync} = { url => $url, uuid => $uuid };
 654}
 655
 656# this allows us to memoize our SVN::Ra UUID locally and avoid a
 657# remote lookup (useful for 'git svn log').
 658sub ra_uuid {
 659        my ($self) = @_;
 660        unless ($self->{ra_uuid}) {
 661                my $key = "svn-remote.$self->{repo_id}.uuid";
 662                my $uuid = eval { tmp_config('--get', $key) };
 663                if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/i) {
 664                        $self->{ra_uuid} = $uuid;
 665                } else {
 666                        die "ra_uuid called without URL\n" unless $self->{url};
 667                        $self->{ra_uuid} = $self->ra->get_uuid;
 668                        tmp_config('--add', $key, $self->{ra_uuid});
 669                }
 670        }
 671        $self->{ra_uuid};
 672}
 673
 674sub _set_repos_root {
 675        my ($self, $repos_root) = @_;
 676        my $k = "svn-remote.$self->{repo_id}.reposRoot";
 677        $repos_root ||= $self->ra->{repos_root};
 678        tmp_config($k, $repos_root);
 679        $repos_root;
 680}
 681
 682sub repos_root {
 683        my ($self) = @_;
 684        my $k = "svn-remote.$self->{repo_id}.reposRoot";
 685        eval { tmp_config('--get', $k) } || $self->_set_repos_root;
 686}
 687
 688sub ra {
 689        my ($self) = shift;
 690        my $ra = Git::SVN::Ra->new($self->{url});
 691        $self->_set_repos_root($ra->{repos_root});
 692        if ($self->use_svm_props && !$self->{svm}) {
 693                if ($self->no_metadata) {
 694                        die "Can't have both 'noMetadata' and ",
 695                            "'useSvmProps' options set!\n";
 696                } elsif ($self->use_svnsync_props) {
 697                        die "Can't have both 'useSvnsyncProps' and ",
 698                            "'useSvmProps' options set!\n";
 699                }
 700                $ra = $self->_set_svm_vars($ra);
 701                $self->{-want_revprops} = 1;
 702        }
 703        $ra;
 704}
 705
 706# prop_walk(PATH, REV, SUB)
 707# -------------------------
 708# Recursively traverse PATH at revision REV and invoke SUB for each
 709# directory that contains a SVN property.  SUB will be invoked as
 710# follows:  &SUB(gs, path, props);  where `gs' is this instance of
 711# Git::SVN, `path' the path to the directory where the properties
 712# `props' were found.  The `path' will be relative to point of checkout,
 713# that is, if url://repo/trunk is the current Git branch, and that
 714# directory contains a sub-directory `d', SUB will be invoked with `/d/'
 715# as `path' (note the trailing `/').
 716sub prop_walk {
 717        my ($self, $path, $rev, $sub) = @_;
 718
 719        $path =~ s#^/##;
 720        my ($dirent, undef, $props) = $self->ra->get_dir($path, $rev);
 721        $path =~ s#^/*#/#g;
 722        my $p = $path;
 723        # Strip the irrelevant part of the path.
 724        $p =~ s#^/+\Q$self->{path}\E(/|$)#/#;
 725        # Ensure the path is terminated by a `/'.
 726        $p =~ s#/*$#/#;
 727
 728        # The properties contain all the internal SVN stuff nobody
 729        # (usually) cares about.
 730        my $interesting_props = 0;
 731        foreach (keys %{$props}) {
 732                # If it doesn't start with `svn:', it must be a
 733                # user-defined property.
 734                ++$interesting_props and next if $_ !~ /^svn:/;
 735                # FIXME: Fragile, if SVN adds new public properties,
 736                # this needs to be updated.
 737                ++$interesting_props if /^svn:(?:ignore|keywords|executable
 738                                                 |eol-style|mime-type
 739                                                 |externals|needs-lock)$/x;
 740        }
 741        &$sub($self, $p, $props) if $interesting_props;
 742
 743        foreach (sort keys %$dirent) {
 744                next if $dirent->{$_}->{kind} != $SVN::Node::dir;
 745                $self->prop_walk($self->{path} . $p . $_, $rev, $sub);
 746        }
 747}
 748
 749sub last_rev { ($_[0]->last_rev_commit)[0] }
 750sub last_commit { ($_[0]->last_rev_commit)[1] }
 751
 752# returns the newest SVN revision number and newest commit SHA1
 753sub last_rev_commit {
 754        my ($self) = @_;
 755        if (defined $self->{last_rev} && defined $self->{last_commit}) {
 756                return ($self->{last_rev}, $self->{last_commit});
 757        }
 758        my $c = ::verify_ref($self->refname.'^0');
 759        if ($c && !$self->use_svm_props && !$self->no_metadata) {
 760                my $rev = (::cmt_metadata($c))[1];
 761                if (defined $rev) {
 762                        ($self->{last_rev}, $self->{last_commit}) = ($rev, $c);
 763                        return ($rev, $c);
 764                }
 765        }
 766        my $map_path = $self->map_path;
 767        unless (-e $map_path) {
 768                ($self->{last_rev}, $self->{last_commit}) = (undef, undef);
 769                return (undef, undef);
 770        }
 771        my ($rev, $commit) = $self->rev_map_max(1);
 772        ($self->{last_rev}, $self->{last_commit}) = ($rev, $commit);
 773        return ($rev, $commit);
 774}
 775
 776sub get_fetch_range {
 777        my ($self, $min, $max) = @_;
 778        $max ||= $self->ra->get_latest_revnum;
 779        $min ||= $self->rev_map_max;
 780        (++$min, $max);
 781}
 782
 783sub tmp_config {
 784        my (@args) = @_;
 785        my $old_def_config = "$ENV{GIT_DIR}/svn/config";
 786        my $config = "$ENV{GIT_DIR}/svn/.metadata";
 787        if (! -f $config && -f $old_def_config) {
 788                rename $old_def_config, $config or
 789                       die "Failed rename $old_def_config => $config: $!\n";
 790        }
 791        my $old_config = $ENV{GIT_CONFIG};
 792        $ENV{GIT_CONFIG} = $config;
 793        $@ = undef;
 794        my @ret = eval {
 795                unless (-f $config) {
 796                        mkfile($config);
 797                        open my $fh, '>', $config or
 798                            die "Can't open $config: $!\n";
 799                        print $fh "; This file is used internally by ",
 800                                  "git-svn\n" or die
 801                                  "Couldn't write to $config: $!\n";
 802                        print $fh "; You should not have to edit it\n" or
 803                              die "Couldn't write to $config: $!\n";
 804                        close $fh or die "Couldn't close $config: $!\n";
 805                }
 806                command('config', @args);
 807        };
 808        my $err = $@;
 809        if (defined $old_config) {
 810                $ENV{GIT_CONFIG} = $old_config;
 811        } else {
 812                delete $ENV{GIT_CONFIG};
 813        }
 814        die $err if $err;
 815        wantarray ? @ret : $ret[0];
 816}
 817
 818sub tmp_index_do {
 819        my ($self, $sub) = @_;
 820        my $old_index = $ENV{GIT_INDEX_FILE};
 821        $ENV{GIT_INDEX_FILE} = $self->{index};
 822        $@ = undef;
 823        my @ret = eval {
 824                my ($dir, $base) = ($self->{index} =~ m#^(.*?)/?([^/]+)$#);
 825                mkpath([$dir]) unless -d $dir;
 826                &$sub;
 827        };
 828        my $err = $@;
 829        if (defined $old_index) {
 830                $ENV{GIT_INDEX_FILE} = $old_index;
 831        } else {
 832                delete $ENV{GIT_INDEX_FILE};
 833        }
 834        die $err if $err;
 835        wantarray ? @ret : $ret[0];
 836}
 837
 838sub assert_index_clean {
 839        my ($self, $treeish) = @_;
 840
 841        $self->tmp_index_do(sub {
 842                command_noisy('read-tree', $treeish) unless -e $self->{index};
 843                my $x = command_oneline('write-tree');
 844                my ($y) = (command(qw/cat-file commit/, $treeish) =~
 845                           /^tree ($::sha1)/mo);
 846                return if $y eq $x;
 847
 848                warn "Index mismatch: $y != $x\nrereading $treeish\n";
 849                unlink $self->{index} or die "unlink $self->{index}: $!\n";
 850                command_noisy('read-tree', $treeish);
 851                $x = command_oneline('write-tree');
 852                if ($y ne $x) {
 853                        fatal "trees ($treeish) $y != $x\n",
 854                              "Something is seriously wrong...";
 855                }
 856        });
 857}
 858
 859sub get_commit_parents {
 860        my ($self, $log_entry) = @_;
 861        my (%seen, @ret, @tmp);
 862        # legacy support for 'set-tree'; this is only used by set_tree_cb:
 863        if (my $ip = $self->{inject_parents}) {
 864                if (my $commit = delete $ip->{$log_entry->{revision}}) {
 865                        push @tmp, $commit;
 866                }
 867        }
 868        if (my $cur = ::verify_ref($self->refname.'^0')) {
 869                push @tmp, $cur;
 870        }
 871        if (my $ipd = $self->{inject_parents_dcommit}) {
 872                if (my $commit = delete $ipd->{$log_entry->{revision}}) {
 873                        push @tmp, @$commit;
 874                }
 875        }
 876        push @tmp, $_ foreach (@{$log_entry->{parents}}, @tmp);
 877        while (my $p = shift @tmp) {
 878                next if $seen{$p};
 879                $seen{$p} = 1;
 880                push @ret, $p;
 881        }
 882        @ret;
 883}
 884
 885sub rewrite_root {
 886        my ($self) = @_;
 887        return $self->{-rewrite_root} if exists $self->{-rewrite_root};
 888        my $k = "svn-remote.$self->{repo_id}.rewriteRoot";
 889        my $rwr = eval { command_oneline(qw/config --get/, $k) };
 890        if ($rwr) {
 891                $rwr =~ s#/+$##;
 892                if ($rwr !~ m#^[a-z\+]+://#) {
 893                        die "$rwr is not a valid URL (key: $k)\n";
 894                }
 895        }
 896        $self->{-rewrite_root} = $rwr;
 897}
 898
 899sub rewrite_uuid {
 900        my ($self) = @_;
 901        return $self->{-rewrite_uuid} if exists $self->{-rewrite_uuid};
 902        my $k = "svn-remote.$self->{repo_id}.rewriteUUID";
 903        my $rwid = eval { command_oneline(qw/config --get/, $k) };
 904        if ($rwid) {
 905                $rwid =~ s#/+$##;
 906                if ($rwid !~ m#^[a-f0-9]{8}-(?:[a-f0-9]{4}-){3}[a-f0-9]{12}$#) {
 907                        die "$rwid is not a valid UUID (key: $k)\n";
 908                }
 909        }
 910        $self->{-rewrite_uuid} = $rwid;
 911}
 912
 913sub metadata_url {
 914        my ($self) = @_;
 915        ($self->rewrite_root || $self->{url}) .
 916           (length $self->{path} ? '/' . $self->{path} : '');
 917}
 918
 919sub full_url {
 920        my ($self) = @_;
 921        $self->{url} . (length $self->{path} ? '/' . $self->{path} : '');
 922}
 923
 924sub full_pushurl {
 925        my ($self) = @_;
 926        if ($self->{pushurl}) {
 927                return $self->{pushurl} . (length $self->{path} ? '/' .
 928                       $self->{path} : '');
 929        } else {
 930                return $self->full_url;
 931        }
 932}
 933
 934sub set_commit_header_env {
 935        my ($log_entry) = @_;
 936        my %env;
 937        foreach my $ned (qw/NAME EMAIL DATE/) {
 938                foreach my $ac (qw/AUTHOR COMMITTER/) {
 939                        $env{"GIT_${ac}_${ned}"} = $ENV{"GIT_${ac}_${ned}"};
 940                }
 941        }
 942
 943        $ENV{GIT_AUTHOR_NAME} = $log_entry->{name};
 944        $ENV{GIT_AUTHOR_EMAIL} = $log_entry->{email};
 945        $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_entry->{date};
 946
 947        $ENV{GIT_COMMITTER_NAME} = (defined $log_entry->{commit_name})
 948                                                ? $log_entry->{commit_name}
 949                                                : $log_entry->{name};
 950        $ENV{GIT_COMMITTER_EMAIL} = (defined $log_entry->{commit_email})
 951                                                ? $log_entry->{commit_email}
 952                                                : $log_entry->{email};
 953        \%env;
 954}
 955
 956sub restore_commit_header_env {
 957        my ($env) = @_;
 958        foreach my $ned (qw/NAME EMAIL DATE/) {
 959                foreach my $ac (qw/AUTHOR COMMITTER/) {
 960                        my $k = "GIT_${ac}_${ned}";
 961                        if (defined $env->{$k}) {
 962                                $ENV{$k} = $env->{$k};
 963                        } else {
 964                                delete $ENV{$k};
 965                        }
 966                }
 967        }
 968}
 969
 970sub gc {
 971        command_noisy('gc', '--auto');
 972};
 973
 974sub do_git_commit {
 975        my ($self, $log_entry) = @_;
 976        my $lr = $self->last_rev;
 977        if (defined $lr && $lr >= $log_entry->{revision}) {
 978                die "Last fetched revision of ", $self->refname,
 979                    " was r$lr, but we are about to fetch: ",
 980                    "r$log_entry->{revision}!\n";
 981        }
 982        if (my $c = $self->rev_map_get($log_entry->{revision})) {
 983                croak "$log_entry->{revision} = $c already exists! ",
 984                      "Why are we refetching it?\n";
 985        }
 986        my $old_env = set_commit_header_env($log_entry);
 987        my $tree = $log_entry->{tree};
 988        if (!defined $tree) {
 989                $tree = $self->tmp_index_do(sub {
 990                                            command_oneline('write-tree') });
 991        }
 992        die "Tree is not a valid sha1: $tree\n" if $tree !~ /^$::sha1$/o;
 993
 994        my @exec = ('git', 'commit-tree', $tree);
 995        foreach ($self->get_commit_parents($log_entry)) {
 996                push @exec, '-p', $_;
 997        }
 998        defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @exec))
 999                                                                   or croak $!;
1000        binmode $msg_fh;
1001
1002        # we always get UTF-8 from SVN, but we may want our commits in
1003        # a different encoding.
1004        if (my $enc = Git::config('i18n.commitencoding')) {
1005                require Encode;
1006                Encode::from_to($log_entry->{log}, 'UTF-8', $enc);
1007        }
1008        print $msg_fh $log_entry->{log} or croak $!;
1009        restore_commit_header_env($old_env);
1010        unless ($self->no_metadata) {
1011                print $msg_fh "\ngit-svn-id: $log_entry->{metadata}\n"
1012                              or croak $!;
1013        }
1014        $msg_fh->flush == 0 or croak $!;
1015        close $msg_fh or croak $!;
1016        chomp(my $commit = do { local $/; <$out_fh> });
1017        close $out_fh or croak $!;
1018        waitpid $pid, 0;
1019        croak $? if $?;
1020        if ($commit !~ /^$::sha1$/o) {
1021                die "Failed to commit, invalid sha1: $commit\n";
1022        }
1023
1024        $self->rev_map_set($log_entry->{revision}, $commit, 1);
1025
1026        $self->{last_rev} = $log_entry->{revision};
1027        $self->{last_commit} = $commit;
1028        print "r$log_entry->{revision}" unless $::_q > 1;
1029        if (defined $log_entry->{svm_revision}) {
1030                 print " (\@$log_entry->{svm_revision})" unless $::_q > 1;
1031                 $self->rev_map_set($log_entry->{svm_revision}, $commit,
1032                                   0, $self->svm_uuid);
1033        }
1034        print " = $commit ($self->{ref_id})\n" unless $::_q > 1;
1035        if (--$_gc_nr == 0) {
1036                $_gc_nr = $_gc_period;
1037                gc();
1038        }
1039        return $commit;
1040}
1041
# match_paths(PATHS, R)
# Decide whether the changed-paths hash PATHS (path => change record)
# touches the directory this instance tracks.  Returns 1 or 0:
#   - 1 when $self->{path} is empty;
#   - for an entry at exactly /$self->{path}: 0 if its action is `D',
#     1 otherwise;
#   - 1 when any changed path lies below /$self->{path}/;
#   - 1 when an ancestor of $self->{path} (or the path itself) was
#     added or replaced (`A' or `R') and check_path() reports
#     $self->{path} as a directory at revision R;
#   - 0 otherwise.
# The prefix regex is compiled once and kept in $self->{path_regex}.
sub match_paths {
        my ($self, $paths, $r) = @_;
        return 1 if $self->{path} eq '';

        my $exact = $paths->{"/$self->{path}"};
        return (($exact->{action} eq 'D') ? 0 : 1) if $exact;

        $self->{path_regex} ||= qr/^\/\Q$self->{path}\E\//;
        for my $changed (keys %$paths) {
                return 1 if $changed =~ /$self->{path_regex}/;
        }

        my $ancestor = '';
        for my $component (split m#/#, $self->{path}) {
                $ancestor .= "/$component";
                my $change = $paths->{$ancestor} or next;
                next unless $change->{action} =~ /^[AR]$/;
                my $kind = $self->ra->check_path($self->{path}, $r);
                return 1 if $kind == $SVN::Node::dir;
        }
        return 0;
}
1064
# find_parent_branch(PATHS, REV)
# When follow_parent is enabled, look in the changed-paths hash PATHS
# (fetched from the log of REV when not supplied) for a copy that
# created $self->{path} or one of its ancestors.  If one is found, the
# copy source is resolved to another Git::SVN instance via other_gs(),
# fetched up to the copy revision if necessary, and the tree for REV is
# produced on top of the parent commit using do_switch, a plain
# read-tree when the trees match, or do_update.
# Returns the log entry from make_log_entry() (with the parent commit
# as its parent) on success, undef otherwise.
sub find_parent_branch {
        my ($self, $paths, $rev) = @_;
        return undef unless $self->follow_parent;
        unless (defined $paths) {
                # local() puts the previous error handler back when this
                # block is left, including when get_log() dies; a manual
                # save/restore would leave skip_unknown_revs installed
                # after an exception.
                local $SVN::Error::handler =
                        \&Git::SVN::Ra::skip_unknown_revs;
                $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1,
                                   sub { $paths = $_[0] });
        }
        return undef unless defined $paths;

        # look for a parent from another branch:
        # walk from our own path up towards the root until a path with
        # copy information is found; the components stripped on the way
        # are collected so they can be re-attached to the copy source.
        my @b_path_components = split m#/#, $self->{path};
        my @a_path_components;
        my $i;
        while (@b_path_components) {
                $i = $paths->{'/'.join('/', @b_path_components)};
                last if $i && defined $i->{copyfrom_path};
                unshift(@a_path_components, pop(@b_path_components));
        }
        return undef unless defined $i && defined $i->{copyfrom_path};
        my $branch_from = $i->{copyfrom_path};
        if (@a_path_components) {
                print STDERR "branch_from: $branch_from => ";
                $branch_from .= '/'.join('/', @a_path_components);
                print STDERR $branch_from, "\n";
        }
        my $r = $i->{copyfrom_rev};
        my $url = $self->ra->{url};
        my $new_url = $url . $branch_from;
        print STDERR  "Found possible branch point: ",
                      "$new_url => ", $self->full_url, ", $r\n"
                      unless $::_q > 1;
        $branch_from =~ s#^/##;
        my $gs = $self->other_gs($new_url, $url,
                                 $branch_from, $r, $self->{ref_id});
        my ($r0, $parent) = $gs->find_rev_before($r, 1);
        {
                # Fetch whatever part of the parent's history up to $r
                # is still missing, then look the parent up again.
                my ($base, $head);
                if (!defined $r0 || !defined $parent) {
                        ($base, $head) = parse_revision_argument(0, $r);
                } else {
                        if ($r0 < $r) {
                                $gs->ra->get_log([$gs->{path}], $r0 + 1, $r, 1,
                                        0, 1, sub { $base = $_[1] - 1 });
                        }
                }
                if (defined $base && $base <= $r) {
                        $gs->fetch($base, $r);
                }
                ($r0, $parent) = $gs->find_rev_before($r, 1);
        }
        if (defined $r0 && defined $parent) {
                print STDERR "Found branch parent: ($self->{ref_id}) $parent\n"
                             unless $::_q > 1;
                my $ed;
                if ($self->ra->can_do_switch) {
                        $self->assert_index_clean($parent);
                        print STDERR "Following parent with do_switch\n"
                                     unless $::_q > 1;
                        # do_switch works with svn/trunk >= r22312, but that
                        # is not included with SVN 1.4.3 (the latest version
                        # at the moment), so we can't rely on it
                        $self->{last_rev} = $r0;
                        $self->{last_commit} = $parent;
                        $ed = Git::SVN::Fetcher->new($self, $gs->{path});
                        $gs->ra->gs_do_switch($r0, $rev, $gs,
                                              $self->full_url, $ed)
                          or die "SVN connection failed somewhere...\n";
                } elsif ($self->ra->trees_match($new_url, $r0,
                                                $self->full_url, $rev)) {
                        print STDERR "Trees match:\n",
                                     "  $new_url\@$r0\n",
                                     "  ${\$self->full_url}\@$rev\n",
                                     "Following parent with no changes\n"
                                     unless $::_q > 1;
                        $self->tmp_index_do(sub {
                            command_noisy('read-tree', $parent);
                        });
                        $self->{last_commit} = $parent;
                } else {
                        print STDERR "Following parent with do_update\n"
                                     unless $::_q > 1;
                        $ed = Git::SVN::Fetcher->new($self);
                        $self->ra->gs_do_update($rev, $rev, $self, $ed)
                          or die "SVN connection failed somewhere...\n";
                }
                print STDERR "Successfully followed parent\n" unless $::_q > 1;
                return $self->make_log_entry($rev, [$parent], $ed);
        }
        return undef;
}
1159
# do_fetch(PATHS, REV)
# Produce the log entry for revision REV.  A parent branch found by
# find_parent_branch() takes precedence; otherwise the revision is
# fetched with gs_do_update(), starting from the last fetched revision
# when this ref already has a commit and from REV itself when it has
# none.  Dies when the update fails.
sub do_fetch {
        my ($self, $paths, $rev) = @_;
        my $lc = $self->last_commit;

        if (my $log_entry = $self->find_parent_branch($paths, $rev)) {
                # we can have a branch that was deleted, then re-added
                # under the same name but copied from another path, in
                # which case we'll have multiple parents (we don't
                # want to break the original ref, nor lose copypath info):
                push @{$log_entry->{parents}}, $lc if $lc;
                return $log_entry;
        }

        my $ed = Git::SVN::Fetcher->new($self);
        my ($last_rev, @parents) = ($rev);
        if ($lc) {
                $last_rev = $self->{last_rev};
                $ed->{c} = $lc;
                @parents = ($lc);
        }
        $self->ra->gs_do_update($last_rev, $rev, $self, $ed)
                or die "SVN connection failed somewhere...\n";
        return $self->make_log_entry($rev, \@parents, $ed);
}
1189
# mkemptydirs(R)
# Re-create, relative to the current directory, the empty directories
# recorded in this ref's unhandled.log.gz and unhandled.log files.  The
# logs are replayed in order (`+empty_dir' adds a directory,
# `-empty_dir' removes it and everything below it); when R is defined,
# replay of a file stops at the first `rNNN' line with NNN greater
# than R.  Directory names are URI-decoded and have the $self->{path}
# prefix stripped before they are created.
sub mkemptydirs {
        my ($self, $r) = @_;

        # scan(R, EMPTY_DIRS, LINE): apply one log line to the hash ref
        # EMPTY_DIRS; returns 0 when replay should stop, 1 otherwise.
        sub scan {
                my ($r, $empty_dirs, $line) = @_;
                if (defined $r && $line =~ /^r(\d+)$/) {
                        return 0 if $1 > $r;
                } elsif ($line =~ /^  \+empty_dir: (.+)$/) {
                        $empty_dirs->{$1} = 1;
                } elsif ($line =~ /^  \-empty_dir: (.+)$/) {
                        # copy the capture before running further
                        # matches inside grep
                        my $removed = $1;
                        my @d = grep { m[^\Q$removed\E(/|$)] }
                                (keys %$empty_dirs);
                        delete @$empty_dirs{@d};
                }
                1; # continue
        };

        my %empty_dirs = ();
        my $gz_file = "$self->{dir}/unhandled.log.gz";
        if (-f $gz_file) {
                if (!can_compress()) {
                        warn "Compress::Zlib could not be found; ",
                             "empty directories in $gz_file will not be read\n";
                } else {
                        my $gz = Compress::Zlib::gzopen($gz_file, "rb") or
                                die "Unable to open $gz_file: $!\n";
                        my $line;
                        while ($gz->gzreadline($line) > 0) {
                                scan($r, \%empty_dirs, $line) or last;
                        }
                        $gz->gzclose;
                }
        }

        # A missing or unreadable plain log is not an error.
        if (open my $fh, '<', "$self->{dir}/unhandled.log") {
                binmode $fh or croak "binmode: $!";
                # Read into a lexical: `while (<$fh>)' would overwrite
                # the caller's global $_.
                while (defined(my $line = <$fh>)) {
                        scan($r, \%empty_dirs, $line) or last;
                }
                close $fh;
        }

        my $strip = qr/\A\Q$self->{path}\E(?:\/|$)/;
        foreach my $d (sort keys %empty_dirs) {
                $d = uri_decode($d);
                $d =~ s/$strip//;
                next unless length($d);
                next if -d $d;
                if (-e $d) {
                        warn "$d exists but is not a directory\n";
                } else {
                        print "creating empty directory: $d\n";
                        mkpath([$d]);
                }
        }
}
1245
# get_untracked(ED)
# Format the changes recorded in the editor object ED that are not
# stored in Git, and return them as an array ref of log lines (each
# indented by two spaces, values URI-encoded):
#   - `+empty_dir' / `-empty_dir' for every key of ED->{empty}
#     (also reported with warn);
#   - `+dir_prop' / `+file_prop' for property values that are defined,
#     `-dir_prop' / `-file_prop' for those that are not, skipping the
#     properties listed in %SKIP_PROP;
#   - `absent_file' / `absent_directory' entries (also reported with
#     warn).
sub get_untracked {
        my ($self, $ed) = @_;
        my @out;

        my $empty = $ed->{empty};
        for my $dir (sort keys %$empty) {
                my $act = $empty->{$dir} ? '+empty_dir' : '-empty_dir';
                push @out, "  $act: " . uri_encode($dir);
                warn "W: $act: $dir\n";
        }

        for my $kind (qw/dir_prop file_prop/) {
                my $by_path = $ed->{$kind} or next;
                for my $path (sort keys %$by_path) {
                        # the root of the tree is shown as `.'
                        my $ppath = $path eq '' ? '.' : $path;
                        for my $prop (sort keys %{$by_path->{$path}}) {
                                next if $SKIP_PROP{$prop};
                                my $value = $by_path->{$path}->{$prop};
                                my $label = "$kind: " .
                                            uri_encode($ppath) . ' ' .
                                            uri_encode($prop);
                                push @out, defined $value
                                        ? "  +$label " . uri_encode($value)
                                        : "  -$label";
                        }
                }
        }

        for my $kind (qw/absent_file absent_directory/) {
                my $by_parent = $ed->{$kind} or next;
                for my $parent (sort keys %$by_parent) {
                        for my $path (sort @{$by_parent->{$parent}}) {
                                push @out, "  $kind: " .
                                           uri_encode("$parent/$path");
                                warn "W: $kind: $parent/$path ",
                                     "Insufficient permissions?\n";
                        }
                }
        }
        return \@out;
}
1287
# get_tz(TIME)
# Returns the local timezone's offset from UTC at epoch TIME (the
# current time when TIME is omitted or false) as a `+HHMM' / `-HHMM'
# string.
sub get_tz {
        my ($when) = @_;
        $when ||= time;

        # Some systems don't handle or mishandle %z, so be creative:
        # re-read the UTC broken-down time as if it were local time; the
        # distance between the two epochs is the offset.
        my $as_local = timelocal(gmtime($when));
        my $delta = $when - $as_local;
        my $sign = $delta < 0 ? '-' : '+';
        my ($minutes, $hours) = (gmtime(abs($delta)))[1, 2];
        return sprintf("%s%02d%02d", $sign, $hours, $minutes);
}
1295
# parse_svn_date(DATE)
# --------------------
# Given a date (in UTC) from Subversion, return a string in the format
# "<TZ Offset> <local date/time>" that Git will use.
#
# By default the parsed date will be in UTC; if $Git::SVN::_localtime
# is true we'll convert it to the local timezone instead, using the
# UTC offset that was in effect at that date.
#
# A false DATE yields '+0000 1970-01-01 00:00:00'; a DATE that is not
# of the form YYYY-MM-DDTHH:MM:SS.<fraction>Z makes the sub croak.
sub parse_svn_date {
        my $date = shift || return '+0000 1970-01-01 00:00:00';
        my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
                                            (\d\d)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
                                         croak "Unable to parse date: $date\n";
        my $parsed_date;    # Set next.

        if ($Git::SVN::_localtime) {
                # Translate the Subversion datetime to an epoch time.
                # Begin by switching ourselves to $date's timezone, UTC.
                my $old_env_TZ = $ENV{TZ};
                $ENV{TZ} = 'UTC';

                my $epoch_in_UTC =
                    POSIX::strftime('%s', $S, $M, $H, $d, $m - 1, $Y - 1900);

                # Determine our local timezone (including DST) at the
                # time of $epoch_in_UTC.  $Git::SVN::Log::TZ stored the
                # value of TZ, if any, at the time we were run.
                if (defined $Git::SVN::Log::TZ) {
                        $ENV{TZ} = $Git::SVN::Log::TZ;
                } else {
                        delete $ENV{TZ};
                }

                # Ask for the offset at the commit's own time, not at
                # the current time: the two differ whenever a DST
                # switch lies in between.  (get_tz() falls back to the
                # current time for a false argument, i.e. for epoch 0.)
                my $our_TZ = get_tz($epoch_in_UTC);

                # This converts $epoch_in_UTC into our local timezone.
                my ($sec, $min, $hour, $mday, $mon, $year,
                    $wday, $yday, $isdst) = localtime($epoch_in_UTC);

                $parsed_date = sprintf('%s %04d-%02d-%02d %02d:%02d:%02d',
                                       $our_TZ, $year + 1900, $mon + 1,
                                       $mday, $hour, $min, $sec);

                # Reset us to the timezone in effect when we entered
                # this routine.
                if (defined $old_env_TZ) {
                        $ENV{TZ} = $old_env_TZ;
                } else {
                        delete $ENV{TZ};
                }
        } else {
                $parsed_date = "+0000 $Y-$m-$d $H:$M:$S";
        }

        return $parsed_date;
}
1351
# other_gs(NEW_URL, URL, BRANCH_FROM, R, OLD_REF_ID)
# Return the Git::SVN instance for NEW_URL.  An instance already known
# to find_by_url() is returned as is.  Otherwise a new one is
# initialised under a ref id derived from OLD_REF_ID: any old `@REV'
# suffix is replaced by `@R', and `-' characters are appended while the
# name is taken.  Prints "Initializing parent" when a new instance had
# to be set up.
sub other_gs {
        my ($self, $new_url, $url,
            $branch_from, $r, $old_ref_id) = @_;
        my $gs = Git::SVN->find_by_url($new_url, $url, $branch_from);
        return $gs if $gs;

        (my $ref_id = $old_ref_id) =~ s/\@\d+-*$//;
        $ref_id .= "\@$r";
        # just grow a tail if we're not unique enough :x
        $ref_id .= '-' while find_ref($ref_id);

        # When NEW_URL lies below URL, stay within our own svn-remote
        # and express the difference as a path.
        my ($u, $p, $repo_id) = ($new_url, '', $ref_id);
        if ($u =~ s#^\Q$url\E(/|$)##) {
                ($u, $p, $repo_id) = ($url, $u, $self->{repo_id});
        }

        for (;;) {
                # It is possible to tag two different subdirectories at
                # the same revision.  If the url for an existing ref
                # does not match, we must either find a ref with a
                # matching url or create a new ref by growing a tail.
                $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1);
                my $max_commit = ($gs->rev_map_max(1))[1];
                last unless $max_commit;
                my ($commit_url) = ::cmt_metadata($max_commit);
                last if $commit_url eq $gs->metadata_url;
                $ref_id .= '-';
        }
        print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1;
        return $gs;
}
1384
# call_authors_prog(AUTHOR)
# Run the program named by $::_authors_prog with AUTHOR as its single
# argument and parse its output as "Name <email>".  AUTHOR is quoted
# with `git rev-parse --sq-quote' before being placed on the command
# line.  Returns [name, email], with email undef when the angle
# brackets are empty.  Dies when the program exits non-zero or prints
# something that does not match.
sub call_authors_prog {
        my ($orig_author) = @_;
        my $quoted = command_oneline('rev-parse', '--sq-quote', $orig_author);
        my $author = qx{$::_authors_prog $quoted};
        die "$::_authors_prog failed with exit code $?\n" if $? != 0;

        my ($name, $email) = ($author =~ /^\s*(.+?)\s*<(.*)>\s*$/)
                or die "Author: $quoted: $::_authors_prog returned "
                        . "invalid author format: $author\n";
        $email = undef if length $email == 0;
        return [$name, $email];
}
1401
# check_author(AUTHOR)
# Normalise an SVN author name and make sure it can be mapped.  An
# undefined or empty AUTHOR becomes '(no author)'.  When %::users has
# no entry for the name, one is obtained from call_authors_prog() if
# $::_authors_prog is defined; failing that, the sub dies if
# $::_authors is defined.  Returns the (normalised) author name.
sub check_author {
        my ($author) = @_;
        $author = '(no author)'
                unless defined $author && length $author != 0;
        return $author if defined $::users{$author};

        if (defined $::_authors_prog) {
                $::users{$author} = call_authors_prog($author);
        } elsif (defined $::_authors) {
                die "Author: $author not defined in $::_authors file\n";
        }
        return $author;
}
1416
# find_extra_svk_parents(ED, TICKETS, PARENTS)
# Turn the newline-separated `uuid:path:rev' merge tickets of an
# svk:merge property into extra merge parents.  Tickets whose uuid is
# this repository's ra_uuid are resolved to commits through other_gs()
# and rev_map_get(); every resolved commit that contributes history not
# already reachable from the array ref PARENTS is appended to PARENTS
# (modified in place) and announced on STDERR.
sub find_extra_svk_parents {
        my ($self, $ed, $tickets, $parents) = @_;
        # aha!  svk:merge property changed...
        my @tickets = split "\n", $tickets;
        my @known_parents;
        for my $ticket ( @tickets ) {
                my ($uuid, $path, $rev) = split /:/, $ticket;
                if ( $uuid eq $self->ra_uuid ) {
                        my $url = $self->{url};
                        my $repos_root = $url;
                        my $branch_from = $path;
                        $branch_from =~ s{^/}{};
                        my $gs = $self->other_gs($repos_root."/".$branch_from,
                                                 $url,
                                                 $branch_from,
                                                 $rev,
                                                 $self->{ref_id});
                        if ( my $commit = $gs->rev_map_get($rev, $uuid) ) {
                                # wahey!  we found it, but it might be
                                # an old one (!)
                                push @known_parents, [ $rev, $commit ];
                        }
                }
        }
        # Ordering matters; highest-numbered commit merge tickets
        # first, as they may account for later merge ticket additions
        # or changes.
        @known_parents = map {$_->[1]} sort {$b->[0] <=> $a->[0]} @known_parents;
        for my $parent ( @known_parents ) {
                my @cmd = ('rev-list', $parent, map { "^$_" } @$parents );
                my ($msg_fh, $ctx) = command_output_pipe(@cmd);
                # One line of output is enough to know that $parent has
                # commits the current parents lack.  Read it into a
                # lexical: `while (<$msg_fh>)' would overwrite the
                # caller's global $_.
                my $new;
                $new = 1 if defined(my $first = <$msg_fh>);
                command_close_pipe($msg_fh, $ctx);
                if ( $new ) {
                        print STDERR
                            "Found merge parent (svk:merge ticket): $parent\n";
                        push @$parents, $parent;
                }
        }
}
1460
# lookup_svn_merge(UUID, URL, MERGE)
# Resolve one `/source/path:REVS' svn:mergeinfo entry, REVS being a
# comma-separated list of `N' or `N-M' ranges, against the Git::SVN
# instance tracking URL . source path.  Returns a list made of the
# commit for the highest range top seen, followed by one rev-list range
# expression per resolvable range.  Returns nothing (after a warning)
# when no instance tracks the source; ranges that cannot be resolved
# are skipped with a warning.  UUID is accepted but not used here.
sub lookup_svn_merge {
        my ($uuid, $url, $merge) = @_;

        my ($source, $revs) = split ":", $merge;
        (my $path = $source) =~ s{^/}{};
        my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
        unless ($gs) {
                warn "Couldn't find revmap for $url$source\n";
                return;
        }

        my ($tip, $tip_commit);
        my @merged_commit_ranges;
        # find the tip
        for my $range (split ",", $revs) {
                my ($bottom, $top) = split "-", $range;
                $top ||= $bottom;
                my $bottom_commit = $gs->find_rev_after( $bottom, 1, $top );
                my $top_commit = $gs->find_rev_before( $top, 1, $bottom );

                unless ($top_commit and $bottom_commit) {
                        warn "W:unknown path/rev in svn:mergeinfo "
                                ."dirprop: $source:$range\n";
                        next;
                }

                # `^..' is used only when the bottom commit has a parent
                my $has_parent =
                        scalar(command('rev-parse', "$bottom_commit^@"));
                push @merged_commit_ranges, $has_parent
                        ? "$bottom_commit^..$top_commit"
                        : "$top_commit";

                if ( !defined $tip or $top > $tip ) {
                        ($tip, $tip_commit) = ($top, $top_commit);
                }
        }
        return ($tip_commit, @merged_commit_ranges);
}
1504
# Run "git rev-list" with the given arguments and return its output
# lines, with the line terminators removed.
sub _rev_list {
        my ($out, $ctx) = command_output_pipe("rev-list", @_);
        my @commits;
        while (defined(my $line = readline($out))) {
                chomp $line;
                push @commits, $line;
        }
        command_close_pipe($out, $ctx);
        return @commits;
}
1517
# Report which non-merge commits of $tip are not accounted for.
#
# Arguments: $base, $tip, $parents (array reference), then any number of
# rev-list range arguments.
#
# Starts from the non-merge commits listed by
# "rev-list --no-merges $tip --not $base @$parents --", drops everything
# listed by any of the given ranges, then drops every commit for which
# has_no_changes() answers true.  Returns whatever is left.
sub check_cherry_pick {
        my ($base, $tip, $parents, @ranges) = @_;

        my %pending;
        $pending{$_} = 1
                for _rev_list("--no-merges", $tip, "--not", $base,
                              @$parents, "--");

        foreach my $range (@ranges) {
                my @covered = _rev_list($range, "--");
                delete @pending{@covered};
        }

        my @unchanged = grep { has_no_changes($_) } keys %pending;
        delete @pending{@unchanged};

        return (keys %pending);
}
1535
# Decide whether $commit is to be treated as having no changes.
#
# Returns:
#   1        if the commit has no parent,
#   0        if the commit has more than one parent,
#   otherwise the result of comparing the commit's tree with the tree of
#   its only parent (true when both are the same tree).
#
# NOTE(review): earlier comments in this function said parentless commits
# "have changes by definition" and merges "have no changes by definition",
# which is the opposite of the values returned.  The returned values are
# kept exactly as they were -- confirm which one is intended.
sub has_no_changes {
        my ($commit) = @_;

        # "rev-list --parents" prints the commit followed by its parents
        my $listing = command_oneline(
                qw(rev-list --parents -1 -m), $commit);
        my @ids = split / /, $listing;
        my $parent_count = @ids - 1;

        return 1 if $parent_count < 1;
        return 0 if $parent_count > 1;

        my $own_tree = command_oneline("rev-parse", "$commit^{tree}");
        my $parent_tree = command_oneline("rev-parse", "$commit~1^{tree}");
        return ($own_tree eq $parent_tree);
}
1553
# Tie the hash referenced by $hash to an on-disk store derived from $path:
# "$path.yaml" through Git::SVN::Memoize::YAML when that module could be
# loaded, "$path.db" through Memoize::Storable (nstore) otherwise.
sub tie_for_persistent_memoization {
        my ($hash, $path) = @_;

        return tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml"
                if $can_use_yaml;

        tie %$hash => 'Memoize::Storable', "$path.db", 'nstore';
}
1564
# The GIT_DIR environment variable is not always set until after the command
# line arguments are processed, so we can't memoize in a BEGIN block.
{
        # true while the mergeinfo helpers are wrapped by Memoize
        my $memoized = 0;

        # Wrap lookup_svn_merge, check_cherry_pick and has_no_changes with
        # Memoize, each backed by its own persistent cache underneath
        # $GIT_DIR/svn/.caches/.  Does nothing when already done.
        sub memoize_svn_mergeinfo_functions {
                return if $memoized;
                $memoized = 1;

                my $cache_path = "$ENV{GIT_DIR}/svn/.caches/";
                mkpath([$cache_path]) unless -d $cache_path;

                # [ function, calling context whose results are cached ];
                # calls made in the other context are set to 'FAULT'.
                my @plan = (
                        [ 'lookup_svn_merge',  'LIST'   ],
                        [ 'check_cherry_pick', 'LIST'   ],
                        [ 'has_no_changes',    'SCALAR' ],
                );

                for my $entry (@plan) {
                        my ($name, $context) = @$entry;

                        # a fresh hash per function; Memoize keeps the
                        # reference alive after this iteration ends
                        my %cache;
                        tie_for_persistent_memoization(\%cache,
                            "$cache_path/$name");

                        my $persistent = ['HASH' => \%cache];
                        my ($scalar_cache, $list_cache) =
                                $context eq 'LIST'
                                        ? ('FAULT', $persistent)
                                        : ($persistent, 'FAULT');
                        memoize($name,
                                SCALAR_CACHE => $scalar_cache,
                                LIST_CACHE => $list_cache,
                        );
                }
        }

        # Undo memoize_svn_mergeinfo_functions(); does nothing when the
        # helpers are not currently memoized.
        sub unmemoize_svn_mergeinfo_functions {
                return unless $memoized;
                $memoized = 0;

                Memoize::unmemoize($_)
                        for qw(lookup_svn_merge
                               check_cherry_pick
                               has_no_changes);
        }

        Memoize::memoize 'Git::SVN::repos_root';
}

END {
        # Force cache writeout explicitly instead of waiting for
        # global destruction to avoid segfault in Storable:
        # http://rt.cpan.org/Public/Bug/Display.html?id=36087
        unmemoize_svn_mergeinfo_functions();
}
1621
# Work out which of the given commits are not reachable from @$parents.
#
# Arguments: $parents (array reference of commits), followed by the
# candidate commits.
#
# Repeatedly asks "rev-list -1 <candidates> --not <parents>" for one
# commit, records it, removes it from the candidates and asks again,
# until rev-list prints nothing or no candidates are left.
#
# Returns the recorded commits (the empty list when called without
# candidates).  Dies if rev-list names a commit that is not one of the
# candidates.
sub parents_exclude {
        my $parents = shift;
        my @commits = @_;
        return unless @commits;

        my @excluded;
        my $excluded;
        do {
                my @cmd = ('rev-list', "-1", @commits, "--not", @$parents );
                $excluded = command_oneline(@cmd);
                if ( $excluded ) {
                        # Keep EVERY other candidate for the next round.
                        # Stopping at the first match would silently drop
                        # the candidates listed after it, so they would
                        # never be tested.
                        my @remaining = grep { $_ ne $excluded } @commits;
                        die "saw commit '$excluded' in rev-list output, "
                                ."but we didn't ask for that commit (wanted: @commits --not @$parents)"
                                        unless @remaining < @commits;
                        push @excluded, $excluded;
                        @commits = @remaining;
                }
        }
                while ($excluded and @commits);

        return @excluded;
}
1655
1656
# note: this function should only be called if the various dirprops
# have actually changed
#
# Inspect an svn:mergeinfo value and append newly merged tips to @$parents.
#
# Arguments: $self, $ed (not read here), $mergeinfo (property text, one
# merge source per line) and $parents (array reference, modified in place).
#
# Outline:
#   1. resolve every mergeinfo line to a tip commit via lookup_svn_merge;
#   2. keep the tips that parents_exclude() reports;
#   3. accept a tip only if check_cherry_pick() finds nothing missing;
#   4. among the accepted tips, discard any tip for which
#      "rev-list -1 <other>..<tip>" prints nothing;
#   5. push the survivors onto @$parents.
sub find_extra_svn_parents {
        my ($self, $ed, $mergeinfo, $parents) = @_;

        memoize_svn_mergeinfo_functions();

        my @specs = split "\n", $mergeinfo;
        my $url = $self->{url};
        my $uuid = $self->ra_uuid;

        # @tips runs parallel to @specs; undef marks a line that yielded
        # no tip or whose tip already is one of the parents
        my @tips;
        my %ranges_of;
        foreach my $spec (@specs) {
                my ($tip_commit, @ranges) =
                        lookup_svn_merge( $uuid, $url, $spec );
                if ($tip_commit and
                    !grep { $_ eq $tip_commit } @$parents) {
                        push @tips, $tip_commit;
                        $ranges_of{$tip_commit} = \@ranges;
                } else {
                        push @tips, undef;
                }
        }

        my %excluded = map { $_ => 1 }
                parents_exclude($parents, grep { defined } @tips);

        # check merge tips for new parents
        my @new_parents;
        foreach my $idx (0 .. $#tips) {
                my $merge_tip = $tips[$idx];
                my $spec = $specs[$idx];
                next unless $merge_tip and $excluded{$merge_tip};

                my $ranges = $ranges_of{$merge_tip};

                my $merge_base;
                eval {
                        $merge_base = command_oneline(
                                "merge-base",
                                @$parents, $merge_tip,
                        );
                };
                if ($@) {
                        die "An error occurred during merge-base"
                                unless $@->isa("Git::Error::Command");

                        warn "W: Cannot find common ancestor between ".
                             "@$parents and $merge_tip. Ignoring merge info.\n";
                        next;
                }

                # double check that there are no missing non-merge commits
                my @incomplete = check_cherry_pick(
                        $merge_base, $merge_tip, $parents, @$ranges);

                if (@incomplete) {
                        warn "W:svn cherry-pick ignored ($spec) - missing "
                                .scalar(@incomplete)
                                ." commit(s) (eg $incomplete[0])\n";
                        next;
                }

                warn
                        "Found merge parent (svn:mergeinfo prop): ",
                                $merge_tip, "\n";
                push @new_parents, $merge_tip;
        }

        # cater for merges which merge commits from multiple branches
        if ( @new_parents > 1 ) {
                foreach my $i (0 .. $#new_parents) {
                        foreach my $j (0 .. $#new_parents) {
                                next if $i == $j
                                     or !$new_parents[$i]
                                     or !$new_parents[$j];
                                my $revs = command_oneline(
                                        "rev-list", "-1",
                                        "$new_parents[$i]..$new_parents[$j]",
                                       );
                                # nothing listed: drop the second tip
                                undef($new_parents[$j]) unless $revs;
                        }
                }
        }
        push @$parents, grep { defined } @new_parents;
}
1751
# Build the log entry for SVN revision $rev.
#
# Arguments:
#   $self    - the Git::SVN instance
#   $rev     - SVN revision number
#   $parents - array reference of parent commits (copied, not modified)
#   $ed      - editor object; its {path_strip} and {dir_prop} are read
#
# Side effects: appends "r$rev", the untracked entries and any revision
# property that is not handled here to "$self->{dir}/unhandled.log", and
# removes $self->{logged_rev_props}.
#
# Returns a reference to a hash with the keys parents, revision, log,
# date, author, metadata, name, email, commit_name and commit_email
# (plus svm_revision on the svm:headrev path), or undef when svm
# properties are in use and the svm:headrev revision is 0.
sub make_log_entry {
        my ($self, $rev, $parents, $ed) = @_;
        my $untracked = $self->get_untracked($ed);

        my @parents = @$parents;
        my $ps = $ed->{path_strip} || "";
        # Only the keys of dir_prop are paths.  An empty $ps must not be
        # handed to m//: an empty pattern makes Perl reuse the last
        # successfully matched regex instead of matching everything.
        for my $path ( grep { !length($ps) || m/$ps/ }
                       keys %{$ed->{dir_prop}} ) {
                my $props = $ed->{dir_prop}{$path};
                if ( $props->{"svk:merge"} ) {
                        $self->find_extra_svk_parents
                                ($ed, $props->{"svk:merge"}, \@parents);
                }
                if ( $props->{"svn:mergeinfo"} ) {
                        $self->find_extra_svn_parents
                                ($ed,
                                 $props->{"svn:mergeinfo"},
                                 \@parents);
                }
        }

        open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!;
        print $un "r$rev\n" or croak $!;
        for my $entry (@$untracked) {
                print $un $entry, "\n" or croak $!;
        }
        my %log_entry = ( parents => \@parents, revision => $rev,
                          log => '');

        my $headrev;
        my $logged = delete $self->{logged_rev_props};
        if (!$logged || $self->{-want_revprops}) {
                my $rp = $self->ra->rev_proplist($rev);
                foreach (sort keys %$rp) {
                        my $v = $rp->{$_};
                        if (/^svn:(author|date|log)$/) {
                                $log_entry{$1} = $v;
                        } elsif ($_ eq 'svm:headrev') {
                                $headrev = $v;
                        } else {
                                print $un "  rev_prop: ", uri_encode($_), ' ',
                                          uri_encode($v), "\n";
                        }
                }
        } else {
                # take over the properties that were logged earlier
                @log_entry{keys %$logged} = values %$logged;
        }
        close $un or croak $!;

        $log_entry{date} = parse_svn_date($log_entry{date});
        $log_entry{log} .= "\n";
        my $author = $log_entry{author} = check_author($log_entry{author});
        my ($name, $email) = defined $::users{$author} ? @{$::users{$author}}
                                                       : ($author, undef);

        # The committer identity keeps the plain author mapping; only the
        # author identity may be overridden from the log message below.
        my ($commit_name, $commit_email) = ($name, $email);
        if ($_use_log_author) {
                my $name_field;
                if ($log_entry{log} =~ /From:\s+(.*\S)\s*\n/i) {
                        $name_field = $1;
                } elsif ($log_entry{log} =~ /Signed-off-by:\s+(.*\S)\s*\n/i) {
                        $name_field = $1;
                }
                if (!defined $name_field) {
                        if (!defined $email) {
                                $email = $name;
                        }
                } elsif ($name_field =~ /(.*?)\s+<(.*)>/) {
                        ($name, $email) = ($1, $2);
                } elsif ($name_field =~ /(.*)@/) {
                        ($name, $email) = ($1, $name_field);
                } else {
                        ($name, $email) = ($name_field, $name_field);
                }
        }
        if (defined $headrev && $self->use_svm_props) {
                if ($self->rewrite_root) {
                        die "Can't have both 'useSvmProps' and 'rewriteRoot' ",
                            "options set!\n";
                }
                if ($self->rewrite_uuid) {
                        die "Can't have both 'useSvmProps' and 'rewriteUUID' ",
                            "options set!\n";
                }
                my ($uuid, $r) = $headrev =~ m{^([a-f\d\-]{30,}):(\d+)$}i;
                # we don't want "SVM: initializing mirror for junk" ...
                return undef if $r == 0;
                my $svm = $self->svm;
                if ($uuid ne $svm->{uuid}) {
                        die "UUID mismatch on SVM path:\n",
                            "expected: $svm->{uuid}\n",
                            "     got: $uuid\n";
                }
                my $full_url = $self->full_url;
                $full_url =~ s#^\Q$svm->{replace}\E(/|$)#$svm->{source}$1# or
                             die "Failed to replace '$svm->{replace}' with ",
                                 "'$svm->{source}' in $full_url\n";
                # throw away username for storing in records
                remove_username($full_url);
                $log_entry{metadata} = "$full_url\@$r $uuid";
                $log_entry{svm_revision} = $r;
                $email ||= "$author\@$uuid";
                $commit_email ||= "$author\@$uuid";
        } elsif ($self->use_svnsync_props) {
                my $full_url = $self->svnsync->{url};
                $full_url .= "/$self->{path}" if length $self->{path};
                remove_username($full_url);
                my $uuid = $self->svnsync->{uuid};
                $log_entry{metadata} = "$full_url\@$rev $uuid";
                $email ||= "$author\@$uuid";
                $commit_email ||= "$author\@$uuid";
        } else {
                my $url = $self->metadata_url;
                remove_username($url);
                my $uuid = $self->rewrite_uuid || $self->ra->get_uuid;
                $log_entry{metadata} = "$url\@$rev " . $uuid;
                $email ||= "$author\@" . $uuid;
                $commit_email ||= "$author\@" . $uuid;
        }
        $log_entry{name} = $name;
        $log_entry{email} = $email;
        $log_entry{commit_name} = $commit_name;
        $log_entry{commit_email} = $commit_email;
        \%log_entry;
}
1874
# Fetch revisions for this ref.
#
# Arguments: $min_rev and $max_rev are handed to get_fetch_range(); any
# further arguments are accepted but not used.  Returns whatever
# gs_fetch_loop_common() returns.
sub fetch {
        my $self = shift;
        my ($min_rev, $max_rev) = @_;

        # The result is discarded.
        # NOTE(review): presumably called for its side effects -- confirm
        # before removing.
        my @last = $self->last_rev_commit;

        my ($from, $to) = $self->get_fetch_range($min_rev, $max_rev);
        return $self->ra->gs_fetch_loop_common($from, $to, [ $self ]);
}
1881
# Editor callback used by set_tree: record $tree as the parent to inject
# for revision $rev, then fetch.  The $log_entry, $date and $author
# arguments are accepted but not used.
sub set_tree_cb {
        my $self = shift;
        my (undef, $tree, $rev) = @_;

        $self->{inject_parents} = { $rev => $tree };
        return $self->fetch(undef, undef);
}
1887
# Apply the difference between $self->{last_commit} and $tree through a
# Git::SVN::Editor, using the log message of $tree's commit entry.
# Calls fatal() when $self->{last_rev} is not set, and prints a
# "No changes" note when apply_diff() returns false.
sub set_tree {
        my $self = shift;
        my $tree = shift;

        my $log_entry = ::get_commit_entry($tree);
        fatal("Must have an existing revision to commit")
                unless $self->{last_rev};

        my $on_commit = sub { $self->set_tree_cb($log_entry, $tree, @_) };
        my %ed_opts = (
                r         => $self->{last_rev},
                log       => $log_entry->{log},
                ra        => $self->ra,
                tree_a    => $self->{last_commit},
                tree_b    => $tree,
                editor_cb => $on_commit,
                svn_path  => $self->{path},
        );

        my $editor = Git::SVN::Editor->new(\%ed_opts);
        unless ($editor->apply_diff) {
                print "No changes\nr$self->{last_rev} = $tree\n";
        }
}
1906
# Import an old-format rev_db file into the rev_map, then delete it.
#
# The file at $path is read line by line; every line must be exactly 41
# bytes long.  The zero-based line index is the revision number.  Lines
# consisting of 40 zeroes are skipped; every other line is stored with
# rev_map_set() and reported on STDOUT.  Croaks on any I/O failure or on
# a line of the wrong length.
sub rebuild_from_rev_db {
        my ($self, $path) = @_;

        open my $fh, '<', $path or croak "open: $!";
        binmode $fh or croak "binmode: $!";

        my $blank = '0' x 40;
        my $rev = 0;
        while (defined(my $line = readline($fh))) {
                croak "inconsistent size in ($line) != 41"
                        unless length($line) == 41;
                chomp($line);
                my $r = $rev++;
                next if $line eq $blank;
                $self->rev_map_set($r, $line);
                print "r$r = $line\n";
        }

        close $fh or croak "close: $!";
        unlink $path or croak "unlink: $!";
}
1923
# Bring the rev_map of this ref up to date from existing data.
#
# Returns early when the ref cannot be verified.  When the map file is
# missing or empty and either use_svm_props or no_metadata is set, the map
# is imported from the old rev_db file(s).  Otherwise the git-svn-id lines
# of the ref's history are scanned (only the commits after the last mapped
# one when the map is non-empty) and every matching revision is stored
# with rev_map_set().  A leftover rev_db file and its symlink are removed
# at the end.
sub rebuild {
        my ($self) = @_;
        my $map_path = $self->map_path;
        # "partial": a non-empty map already exists
        my $partial = (-e $map_path && ! -z $map_path);
        return unless ::verify_ref($self->refname.'^0');
        if (!$partial && ($self->use_svm_props || $self->no_metadata)) {
                my $rev_db = $self->rev_db_path;
                $self->rebuild_from_rev_db($rev_db);
                if ($self->use_svm_props) {
                        my $svm_rev_db = $self->rev_db_path($self->svm_uuid);
                        $self->rebuild_from_rev_db($svm_rev_db);
                }
                $self->unlink_rev_db_symlink;
                return;
        }
        print "Rebuilding $map_path ...\n" if (!$partial);
        my ($base_rev, $head) = ($partial ? $self->rev_map_max_norebuild(1) :
                (undef, undef));
        my ($log, $ctx) =
            command_output_pipe(qw/rev-list --pretty=raw --reverse/,
                                ($head ? "$head.." : "") . $self->refname,
                                '--');
        my $metadata_url = $self->metadata_url;
        remove_username($metadata_url);
        my $svn_uuid = $self->rewrite_uuid || $self->ra_uuid;
        my $c;
        # a named loop variable keeps the caller's $_ untouched
        while (defined(my $line = <$log>)) {
                if ( $line =~ m{^commit ($::sha1)$} ) {
                        $c = $1;
                        next;
                }
                next unless $line =~ s{^\s*(git-svn-id:)}{$1};
                my ($url, $rev, $uuid) = ::extract_metadata($line);
                remove_username($url);

                # ignore merges (from set-tree)
                next if (!defined $rev || !$uuid);

                # if we merged or otherwise started elsewhere, this is
                # how we break out of it
                if (($uuid ne $svn_uuid) ||
                    ($metadata_url && $url && ($url ne $metadata_url))) {
                        next;
                }
                # announce a partial rebuild once, on the first new entry
                if ($partial && $head) {
                        print "Partial-rebuilding $map_path ...\n";
                        print "Currently at $base_rev = $head\n";
                        $head = undef;
                }

                $self->rev_map_set($rev, $c);
                print "r$rev = $c\n";
        }
        command_close_pipe($log, $ctx);
        print "Done rebuilding $map_path\n" if (!$partial || !$head);
        my $rev_db_path = $self->rev_db_path;
        if (-f $rev_db_path) {
                unlink $rev_db_path or croak "unlink: $!";
        }
        $self->unlink_rev_db_symlink;
}
1985
1986# rev_map:
1987# Tie::File seems to be prone to offset errors if revisions get sparse,
1988# it's not that fast, either.  Tie::File is also not in Perl 5.6.  So
1989# one of my favorite modules is out :<  Next up would be one of the DBM
1990# modules, but I'm not sure which is most portable...
1991#
1992# This is the replacement for the rev_db format, which was too big
1993# and inefficient for large repositories with a lot of sparse history
1994# (mainly tags)
1995#
1996# The format is this:
1997#   - 24 bytes for every record,
1998#     * 4 bytes for the integer representing an SVN revision number
1999#     * 20 bytes representing the sha1 of a git commit
2000#   - No empty padding records like the old format
2001#     (except the last record, which can be overwritten)
2002#   - new records are written append-only since SVN revision numbers
2003#     increase monotonically
2004#   - lookups on SVN revision number are done via a binary search
2005#   - Piping the file to xxd -c24 is a good way of dumping it for
2006#     viewing or editing (piped back through xxd -r), should the need
2007#     ever arise.
2008#   - The last record can be padding revision with an all-zero sha1
2009#     This is used to optimize fetch performance when using multiple
2010#     "fetch" directives in .git/config
2011#
2012# These files are disposable unless noMetadata or useSvmProps is set
2013
# Append the record ($rev, $commit) to the rev_map open on $fh.
#
# If the last record carries an all-zero sha1 it is overwritten instead of
# being kept, after checking that the revision before it (or its own
# revision, when it is the only record) is lower than $rev.  Croaks when
# the file size is not a multiple of 24, on I/O errors, when the last two
# records both carry an all-zero sha1, or when the ordering check fails.
sub _rev_map_set {
        my ($fh, $rev, $commit) = @_;

        binmode $fh or croak "binmode: $!";
        my $size = (stat($fh))[7];
        croak "inconsistent size: $size" unless ($size % 24) == 0;

        my $zero_sha1 = '0' x40;

        # read the record that starts $back bytes before the end of file
        my $record_from_end = sub {
                my ($back) = @_;
                sysseek($fh, -$back, SEEK_END) or croak "seek: $!";
                my $got = sysread($fh, my $raw, 24) or croak "read: $!";
                $got == 24 or croak "read only $got bytes (!= 24)";
                return unpack(rev_map_fmt, $raw);
        };

        my $wr_offset = 0;
        if ($size > 0) {
                my ($last_rev, $last_commit) = $record_from_end->(24);
                if ($last_commit eq $zero_sha1) {
                        if ($size >= 48) {
                                ($last_rev, $last_commit) =
                                    $record_from_end->(48);
                                croak "inconsistent .rev_map\n"
                                        if $last_commit eq $zero_sha1;
                        }
                        croak "last_rev is higher!: $last_rev >= $rev"
                                if $last_rev >= $rev;
                        # write over the placeholder record
                        $wr_offset = -24;
                }
        }

        sysseek($fh, $wr_offset, SEEK_END) or croak "seek: $!";
        my $record = pack(rev_map_fmt, $rev, $commit);
        syswrite($fh, $record, 24) == 24 or
          croak "write: $!";
}
2050
# Cut the rev_map open on $fh at the file position left behind by the
# lookup of $rev.  Dies unless the commit stored for $rev equals $commit.
sub _rev_map_reset {
        my ($fh, $rev, $commit) = @_;

        my $stored = _rev_map_get($fh, $rev);
        die "_rev_map_reset(@_) commit $stored does not match!\n"
                unless $stored eq $commit;

        # the lookup leaves the file position where its last read ended
        my $cut_at = sysseek($fh, 0, SEEK_CUR) or croak "seek: $!";
        truncate($fh, $cut_at) or croak "truncate: $!";
}
2058
# Create an empty file at $path, together with its directory, unless
# something already exists at $path.  Dies when the file cannot be
# created or closed.
sub mkfile {
        my ($path) = @_;
        return if -e $path;

        my ($dir) = ($path =~ m#^(.*?)/?([^/]+)$#);
        mkpath([$dir]) unless -d $dir;

        # opening for append creates the file without truncating anything
        open my $fh, '>>', $path or die "Couldn't create $path: $!\n";
        close $fh or die "Couldn't close (create) $path: $!\n";
}
2068
# Record that SVN revision $rev corresponds to git commit $commit.
#
# Arguments:
#   $rev, $commit - the mapping to store; $commit must be 40 characters
#   $update_ref   - when true, signals are blocked for the duration and the
#                   ref is updated with "update-ref"; the special value
#                   'reset' truncates the map via _rev_map_reset() instead
#                   of appending
#   $uuid         - passed on to map_path()
#
# The map is modified through "<map>.lock" (a copy of the map when
# use_svm_props or no_metadata is set, the renamed map otherwise), which
# is renamed back over the map at the end.
sub rev_map_set {
        my ($self, $rev, $commit, $update_ref, $uuid) = @_;
        die "missing arg3\n" unless defined $commit;
        die "arg3 must be a full SHA1 hexsum\n" unless length($commit) == 40;

        my $db = $self->map_path($uuid);
        my $db_lock = "$db.lock";
        $update_ref ||= 0;

        my $sigmask;
        if ($update_ref) {
                $sigmask = POSIX::SigSet->new();
                my $signew = POSIX::SigSet->new(SIGINT, SIGHUP, SIGTERM,
                        SIGALRM, SIGUSR1, SIGUSR2);
                sigprocmask(SIG_BLOCK, $signew, $sigmask) or
                        croak "Can't block signals: $!";
        }
        mkfile($db);

        $LOCKFILES{$db_lock} = 1;
        # both of these options make our .rev_db file very, very important
        # and we can't afford to lose it because rebuild() won't work
        my $sync = ($self->use_svm_props || $self->no_metadata);
        if ($sync) {
                copy($db, $db_lock) or die "rev_map_set(@_): ",
                                           "Failed to copy: ",
                                           "$db => $db_lock ($!)\n";
        } else {
                rename $db, $db_lock or die "rev_map_set(@_): ",
                                            "Failed to rename: ",
                                            "$db => $db_lock ($!)\n";
        }

        sysopen(my $fh, $db_lock, O_RDWR | O_CREAT)
             or croak "Couldn't open $db_lock: $!\n";
        if ($update_ref eq 'reset') {
                _rev_map_reset($fh, $rev, $commit);
        } else {
                _rev_map_set($fh, $rev, $commit);
        }
        if ($sync) {
                $fh->flush or die "Couldn't flush $db_lock: $!\n";
                $fh->sync or die "Couldn't sync $db_lock: $!\n";
        }
        close $fh or croak $!;

        if ($update_ref) {
                $_head = $self;
                # a non-numeric $update_ref is added to the reflog message
                my $note = ($update_ref !~ /^\d*$/) ? " ($update_ref)" : "";
                command_noisy('update-ref', '-m', "r$rev$note",
                              $self->refname, $commit);
        }

        rename $db_lock, $db or die "rev_map_set(@_): ", "Failed to rename: ",
                                    "$db_lock => $db ($!)\n";
        delete $LOCKFILES{$db_lock};

        if ($update_ref) {
                sigprocmask(SIG_SETMASK, $sigmask) or
                        croak "Can't restore signal mask: $!";
        }
}
2125
# Rebuild the map if needed, then report its highest revision.
#
# With a true $want_commit the result is the list (rev, commit), as
# delivered by rev_map_max_norebuild($want_commit).  Otherwise only the
# revision is returned.
sub rev_map_max {
        my ($self, $want_commit) = @_;

        $self->rebuild;

        my ($max_rev, $max_commit) =
                $self->rev_map_max_norebuild($want_commit);
        return ($max_rev, $max_commit) if $want_commit;
        return $max_rev;
}
2136
# Report the highest revision in the map file without rebuilding it.
#
# Without $want_commit: returns the revision of the last record, or 0 when
# the map file cannot be stat'ed or is empty.
# With $want_commit: returns (rev, commit) of the last record, stepping
# back one record when the last one carries an all-zero sha1; returns
# (0, undef) when there is no usable record.  Croaks when the size of the
# file is not a multiple of 24, on I/O errors, or when the record before
# an all-zero last record is all-zero as well.
sub rev_map_max_norebuild {
        my ($self, $want_commit) = @_;
        my $map_path = $self->map_path;

        unless (stat $map_path) {
                return $want_commit ? (0, undef) : 0;
        }
        sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
        binmode $fh or croak "binmode: $!";
        my $size = (stat($fh))[7];
        croak "inconsistent size: $size" unless ($size % 24) == 0;

        if ($size == 0) {
                close $fh or croak "close: $!";
                return $want_commit ? (0, undef) : 0;
        }

        my $zero_sha1 = '0' x40;
        my $raw;

        sysseek($fh, -24, SEEK_END) or croak "seek: $!";
        sysread($fh, $raw, 24) == 24 or croak "read: $!";
        my ($max_rev, $max_commit) = unpack(rev_map_fmt, $raw);

        if ($want_commit && $max_commit eq $zero_sha1) {
                # the last record is only a placeholder; use the one before
                return $want_commit ? (0, undef) : 0 if $size < 48;

                sysseek($fh, -48, SEEK_END) or croak "seek: $!";
                sysread($fh, $raw, 24) == 24 or croak "read: $!";
                ($max_rev, $max_commit) = unpack(rev_map_fmt, $raw);
                croak "Penultimate record is all-zeroes in $map_path"
                        if $max_commit eq $zero_sha1;
        }
        close $fh or croak "close: $!";
        $want_commit ? ($max_rev, $max_commit) : $max_rev;
}
2168
# Look up the git commit recorded for SVN revision $rev in the map file
# selected by map_path($uuid).  Returns undef when the map file does not
# exist; otherwise returns the result of _rev_map_get().
sub rev_map_get {
        my ($self, $rev, $uuid) = @_;

        my $map_path = $self->map_path($uuid);
        return undef unless -e $map_path;

        sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
        my $commit = _rev_map_get($fh, $rev);
        close($fh) or croak "close: $!";
        return $commit;
}
2179
# Binary-search the rev_map open on $fh for SVN revision $rev.
#
# Returns the commit stored for $rev, or undef when the file is empty,
# the revision is not present, or the stored sha1 is all zeroes.  Croaks
# when the file size is not a multiple of 24 or on I/O errors.
sub _rev_map_get {
        my ($fh, $rev) = @_;

        binmode $fh or croak "binmode: $!";
        my $size = (stat($fh))[7];
        croak "inconsistent size: $size" unless ($size % 24) == 0;

        return undef if $size == 0;

        # byte offsets of the first and last record still in play
        my $low = 0;
        my $high = $size - 24;

        while ($low <= $high) {
                my $mid = int(($low/24 + $high/24) / 2) * 24;
                sysseek($fh, $mid, SEEK_SET) or croak "seek: $!";
                sysread($fh, my $raw, 24) == 24 or croak "read: $!";
                my ($found_rev, $found_commit) = unpack(rev_map_fmt, $raw);

                if ($found_rev == $rev) {
                        return $found_commit eq ('0' x 40)
                                ? undef : $found_commit;
                }
                if ($found_rev < $rev) {
                        $low = $mid + 24;
                } else {
                        $high = $mid - 24;
                }
        }
        return undef;
}
2210
# Finds the first svn revision that exists on (if $eq_ok is true) or
# before $rev for the current branch.  It will not search any lower
# than $min_rev (1 when not given) and starts no higher than
# rev_map_max.
#
# Returns the list (svn revision number, git commit hash) if found, else
# (undef, undef).  The return statements are written as literal lists on
# purpose: a caller assigning the result to a scalar receives the last
# element, i.e. the commit hash.
sub find_rev_before {
        my ($self, $rev, $eq_ok, $min_rev) = @_;

        $rev-- unless $eq_ok;
        $min_rev ||= 1;

        my $ceiling = $self->rev_map_max;
        $rev = $ceiling if $rev > $ceiling;

        for (my $candidate = $rev; $candidate >= $min_rev; $candidate--) {
                my $commit = $self->rev_map_get($candidate);
                return ($candidate, $commit) if $commit;
        }
        return (undef, undef);
}
2229
# Searches upwards for the oldest svn revision that has a commit in the
# rev_map.  $rev itself is a candidate only when $eq_ok is true;
# otherwise the search starts at $rev + 1.  Nothing above $max_rev
# (default: rev_map_max) is examined.
# Returns (svn revision number, git commit hash) if found, else
# (undef, undef).
sub find_rev_after {
        my ($self, $rev, $eq_ok, $max_rev) = @_;
        $rev++ unless $eq_ok;
        $max_rev = $self->rev_map_max unless $max_rev;
        for (my $candidate = $rev; $candidate <= $max_rev; $candidate++) {
                my $commit = $self->rev_map_get($candidate);
                return ($candidate, $commit) if $commit;
        }
        return (undef, undef);
}
2246
# Build the blessed hash describing one tracked ref.
#
# Arguments: ($class, $repo_id, $ref_id, $path).
#   - an undefined or empty $repo_id falls back to $default_repo_id;
#   - an undefined or empty $ref_id becomes
#     "refs/remotes/<prefix>$default_ref_id", where <prefix> comes from
#     ::opt_prefix() when that sub is defined and is "" otherwise;
#   - an undefined $path becomes ''.
# The resolved values are also written back through $_[1], $_[2] and
# $_[3], i.e. into the variables the caller passed in.
# Side effect: the metadata directory is created with mkpath.
sub _new {
        my ($class, $repo_id, $ref_id, $path) = @_;

        my $have_repo_id = defined $repo_id && length $repo_id;
        $repo_id = $default_repo_id unless $have_repo_id;

        if (!(defined $ref_id && length $ref_id)) {
                # Access the prefix option from the git-svn main program if it's loaded.
                my $prefix = defined &::opt_prefix ? ::opt_prefix() : "";
                $ref_id = "refs/remotes/$prefix$default_ref_id";
                $_[2] = $ref_id;
        }
        $_[1] = $repo_id;
        my $dir = "$ENV{GIT_DIR}/svn/$ref_id";

        # Older repos imported by us used $GIT_DIR/svn/foo instead of
        # $GIT_DIR/svn/refs/remotes/foo when tracking refs/remotes/foo;
        # keep using the old directory when only it exists.
        if ($ref_id =~ m{^refs/remotes/(.*)}) {
                my $old_dir = "$ENV{GIT_DIR}/svn/$1";
                $dir = $old_dir if -d $old_dir && ! -d $dir;
        }

        if (!defined $path) {
                $path = '';
                $_[3] = $path;
        }
        mkpath([$dir]);

        my %self = (
                ref_id   => $ref_id,
                dir      => $dir,
                index    => "$dir/index",
                path     => $path,
                config   => "$ENV{GIT_DIR}/svn/config",
                map_root => "$dir/.rev_map",
                repo_id  => $repo_id,
        );
        return bless \%self, $class;
}
2277
# for read-only access of old .rev_db formats
#
# Takes the path from rev_db_path(), strips its final ".<suffix>"
# component (the error message calls it the UUID) and, if what remains
# is a symlink, removes it.  Croaks when the path has no such suffix or
# when the unlink fails.
sub unlink_rev_db_symlink {
        my ($self) = @_;
        my $link = $self->rev_db_path;
        unless ($link =~ s/\.[\w-]+$//) {
                croak "missing UUID at the end of $link";
        }
        (unlink $link or croak "unlink: $link failed!") if -l $link;
}
2287
# Derive the old-style .rev_db path for $uuid from the rev_map path:
# the first "/.rev_map." in map_path($uuid) is rewritten to "/.rev_db.".
# Croaks if map_path() returns something without "/.rev_map." in it.
sub rev_db_path {
        my ($self, $uuid) = @_;
        my $db_path = $self->map_path($uuid);
        unless ($db_path =~ s{/\.rev_map\.}{/\.rev_db\.}) {
                croak "map_path: $db_path does not contain '/.rev_map.' !";
        }
        return $db_path;
}
2295
# the new replacement for .rev_db
#
# Returns "<map_root>.<uuid>".  When $uuid is false, $self->ra_uuid is
# used instead.
sub map_path {
        my ($self, $uuid) = @_;
        $uuid = $self->ra_uuid unless $uuid;
        return "$self->{map_root}.$uuid";
}
2302
# Percent-encode a string: every character other than ASCII letters,
# digits and * ! : _ . / - is replaced by "%" followed by its code point
# in upper-case hex (at least two digits).  Returns the encoded copy;
# the argument is not modified.
sub uri_encode {
        my ($f) = @_;
        my $escape = sub { uc sprintf("%%%02x", ord($_[0])) };
        $f =~ s#([^a-zA-Z0-9\*!\:_\./\-])#$escape->($1)#eg;
        return $f;
}
2308
# Reverse percent-encoding: each "%" followed by exactly two hex digits
# is replaced by the character with that code.  Anything else, including
# a "%" not followed by two hex digits, is left alone.  Returns the
# decoded copy; the argument is not modified.
sub uri_decode {
        my ($f) = @_;
        my $unescape = sub { chr(hex($_[0])) };
        $f =~ s#%([0-9a-fA-F]{2})#$unescape->($1)#eg;
        return $f;
}
2314
# Strip the "user@" (or "user:password@") part from the URL in $_[0].
# The argument is modified in place through the @_ alias.
#
# The userinfo match is confined to the authority component: it may not
# cross a "/", "?" or "#".  Without that restriction a URL that has no
# username but contains "@" later on (e.g. "http://host/repo/trunk@123")
# would lose its host and leading path.
sub remove_username {
        $_[0] =~ s{^([^:]*://)[^@/?#]+@}{$1};
}
2318
23191;