perl / Git / SVN.pmon commit fsck: handle multiple authors in commits specially (c9ad147)
   1package Git::SVN;
   2use strict;
   3use warnings;
   4use Fcntl qw/:DEFAULT :seek/;
   5use constant rev_map_fmt => 'NH40';
   6use vars qw/$_no_metadata
   7            $_repack $_repack_flags $_use_svm_props $_head
   8            $_use_svnsync_props $no_reuse_existing
   9            $_use_log_author $_add_author_from $_localtime/;
  10use Carp qw/croak/;
  11use File::Path qw/mkpath/;
  12use IPC::Open3;
  13use Memoize;  # core since 5.8.0, Jul 2002
  14use POSIX qw(:signal_h);
  15use Time::Local;
  16
  17use Git qw(
  18    command
  19    command_oneline
  20    command_noisy
  21    command_output_pipe
  22    command_close_pipe
  23    get_tz_offset
  24);
  25use Git::SVN::Utils qw(
  26        fatal
  27        can_compress
  28        join_paths
  29        canonicalize_path
  30        canonicalize_url
  31        add_path_to_url
  32);
  33
  34my $memo_backend;
  35our $_follow_parent  = 1;
  36our $_minimize_url   = 'unset';
  37our $default_repo_id = 'svn';
  38our $default_ref_id  = $ENV{GIT_SVN_ID} || 'git-svn';
  39
  40my ($_gc_nr, $_gc_period);
  41
  42# properties that we do not log:
  43my %SKIP_PROP;
  44BEGIN {
  45        %SKIP_PROP = map { $_ => 1 } qw/svn:wc:ra_dav:version-url
  46                                        svn:special svn:executable
  47                                        svn:entry:committed-rev
  48                                        svn:entry:last-author
  49                                        svn:entry:uuid
  50                                        svn:entry:committed-date/;
  51
  52        # some options are read globally, but can be overridden locally
  53        # per [svn-remote "..."] section.  Command-line options will *NOT*
  54        # override options set in an [svn-remote "..."] section
  55        no strict 'refs';
  56        for my $option (qw/follow_parent no_metadata use_svm_props
  57                           use_svnsync_props/) {
  58                my $key = $option;
  59                $key =~ tr/_//d;
  60                my $prop = "-$option";
  61                *$option = sub {
  62                        my ($self) = @_;
  63                        return $self->{$prop} if exists $self->{$prop};
  64                        my $k = "svn-remote.$self->{repo_id}.$key";
  65                        eval { command_oneline(qw/config --get/, $k) };
  66                        if ($@) {
  67                                $self->{$prop} = ${"Git::SVN::_$option"};
  68                        } else {
  69                                my $v = command_oneline(qw/config --bool/,$k);
  70                                $self->{$prop} = $v eq 'false' ? 0 : 1;
  71                        }
  72                        return $self->{$prop};
  73                }
  74        }
  75}
  76
  77
  78my (%LOCKFILES, %INDEX_FILES);
  79END {
  80        unlink keys %LOCKFILES if %LOCKFILES;
  81        unlink keys %INDEX_FILES if %INDEX_FILES;
  82}
  83
  84sub resolve_local_globs {
  85        my ($url, $fetch, $glob_spec) = @_;
  86        return unless defined $glob_spec;
  87        my $ref = $glob_spec->{ref};
  88        my $path = $glob_spec->{path};
  89        foreach (command(qw#for-each-ref --format=%(refname) refs/#)) {
  90                next unless m#^$ref->{regex}$#;
  91                my $p = $1;
  92                my $pathname = desanitize_refname($path->full_path($p));
  93                my $refname = desanitize_refname($ref->full_path($p));
  94                if (my $existing = $fetch->{$pathname}) {
  95                        if ($existing ne $refname) {
  96                                die "Refspec conflict:\n",
  97                                    "existing: $existing\n",
  98                                    " globbed: $refname\n";
  99                        }
 100                        my $u = (::cmt_metadata("$refname"))[0];
 101                        $u =~ s!^\Q$url\E(/|$)!! or die
 102                          "$refname: '$url' not found in '$u'\n";
 103                        if ($pathname ne $u) {
 104                                warn "W: Refspec glob conflict ",
 105                                     "(ref: $refname):\n",
 106                                     "expected path: $pathname\n",
 107                                     "    real path: $u\n",
 108                                     "Continuing ahead with $u\n";
 109                                next;
 110                        }
 111                } else {
 112                        $fetch->{$pathname} = $refname;
 113                }
 114        }
 115}
 116
 117sub parse_revision_argument {
 118        my ($base, $head) = @_;
 119        if (!defined $::_revision || $::_revision eq 'BASE:HEAD') {
 120                return ($base, $head);
 121        }
 122        return ($1, $2) if ($::_revision =~ /^(\d+):(\d+)$/);
 123        return ($::_revision, $::_revision) if ($::_revision =~ /^\d+$/);
 124        return ($head, $head) if ($::_revision eq 'HEAD');
 125        return ($base, $1) if ($::_revision =~ /^BASE:(\d+)$/);
 126        return ($1, $head) if ($::_revision =~ /^(\d+):HEAD$/);
 127        die "revision argument: $::_revision not understood by git-svn\n";
 128}
 129
 130sub fetch_all {
 131        my ($repo_id, $remotes) = @_;
 132        if (ref $repo_id) {
 133                my $gs = $repo_id;
 134                $repo_id = undef;
 135                $repo_id = $gs->{repo_id};
 136        }
 137        $remotes ||= read_all_remotes();
 138        my $remote = $remotes->{$repo_id} or
 139                     die "[svn-remote \"$repo_id\"] unknown\n";
 140        my $fetch = $remote->{fetch};
 141        my $url = $remote->{url} or die "svn-remote.$repo_id.url not defined\n";
 142        my (@gs, @globs);
 143        my $ra = Git::SVN::Ra->new($url);
 144        my $uuid = $ra->get_uuid;
 145        my $head = $ra->get_latest_revnum;
 146
 147        # ignore errors, $head revision may not even exist anymore
 148        eval { $ra->get_log("", $head, 0, 1, 0, 1, sub { $head = $_[1] }) };
 149        warn "W: $@\n" if $@;
 150
 151        my $base = defined $fetch ? $head : 0;
 152
 153        # read the max revs for wildcard expansion (branches/*, tags/*)
 154        foreach my $t (qw/branches tags/) {
 155                defined $remote->{$t} or next;
 156                push @globs, @{$remote->{$t}};
 157
 158                my $max_rev = eval { tmp_config(qw/--int --get/,
 159                                         "svn-remote.$repo_id.${t}-maxRev") };
 160                if (defined $max_rev && ($max_rev < $base)) {
 161                        $base = $max_rev;
 162                } elsif (!defined $max_rev) {
 163                        $base = 0;
 164                }
 165        }
 166
 167        if ($fetch) {
 168                foreach my $p (sort keys %$fetch) {
 169                        my $gs = Git::SVN->new($fetch->{$p}, $repo_id, $p);
 170                        my $lr = $gs->rev_map_max;
 171                        if (defined $lr) {
 172                                $base = $lr if ($lr < $base);
 173                        }
 174                        push @gs, $gs;
 175                }
 176        }
 177
 178        ($base, $head) = parse_revision_argument($base, $head);
 179        $ra->gs_fetch_loop_common($base, $head, \@gs, \@globs);
 180}
 181
 182sub read_all_remotes {
 183        my $r = {};
 184        my $use_svm_props = eval { command_oneline(qw/config --bool
 185            svn.useSvmProps/) };
 186        $use_svm_props = $use_svm_props eq 'true' if $use_svm_props;
 187        my $svn_refspec = qr{\s*(.*?)\s*:\s*(.+?)\s*};
 188        foreach (grep { s/^svn-remote\.// } command(qw/config -l/)) {
 189                if (m!^(.+)\.fetch=$svn_refspec$!) {
 190                        my ($remote, $local_ref, $remote_ref) = ($1, $2, $3);
 191                        die("svn-remote.$remote: remote ref '$remote_ref' "
 192                            . "must start with 'refs/'\n")
 193                                unless $remote_ref =~ m{^refs/};
 194                        $local_ref = uri_decode($local_ref);
 195                        $r->{$remote}->{fetch}->{$local_ref} = $remote_ref;
 196                        $r->{$remote}->{svm} = {} if $use_svm_props;
 197                } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) {
 198                        $r->{$1}->{svm} = {};
 199                } elsif (m!^(.+)\.url=\s*(.*)\s*$!) {
 200                        $r->{$1}->{url} = canonicalize_url($2);
 201                } elsif (m!^(.+)\.pushurl=\s*(.*)\s*$!) {
 202                        $r->{$1}->{pushurl} = canonicalize_url($2);
 203                } elsif (m!^(.+)\.ignore-refs=\s*(.*)\s*$!) {
 204                        $r->{$1}->{ignore_refs_regex} = $2;
 205                } elsif (m!^(.+)\.(branches|tags)=$svn_refspec$!) {
 206                        my ($remote, $t, $local_ref, $remote_ref) =
 207                                                             ($1, $2, $3, $4);
 208                        die("svn-remote.$remote: remote ref '$remote_ref' ($t) "
 209                            . "must start with 'refs/'\n")
 210                                unless $remote_ref =~ m{^refs/};
 211                        $local_ref = uri_decode($local_ref);
 212
 213                        require Git::SVN::GlobSpec;
 214                        my $rs = {
 215                            t => $t,
 216                            remote => $remote,
 217                            path => Git::SVN::GlobSpec->new($local_ref, 1),
 218                            ref => Git::SVN::GlobSpec->new($remote_ref, 0) };
 219                        if (length($rs->{ref}->{right}) != 0) {
 220                                die "The '*' glob character must be the last ",
 221                                    "character of '$remote_ref'\n";
 222                        }
 223                        push @{ $r->{$remote}->{$t} }, $rs;
 224                }
 225        }
 226
 227        map {
 228                if (defined $r->{$_}->{svm}) {
 229                        my $svm;
 230                        eval {
 231                                my $section = "svn-remote.$_";
 232                                $svm = {
 233                                        source => tmp_config('--get',
 234                                            "$section.svm-source"),
 235                                        replace => tmp_config('--get',
 236                                            "$section.svm-replace"),
 237                                }
 238                        };
 239                        $r->{$_}->{svm} = $svm;
 240                }
 241        } keys %$r;
 242
 243        foreach my $remote (keys %$r) {
 244                foreach ( grep { defined $_ }
 245                          map { $r->{$remote}->{$_} } qw(branches tags) ) {
 246                        foreach my $rs ( @$_ ) {
 247                                $rs->{ignore_refs_regex} =
 248                                    $r->{$remote}->{ignore_refs_regex};
 249                        }
 250                }
 251        }
 252
 253        $r;
 254}
 255
 256sub init_vars {
 257        $_gc_nr = $_gc_period = 1000;
 258        if (defined $_repack || defined $_repack_flags) {
 259               warn "Repack options are obsolete; they have no effect.\n";
 260        }
 261}
 262
 263sub verify_remotes_sanity {
 264        return unless -d $ENV{GIT_DIR};
 265        my %seen;
 266        foreach (command(qw/config -l/)) {
 267                if (m!^svn-remote\.(?:.+)\.fetch=.*:refs/remotes/(\S+)\s*$!) {
 268                        if ($seen{$1}) {
 269                                die "Remote ref refs/remote/$1 is tracked by",
 270                                    "\n  \"$_\"\nand\n  \"$seen{$1}\"\n",
 271                                    "Please resolve this ambiguity in ",
 272                                    "your git configuration file before ",
 273                                    "continuing\n";
 274                        }
 275                        $seen{$1} = $_;
 276                }
 277        }
 278}
 279
 280sub find_existing_remote {
 281        my ($url, $remotes) = @_;
 282        return undef if $no_reuse_existing;
 283        my $existing;
 284        foreach my $repo_id (keys %$remotes) {
 285                my $u = $remotes->{$repo_id}->{url} or next;
 286                next if $u ne $url;
 287                $existing = $repo_id;
 288                last;
 289        }
 290        $existing;
 291}
 292
 293sub init_remote_config {
 294        my ($self, $url, $no_write) = @_;
 295        $url = canonicalize_url($url);
 296        my $r = read_all_remotes();
 297        my $existing = find_existing_remote($url, $r);
 298        if ($existing) {
 299                unless ($no_write) {
 300                        print STDERR "Using existing ",
 301                                     "[svn-remote \"$existing\"]\n";
 302                }
 303                $self->{repo_id} = $existing;
 304        } elsif ($_minimize_url) {
 305                my $min_url = Git::SVN::Ra->new($url)->minimize_url;
 306                $existing = find_existing_remote($min_url, $r);
 307                if ($existing) {
 308                        unless ($no_write) {
 309                                print STDERR "Using existing ",
 310                                             "[svn-remote \"$existing\"]\n";
 311                        }
 312                        $self->{repo_id} = $existing;
 313                }
 314                if ($min_url ne $url) {
 315                        unless ($no_write) {
 316                                print STDERR "Using higher level of URL: ",
 317                                             "$url => $min_url\n";
 318                        }
 319                        my $old_path = $self->path;
 320                        $url =~ s!^\Q$min_url\E(/|$)!!;
 321                        $url = join_paths($url, $old_path);
 322                        $self->path($url);
 323                        $url = $min_url;
 324                }
 325        }
 326        my $orig_url;
 327        if (!$existing) {
 328                # verify that we aren't overwriting anything:
 329                $orig_url = eval {
 330                        command_oneline('config', '--get',
 331                                        "svn-remote.$self->{repo_id}.url")
 332                };
 333                if ($orig_url && ($orig_url ne $url)) {
 334                        die "svn-remote.$self->{repo_id}.url already set: ",
 335                            "$orig_url\nwanted to set to: $url\n";
 336                }
 337        }
 338        my ($xrepo_id, $xpath) = find_ref($self->refname);
 339        if (!$no_write && defined $xpath) {
 340                die "svn-remote.$xrepo_id.fetch already set to track ",
 341                    "$xpath:", $self->refname, "\n";
 342        }
 343        unless ($no_write) {
 344                command_noisy('config',
 345                              "svn-remote.$self->{repo_id}.url", $url);
 346                my $path = $self->path;
 347                $path =~ s{^/}{};
 348                $path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg;
 349                $self->path($path);
 350                command_noisy('config', '--add',
 351                              "svn-remote.$self->{repo_id}.fetch",
 352                              $self->path.":".$self->refname);
 353        }
 354        $self->url($url);
 355}
 356
 357sub find_by_url { # repos_root and, path are optional
 358        my ($class, $full_url, $repos_root, $path) = @_;
 359
 360        $full_url = canonicalize_url($full_url);
 361
 362        return undef unless defined $full_url;
 363        remove_username($full_url);
 364        remove_username($repos_root) if defined $repos_root;
 365        my $remotes = read_all_remotes();
 366        if (defined $full_url && defined $repos_root && !defined $path) {
 367                $path = $full_url;
 368                $path =~ s#^\Q$repos_root\E(?:/|$)##;
 369        }
 370        foreach my $repo_id (keys %$remotes) {
 371                my $u = $remotes->{$repo_id}->{url} or next;
 372                remove_username($u);
 373                next if defined $repos_root && $repos_root ne $u;
 374
 375                my $fetch = $remotes->{$repo_id}->{fetch} || {};
 376                foreach my $t (qw/branches tags/) {
 377                        foreach my $globspec (@{$remotes->{$repo_id}->{$t}}) {
 378                                resolve_local_globs($u, $fetch, $globspec);
 379                        }
 380                }
 381                my $p = $path;
 382                my $rwr = rewrite_root({repo_id => $repo_id});
 383                my $svm = $remotes->{$repo_id}->{svm}
 384                        if defined $remotes->{$repo_id}->{svm};
 385                unless (defined $p) {
 386                        $p = $full_url;
 387                        my $z = $u;
 388                        my $prefix = '';
 389                        if ($rwr) {
 390                                $z = $rwr;
 391                                remove_username($z);
 392                        } elsif (defined $svm) {
 393                                $z = $svm->{source};
 394                                $prefix = $svm->{replace};
 395                                $prefix =~ s#^\Q$u\E(?:/|$)##;
 396                                $prefix =~ s#/$##;
 397                        }
 398                        $p =~ s#^\Q$z\E(?:/|$)#$prefix# or next;
 399                }
 400
 401                # remote fetch paths are not URI escaped.  Decode ours
 402                # so they match
 403                $p = uri_decode($p);
 404
 405                foreach my $f (keys %$fetch) {
 406                        next if $f ne $p;
 407                        return Git::SVN->new($fetch->{$f}, $repo_id, $f);
 408                }
 409        }
 410        undef;
 411}
 412
 413sub init {
 414        my ($class, $url, $path, $repo_id, $ref_id, $no_write) = @_;
 415        my $self = _new($class, $repo_id, $ref_id, $path);
 416        if (defined $url) {
 417                $self->init_remote_config($url, $no_write);
 418        }
 419        $self;
 420}
 421
 422sub find_ref {
 423        my ($ref_id) = @_;
 424        foreach (command(qw/config -l/)) {
 425                next unless m!^svn-remote\.(.+)\.fetch=
 426                              \s*(.*?)\s*:\s*(.+?)\s*$!x;
 427                my ($repo_id, $path, $ref) = ($1, $2, $3);
 428                if ($ref eq $ref_id) {
 429                        $path = '' if ($path =~ m#^\./?#);
 430                        return ($repo_id, $path);
 431                }
 432        }
 433        (undef, undef, undef);
 434}
 435
 436sub new {
 437        my ($class, $ref_id, $repo_id, $path) = @_;
 438        if (defined $ref_id && !defined $repo_id && !defined $path) {
 439                ($repo_id, $path) = find_ref($ref_id);
 440                if (!defined $repo_id) {
 441                        die "Could not find a \"svn-remote.*.fetch\" key ",
 442                            "in the repository configuration matching: ",
 443                            "$ref_id\n";
 444                }
 445        }
 446        my $self = _new($class, $repo_id, $ref_id, $path);
 447        if (!defined $self->path || !length $self->path) {
 448                my $fetch = command_oneline('config', '--get',
 449                                            "svn-remote.$repo_id.fetch",
 450                                            ":$ref_id\$") or
 451                     die "Failed to read \"svn-remote.$repo_id.fetch\" ",
 452                         "\":$ref_id\$\" in config\n";
 453                my($path) = split(/\s*:\s*/, $fetch);
 454                $self->path($path);
 455        }
 456        {
 457                my $path = $self->path;
 458                $path =~ s{\A/}{};
 459                $path =~ s{/\z}{};
 460                $self->path($path);
 461        }
 462        my $url = command_oneline('config', '--get',
 463                                  "svn-remote.$repo_id.url") or
 464                  die "Failed to read \"svn-remote.$repo_id.url\" in config\n";
 465        $self->url($url);
 466        $self->{pushurl} = eval { command_oneline('config', '--get',
 467                                  "svn-remote.$repo_id.pushurl") };
 468        $self->rebuild;
 469        $self;
 470}
 471
 472sub refname {
 473        my ($refname) = $_[0]->{ref_id} ;
 474
 475        # It cannot end with a slash /, we'll throw up on this because
 476        # SVN can't have directories with a slash in their name, either:
 477        if ($refname =~ m{/$}) {
 478                die "ref: '$refname' ends with a trailing slash; this is ",
 479                    "not permitted by git or Subversion\n";
 480        }
 481
 482        # It cannot have ASCII control character space, tilde ~, caret ^,
 483        # colon :, question-mark ?, asterisk *, space, or open bracket [
 484        # anywhere.
 485        #
 486        # Additionally, % must be escaped because it is used for escaping
 487        # and we want our escaped refname to be reversible
 488        $refname =~ s{([ \%~\^:\?\*\[\t])}{sprintf('%%%02X',ord($1))}eg;
 489
 490        # no slash-separated component can begin with a dot .
 491        # /.* becomes /%2E*
 492        $refname =~ s{/\.}{/%2E}g;
 493
 494        # It cannot have two consecutive dots .. anywhere
 495        # .. becomes %2E%2E
 496        $refname =~ s{\.\.}{%2E%2E}g;
 497
 498        # trailing dots and .lock are not allowed
 499        # .$ becomes %2E and .lock becomes %2Elock
 500        $refname =~ s{\.(?=$|lock$)}{%2E};
 501
 502        # the sequence @{ is used to access the reflog
 503        # @{ becomes %40{
 504        $refname =~ s{\@\{}{%40\{}g;
 505
 506        return $refname;
 507}
 508
 509sub desanitize_refname {
 510        my ($refname) = @_;
 511        $refname =~ s{%(?:([0-9A-F]{2}))}{chr hex($1)}eg;
 512        return $refname;
 513}
 514
 515sub svm_uuid {
 516        my ($self) = @_;
 517        return $self->{svm}->{uuid} if $self->svm;
 518        $self->ra;
 519        unless ($self->{svm}) {
 520                die "SVM UUID not cached, and reading remotely failed\n";
 521        }
 522        $self->{svm}->{uuid};
 523}
 524
 525sub svm {
 526        my ($self) = @_;
 527        return $self->{svm} if $self->{svm};
 528        my $svm;
 529        # see if we have it in our config, first:
 530        eval {
 531                my $section = "svn-remote.$self->{repo_id}";
 532                $svm = {
 533                  source => tmp_config('--get', "$section.svm-source"),
 534                  uuid => tmp_config('--get', "$section.svm-uuid"),
 535                  replace => tmp_config('--get', "$section.svm-replace"),
 536                }
 537        };
 538        if ($svm && $svm->{source} && $svm->{uuid} && $svm->{replace}) {
 539                $self->{svm} = $svm;
 540        }
 541        $self->{svm};
 542}
 543
 544sub _set_svm_vars {
 545        my ($self, $ra) = @_;
 546        return $ra if $self->svm;
 547
 548        my @err = ( "useSvmProps set, but failed to read SVM properties\n",
 549                    "(svm:source, svm:uuid) ",
 550                    "from the following URLs:\n" );
 551        sub read_svm_props {
 552                my ($self, $ra, $path, $r) = @_;
 553                my $props = ($ra->get_dir($path, $r))[2];
 554                my $src = $props->{'svm:source'};
 555                my $uuid = $props->{'svm:uuid'};
 556                return undef if (!$src || !$uuid);
 557
 558                chomp($src, $uuid);
 559
 560                $uuid =~ m{^[0-9a-f\-]{30,}$}i
 561                    or die "doesn't look right - svm:uuid is '$uuid'\n";
 562
 563                # the '!' is used to mark the repos_root!/relative/path
 564                $src =~ s{/?!/?}{/};
 565                $src =~ s{/+$}{}; # no trailing slashes please
 566                # username is of no interest
 567                $src =~ s{(^[a-z\+]*://)[^/@]*@}{$1};
 568
 569                my $replace = add_path_to_url($ra->url, $path);
 570
 571                my $section = "svn-remote.$self->{repo_id}";
 572                tmp_config("$section.svm-source", $src);
 573                tmp_config("$section.svm-replace", $replace);
 574                tmp_config("$section.svm-uuid", $uuid);
 575                $self->{svm} = {
 576                        source => $src,
 577                        uuid => $uuid,
 578                        replace => $replace
 579                };
 580        }
 581
 582        my $r = $ra->get_latest_revnum;
 583        my $path = $self->path;
 584        my %tried;
 585        while (length $path) {
 586                my $try = add_path_to_url($self->url, $path);
 587                unless ($tried{$try}) {
 588                        return $ra if $self->read_svm_props($ra, $path, $r);
 589                        $tried{$try} = 1;
 590                }
 591                $path =~ s#/?[^/]+$##;
 592        }
 593        die "Path: '$path' should be ''\n" if $path ne '';
 594        return $ra if $self->read_svm_props($ra, $path, $r);
 595        $tried{ add_path_to_url($self->url, $path) } = 1;
 596
 597        if ($ra->{repos_root} eq $self->url) {
 598                die @err, (map { "  $_\n" } keys %tried), "\n";
 599        }
 600
 601        # nope, make sure we're connected to the repository root:
 602        my $ok;
 603        my @tried_b;
 604        $path = $ra->{svn_path};
 605        $ra = Git::SVN::Ra->new($ra->{repos_root});
 606        while (length $path) {
 607                my $try = add_path_to_url($ra->url, $path);
 608                unless ($tried{$try}) {
 609                        $ok = $self->read_svm_props($ra, $path, $r);
 610                        last if $ok;
 611                        $tried{$try} = 1;
 612                }
 613                $path =~ s#/?[^/]+$##;
 614        }
 615        die "Path: '$path' should be ''\n" if $path ne '';
 616        $ok ||= $self->read_svm_props($ra, $path, $r);
 617        $tried{ add_path_to_url($ra->url, $path) } = 1;
 618        if (!$ok) {
 619                die @err, (map { "  $_\n" } keys %tried), "\n";
 620        }
 621        Git::SVN::Ra->new($self->url);
 622}
 623
 624sub svnsync {
 625        my ($self) = @_;
 626        return $self->{svnsync} if $self->{svnsync};
 627
 628        if ($self->no_metadata) {
 629                die "Can't have both 'noMetadata' and ",
 630                    "'useSvnsyncProps' options set!\n";
 631        }
 632        if ($self->rewrite_root) {
 633                die "Can't have both 'useSvnsyncProps' and 'rewriteRoot' ",
 634                    "options set!\n";
 635        }
 636        if ($self->rewrite_uuid) {
 637                die "Can't have both 'useSvnsyncProps' and 'rewriteUUID' ",
 638                    "options set!\n";
 639        }
 640
 641        my $svnsync;
 642        # see if we have it in our config, first:
 643        eval {
 644                my $section = "svn-remote.$self->{repo_id}";
 645
 646                my $url = tmp_config('--get', "$section.svnsync-url");
 647                ($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
 648                   die "doesn't look right - svn:sync-from-url is '$url'\n";
 649
 650                my $uuid = tmp_config('--get', "$section.svnsync-uuid");
 651                ($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
 652                   die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
 653
 654                $svnsync = { url => $url, uuid => $uuid }
 655        };
 656        if ($svnsync && $svnsync->{url} && $svnsync->{uuid}) {
 657                return $self->{svnsync} = $svnsync;
 658        }
 659
 660        my $err = "useSvnsyncProps set, but failed to read " .
 661                  "svnsync property: svn:sync-from-";
 662        my $rp = $self->ra->rev_proplist(0);
 663
 664        my $url = $rp->{'svn:sync-from-url'} or die $err . "url\n";
 665        ($url) = ($url =~ m{^([a-z\+]+://\S+)$}) or
 666                   die "doesn't look right - svn:sync-from-url is '$url'\n";
 667
 668        my $uuid = $rp->{'svn:sync-from-uuid'} or die $err . "uuid\n";
 669        ($uuid) = ($uuid =~ m{^([0-9a-f\-]{30,})$}i) or
 670                   die "doesn't look right - svn:sync-from-uuid is '$uuid'\n";
 671
 672        my $section = "svn-remote.$self->{repo_id}";
 673        tmp_config('--add', "$section.svnsync-uuid", $uuid);
 674        tmp_config('--add', "$section.svnsync-url", $url);
 675        return $self->{svnsync} = { url => $url, uuid => $uuid };
 676}
 677
 678# this allows us to memoize our SVN::Ra UUID locally and avoid a
 679# remote lookup (useful for 'git svn log').
 680sub ra_uuid {
 681        my ($self) = @_;
 682        unless ($self->{ra_uuid}) {
 683                my $key = "svn-remote.$self->{repo_id}.uuid";
 684                my $uuid = eval { tmp_config('--get', $key) };
 685                if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/i) {
 686                        $self->{ra_uuid} = $uuid;
 687                } else {
 688                        die "ra_uuid called without URL\n" unless $self->url;
 689                        $self->{ra_uuid} = $self->ra->get_uuid;
 690                        tmp_config('--add', $key, $self->{ra_uuid});
 691                }
 692        }
 693        $self->{ra_uuid};
 694}
 695
 696sub _set_repos_root {
 697        my ($self, $repos_root) = @_;
 698        my $k = "svn-remote.$self->{repo_id}.reposRoot";
 699        $repos_root ||= $self->ra->{repos_root};
 700        tmp_config($k, $repos_root);
 701        $repos_root;
 702}
 703
 704sub repos_root {
 705        my ($self) = @_;
 706        my $k = "svn-remote.$self->{repo_id}.reposRoot";
 707        eval { tmp_config('--get', $k) } || $self->_set_repos_root;
 708}
 709
 710sub ra {
 711        my ($self) = shift;
 712        my $ra = Git::SVN::Ra->new($self->url);
 713        $self->_set_repos_root($ra->{repos_root});
 714        if ($self->use_svm_props && !$self->{svm}) {
 715                if ($self->no_metadata) {
 716                        die "Can't have both 'noMetadata' and ",
 717                            "'useSvmProps' options set!\n";
 718                } elsif ($self->use_svnsync_props) {
 719                        die "Can't have both 'useSvnsyncProps' and ",
 720                            "'useSvmProps' options set!\n";
 721                }
 722                $ra = $self->_set_svm_vars($ra);
 723                $self->{-want_revprops} = 1;
 724        }
 725        $ra;
 726}
 727
 728# prop_walk(PATH, REV, SUB)
 729# -------------------------
 730# Recursively traverse PATH at revision REV and invoke SUB for each
 731# directory that contains a SVN property.  SUB will be invoked as
 732# follows:  &SUB(gs, path, props);  where `gs' is this instance of
 733# Git::SVN, `path' the path to the directory where the properties
 734# `props' were found.  The `path' will be relative to point of checkout,
 735# that is, if url://repo/trunk is the current Git branch, and that
 736# directory contains a sub-directory `d', SUB will be invoked with `/d/'
 737# as `path' (note the trailing `/').
 738sub prop_walk {
 739        my ($self, $path, $rev, $sub) = @_;
 740
 741        $path =~ s#^/##;
 742        my ($dirent, undef, $props) = $self->ra->get_dir($path, $rev);
 743        $path =~ s#^/*#/#g;
 744        my $p = $path;
 745        # Strip the irrelevant part of the path.
 746        $p =~ s#^/+\Q@{[$self->path]}\E(/|$)#/#;
 747        # Ensure the path is terminated by a `/'.
 748        $p =~ s#/*$#/#;
 749
 750        # The properties contain all the internal SVN stuff nobody
 751        # (usually) cares about.
 752        my $interesting_props = 0;
 753        foreach (keys %{$props}) {
 754                # If it doesn't start with `svn:', it must be a
 755                # user-defined property.
 756                ++$interesting_props and next if $_ !~ /^svn:/;
 757                # FIXME: Fragile, if SVN adds new public properties,
 758                # this needs to be updated.
 759                ++$interesting_props if /^svn:(?:ignore|keywords|executable
 760                                                 |eol-style|mime-type
 761                                                 |externals|needs-lock)$/x;
 762        }
 763        &$sub($self, $p, $props) if $interesting_props;
 764
 765        foreach (sort keys %$dirent) {
 766                next if $dirent->{$_}->{kind} != $SVN::Node::dir;
 767                $self->prop_walk($self->path . $p . $_, $rev, $sub);
 768        }
 769}
 770
 771sub last_rev { ($_[0]->last_rev_commit)[0] }
 772sub last_commit { ($_[0]->last_rev_commit)[1] }
 773
 774# returns the newest SVN revision number and newest commit SHA1
 775sub last_rev_commit {
 776        my ($self) = @_;
 777        if (defined $self->{last_rev} && defined $self->{last_commit}) {
 778                return ($self->{last_rev}, $self->{last_commit});
 779        }
 780        my $c = ::verify_ref($self->refname.'^0');
 781        if ($c && !$self->use_svm_props && !$self->no_metadata) {
 782                my $rev = (::cmt_metadata($c))[1];
 783                if (defined $rev) {
 784                        ($self->{last_rev}, $self->{last_commit}) = ($rev, $c);
 785                        return ($rev, $c);
 786                }
 787        }
 788        my $map_path = $self->map_path;
 789        unless (-e $map_path) {
 790                ($self->{last_rev}, $self->{last_commit}) = (undef, undef);
 791                return (undef, undef);
 792        }
 793        my ($rev, $commit) = $self->rev_map_max(1);
 794        ($self->{last_rev}, $self->{last_commit}) = ($rev, $commit);
 795        return ($rev, $commit);
 796}
 797
 798sub get_fetch_range {
 799        my ($self, $min, $max) = @_;
 800        $max ||= $self->ra->get_latest_revnum;
 801        $min ||= $self->rev_map_max;
 802        (++$min, $max);
 803}
 804
 805sub tmp_config {
 806        my (@args) = @_;
 807        my $old_def_config = "$ENV{GIT_DIR}/svn/config";
 808        my $config = "$ENV{GIT_DIR}/svn/.metadata";
 809        if (! -f $config && -f $old_def_config) {
 810                rename $old_def_config, $config or
 811                       die "Failed rename $old_def_config => $config: $!\n";
 812        }
 813        my $old_config = $ENV{GIT_CONFIG};
 814        $ENV{GIT_CONFIG} = $config;
 815        $@ = undef;
 816        my @ret = eval {
 817                unless (-f $config) {
 818                        mkfile($config);
 819                        open my $fh, '>', $config or
 820                            die "Can't open $config: $!\n";
 821                        print $fh "; This file is used internally by ",
 822                                  "git-svn\n" or die
 823                                  "Couldn't write to $config: $!\n";
 824                        print $fh "; You should not have to edit it\n" or
 825                              die "Couldn't write to $config: $!\n";
 826                        close $fh or die "Couldn't close $config: $!\n";
 827                }
 828                command('config', @args);
 829        };
 830        my $err = $@;
 831        if (defined $old_config) {
 832                $ENV{GIT_CONFIG} = $old_config;
 833        } else {
 834                delete $ENV{GIT_CONFIG};
 835        }
 836        die $err if $err;
 837        wantarray ? @ret : $ret[0];
 838}
 839
 840sub tmp_index_do {
 841        my ($self, $sub) = @_;
 842        my $old_index = $ENV{GIT_INDEX_FILE};
 843        $ENV{GIT_INDEX_FILE} = $self->{index};
 844        $@ = undef;
 845        my @ret = eval {
 846                my ($dir, $base) = ($self->{index} =~ m#^(.*?)/?([^/]+)$#);
 847                mkpath([$dir]) unless -d $dir;
 848                &$sub;
 849        };
 850        my $err = $@;
 851        if (defined $old_index) {
 852                $ENV{GIT_INDEX_FILE} = $old_index;
 853        } else {
 854                delete $ENV{GIT_INDEX_FILE};
 855        }
 856        die $err if $err;
 857        wantarray ? @ret : $ret[0];
 858}
 859
 860sub assert_index_clean {
 861        my ($self, $treeish) = @_;
 862
 863        $self->tmp_index_do(sub {
 864                command_noisy('read-tree', $treeish) unless -e $self->{index};
 865                my $x = command_oneline('write-tree');
 866                my ($y) = (command(qw/cat-file commit/, $treeish) =~
 867                           /^tree ($::sha1)/mo);
 868                return if $y eq $x;
 869
 870                warn "Index mismatch: $y != $x\nrereading $treeish\n";
 871                unlink $self->{index} or die "unlink $self->{index}: $!\n";
 872                command_noisy('read-tree', $treeish);
 873                $x = command_oneline('write-tree');
 874                if ($y ne $x) {
 875                        fatal "trees ($treeish) $y != $x\n",
 876                              "Something is seriously wrong...";
 877                }
 878        });
 879}
 880
 881sub get_commit_parents {
 882        my ($self, $log_entry) = @_;
 883        my (%seen, @ret, @tmp);
 884        # legacy support for 'set-tree'; this is only used by set_tree_cb:
 885        if (my $ip = $self->{inject_parents}) {
 886                if (my $commit = delete $ip->{$log_entry->{revision}}) {
 887                        push @tmp, $commit;
 888                }
 889        }
 890        if (my $cur = ::verify_ref($self->refname.'^0')) {
 891                push @tmp, $cur;
 892        }
 893        if (my $ipd = $self->{inject_parents_dcommit}) {
 894                if (my $commit = delete $ipd->{$log_entry->{revision}}) {
 895                        push @tmp, @$commit;
 896                }
 897        }
 898        push @tmp, $_ foreach (@{$log_entry->{parents}}, @tmp);
 899        while (my $p = shift @tmp) {
 900                next if $seen{$p};
 901                $seen{$p} = 1;
 902                push @ret, $p;
 903        }
 904        @ret;
 905}
 906
 907sub rewrite_root {
 908        my ($self) = @_;
 909        return $self->{-rewrite_root} if exists $self->{-rewrite_root};
 910        my $k = "svn-remote.$self->{repo_id}.rewriteRoot";
 911        my $rwr = eval { command_oneline(qw/config --get/, $k) };
 912        if ($rwr) {
 913                $rwr =~ s#/+$##;
 914                if ($rwr !~ m#^[a-z\+]+://#) {
 915                        die "$rwr is not a valid URL (key: $k)\n";
 916                }
 917        }
 918        $self->{-rewrite_root} = $rwr;
 919}
 920
 921sub rewrite_uuid {
 922        my ($self) = @_;
 923        return $self->{-rewrite_uuid} if exists $self->{-rewrite_uuid};
 924        my $k = "svn-remote.$self->{repo_id}.rewriteUUID";
 925        my $rwid = eval { command_oneline(qw/config --get/, $k) };
 926        if ($rwid) {
 927                $rwid =~ s#/+$##;
 928                if ($rwid !~ m#^[a-f0-9]{8}-(?:[a-f0-9]{4}-){3}[a-f0-9]{12}$#) {
 929                        die "$rwid is not a valid UUID (key: $k)\n";
 930                }
 931        }
 932        $self->{-rewrite_uuid} = $rwid;
 933}
 934
 935sub metadata_url {
 936        my ($self) = @_;
 937        my $url = $self->rewrite_root || $self->url;
 938        return canonicalize_url( add_path_to_url( $url, $self->path ) );
 939}
 940
 941sub full_url {
 942        my ($self) = @_;
 943        return canonicalize_url( add_path_to_url( $self->url, $self->path ) );
 944}
 945
 946sub full_pushurl {
 947        my ($self) = @_;
 948        if ($self->{pushurl}) {
 949                return canonicalize_url( add_path_to_url( $self->{pushurl}, $self->path ) );
 950        } else {
 951                return $self->full_url;
 952        }
 953}
 954
 955sub set_commit_header_env {
 956        my ($log_entry) = @_;
 957        my %env;
 958        foreach my $ned (qw/NAME EMAIL DATE/) {
 959                foreach my $ac (qw/AUTHOR COMMITTER/) {
 960                        $env{"GIT_${ac}_${ned}"} = $ENV{"GIT_${ac}_${ned}"};
 961                }
 962        }
 963
 964        $ENV{GIT_AUTHOR_NAME} = $log_entry->{name};
 965        $ENV{GIT_AUTHOR_EMAIL} = $log_entry->{email};
 966        $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_entry->{date};
 967
 968        $ENV{GIT_COMMITTER_NAME} = (defined $log_entry->{commit_name})
 969                                                ? $log_entry->{commit_name}
 970                                                : $log_entry->{name};
 971        $ENV{GIT_COMMITTER_EMAIL} = (defined $log_entry->{commit_email})
 972                                                ? $log_entry->{commit_email}
 973                                                : $log_entry->{email};
 974        \%env;
 975}
 976
 977sub restore_commit_header_env {
 978        my ($env) = @_;
 979        foreach my $ned (qw/NAME EMAIL DATE/) {
 980                foreach my $ac (qw/AUTHOR COMMITTER/) {
 981                        my $k = "GIT_${ac}_${ned}";
 982                        if (defined $env->{$k}) {
 983                                $ENV{$k} = $env->{$k};
 984                        } else {
 985                                delete $ENV{$k};
 986                        }
 987                }
 988        }
 989}
 990
 991sub gc {
 992        command_noisy('gc', '--auto');
 993};
 994
 995sub do_git_commit {
 996        my ($self, $log_entry) = @_;
 997        my $lr = $self->last_rev;
 998        if (defined $lr && $lr >= $log_entry->{revision}) {
 999                die "Last fetched revision of ", $self->refname,
1000                    " was r$lr, but we are about to fetch: ",
1001                    "r$log_entry->{revision}!\n";
1002        }
1003        if (my $c = $self->rev_map_get($log_entry->{revision})) {
1004                croak "$log_entry->{revision} = $c already exists! ",
1005                      "Why are we refetching it?\n";
1006        }
1007        my $old_env = set_commit_header_env($log_entry);
1008        my $tree = $log_entry->{tree};
1009        if (!defined $tree) {
1010                $tree = $self->tmp_index_do(sub {
1011                                            command_oneline('write-tree') });
1012        }
1013        die "Tree is not a valid sha1: $tree\n" if $tree !~ /^$::sha1$/o;
1014
1015        my @exec = ('git', 'commit-tree', $tree);
1016        foreach ($self->get_commit_parents($log_entry)) {
1017                push @exec, '-p', $_;
1018        }
1019        defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @exec))
1020                                                                   or croak $!;
1021        binmode $msg_fh;
1022
1023        # we always get UTF-8 from SVN, but we may want our commits in
1024        # a different encoding.
1025        if (my $enc = Git::config('i18n.commitencoding')) {
1026                require Encode;
1027                Encode::from_to($log_entry->{log}, 'UTF-8', $enc);
1028        }
1029        print $msg_fh $log_entry->{log} or croak $!;
1030        restore_commit_header_env($old_env);
1031        unless ($self->no_metadata) {
1032                print $msg_fh "\ngit-svn-id: $log_entry->{metadata}\n"
1033                              or croak $!;
1034        }
1035        $msg_fh->flush == 0 or croak $!;
1036        close $msg_fh or croak $!;
1037        chomp(my $commit = do { local $/; <$out_fh> });
1038        close $out_fh or croak $!;
1039        waitpid $pid, 0;
1040        croak $? if $?;
1041        if ($commit !~ /^$::sha1$/o) {
1042                die "Failed to commit, invalid sha1: $commit\n";
1043        }
1044
1045        $self->rev_map_set($log_entry->{revision}, $commit, 1);
1046
1047        $self->{last_rev} = $log_entry->{revision};
1048        $self->{last_commit} = $commit;
1049        print "r$log_entry->{revision}" unless $::_q > 1;
1050        if (defined $log_entry->{svm_revision}) {
1051                 print " (\@$log_entry->{svm_revision})" unless $::_q > 1;
1052                 $self->rev_map_set($log_entry->{svm_revision}, $commit,
1053                                   0, $self->svm_uuid);
1054        }
1055        print " = $commit ($self->{ref_id})\n" unless $::_q > 1;
1056        if (--$_gc_nr == 0) {
1057                $_gc_nr = $_gc_period;
1058                gc();
1059        }
1060        return $commit;
1061}
1062
1063sub match_paths {
1064        my ($self, $paths, $r) = @_;
1065        return 1 if $self->path eq '';
1066        if (my $path = $paths->{"/".$self->path}) {
1067                return ($path->{action} eq 'D') ? 0 : 1;
1068        }
1069        $self->{path_regex} ||= qr{^/\Q@{[$self->path]}\E/};
1070        if (grep /$self->{path_regex}/, keys %$paths) {
1071                return 1;
1072        }
1073        my $c = '';
1074        foreach (split m#/#, $self->path) {
1075                $c .= "/$_";
1076                next unless ($paths->{$c} &&
1077                             ($paths->{$c}->{action} =~ /^[AR]$/));
1078                if ($self->ra->check_path($self->path, $r) ==
1079                    $SVN::Node::dir) {
1080                        return 1;
1081                }
1082        }
1083        return 0;
1084}
1085
1086sub find_parent_branch {
1087        my ($self, $paths, $rev) = @_;
1088        return undef unless $self->follow_parent;
1089        unless (defined $paths) {
1090                my $err_handler = $SVN::Error::handler;
1091                $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs;
1092                $self->ra->get_log([$self->path], $rev, $rev, 0, 1, 1,
1093                                   sub { $paths = $_[0] });
1094                $SVN::Error::handler = $err_handler;
1095        }
1096        return undef unless defined $paths;
1097
1098        # look for a parent from another branch:
1099        my @b_path_components = split m#/#, $self->path;
1100        my @a_path_components;
1101        my $i;
1102        while (@b_path_components) {
1103                $i = $paths->{'/'.join('/', @b_path_components)};
1104                last if $i && defined $i->{copyfrom_path};
1105                unshift(@a_path_components, pop(@b_path_components));
1106        }
1107        return undef unless defined $i && defined $i->{copyfrom_path};
1108        my $branch_from = $i->{copyfrom_path};
1109        if (@a_path_components) {
1110                print STDERR "branch_from: $branch_from => ";
1111                $branch_from .= '/'.join('/', @a_path_components);
1112                print STDERR $branch_from, "\n";
1113        }
1114        my $r = $i->{copyfrom_rev};
1115        my $repos_root = $self->ra->{repos_root};
1116        my $url = $self->ra->url;
1117        my $new_url = canonicalize_url( add_path_to_url( $url, $branch_from ) );
1118        print STDERR  "Found possible branch point: ",
1119                      "$new_url => ", $self->full_url, ", $r\n"
1120                      unless $::_q > 1;
1121        $branch_from =~ s#^/##;
1122        my $gs = $self->other_gs($new_url, $url,
1123                                 $branch_from, $r, $self->{ref_id});
1124        my ($r0, $parent) = $gs->find_rev_before($r, 1);
1125        {
1126                my ($base, $head);
1127                if (!defined $r0 || !defined $parent) {
1128                        ($base, $head) = parse_revision_argument(0, $r);
1129                } else {
1130                        if ($r0 < $r) {
1131                                $gs->ra->get_log([$gs->path], $r0 + 1, $r, 1,
1132                                        0, 1, sub { $base = $_[1] - 1 });
1133                        }
1134                }
1135                if (defined $base && $base <= $r) {
1136                        $gs->fetch($base, $r);
1137                }
1138                ($r0, $parent) = $gs->find_rev_before($r, 1);
1139        }
1140        if (defined $r0 && defined $parent) {
1141                print STDERR "Found branch parent: ($self->{ref_id}) $parent\n"
1142                             unless $::_q > 1;
1143                my $ed;
1144                if ($self->ra->can_do_switch) {
1145                        $self->assert_index_clean($parent);
1146                        print STDERR "Following parent with do_switch\n"
1147                                     unless $::_q > 1;
1148                        # do_switch works with svn/trunk >= r22312, but that
1149                        # is not included with SVN 1.4.3 (the latest version
1150                        # at the moment), so we can't rely on it
1151                        $self->{last_rev} = $r0;
1152                        $self->{last_commit} = $parent;
1153                        $ed = Git::SVN::Fetcher->new($self, $gs->path);
1154                        $gs->ra->gs_do_switch($r0, $rev, $gs,
1155                                              $self->full_url, $ed)
1156                          or die "SVN connection failed somewhere...\n";
1157                } elsif ($self->ra->trees_match($new_url, $r0,
1158                                                $self->full_url, $rev)) {
1159                        print STDERR "Trees match:\n",
1160                                     "  $new_url\@$r0\n",
1161                                     "  ${\$self->full_url}\@$rev\n",
1162                                     "Following parent with no changes\n"
1163                                     unless $::_q > 1;
1164                        $self->tmp_index_do(sub {
1165                            command_noisy('read-tree', $parent);
1166                        });
1167                        $self->{last_commit} = $parent;
1168                } else {
1169                        print STDERR "Following parent with do_update\n"
1170                                     unless $::_q > 1;
1171                        $ed = Git::SVN::Fetcher->new($self);
1172                        $self->ra->gs_do_update($rev, $rev, $self, $ed)
1173                          or die "SVN connection failed somewhere...\n";
1174                }
1175                print STDERR "Successfully followed parent\n" unless $::_q > 1;
1176                return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
1177        }
1178        return undef;
1179}
1180
1181sub do_fetch {
1182        my ($self, $paths, $rev) = @_;
1183        my $ed;
1184        my ($last_rev, @parents);
1185        if (my $lc = $self->last_commit) {
1186                # we can have a branch that was deleted, then re-added
1187                # under the same name but copied from another path, in
1188                # which case we'll have multiple parents (we don't
1189                # want to break the original ref or lose copypath info):
1190                if (my $log_entry = $self->find_parent_branch($paths, $rev)) {
1191                        push @{$log_entry->{parents}}, $lc;
1192                        return $log_entry;
1193                }
1194                $ed = Git::SVN::Fetcher->new($self);
1195                $last_rev = $self->{last_rev};
1196                $ed->{c} = $lc;
1197                @parents = ($lc);
1198        } else {
1199                $last_rev = $rev;
1200                if (my $log_entry = $self->find_parent_branch($paths, $rev)) {
1201                        return $log_entry;
1202                }
1203                $ed = Git::SVN::Fetcher->new($self);
1204        }
1205        unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
1206                die "SVN connection failed somewhere...\n";
1207        }
1208        $self->make_log_entry($rev, \@parents, $ed, $last_rev, $self->path);
1209}
1210
1211sub mkemptydirs {
1212        my ($self, $r) = @_;
1213
1214        sub scan {
1215                my ($r, $empty_dirs, $line) = @_;
1216                if (defined $r && $line =~ /^r(\d+)$/) {
1217                        return 0 if $1 > $r;
1218                } elsif ($line =~ /^  \+empty_dir: (.+)$/) {
1219                        $empty_dirs->{$1} = 1;
1220                } elsif ($line =~ /^  \-empty_dir: (.+)$/) {
1221                        my @d = grep {m[^\Q$1\E(/|$)]} (keys %$empty_dirs);
1222                        delete @$empty_dirs{@d};
1223                }
1224                1; # continue
1225        };
1226
1227        my %empty_dirs = ();
1228        my $gz_file = "$self->{dir}/unhandled.log.gz";
1229        if (-f $gz_file) {
1230                if (!can_compress()) {
1231                        warn "Compress::Zlib could not be found; ",
1232                             "empty directories in $gz_file will not be read\n";
1233                } else {
1234                        my $gz = Compress::Zlib::gzopen($gz_file, "rb") or
1235                                die "Unable to open $gz_file: $!\n";
1236                        my $line;
1237                        while ($gz->gzreadline($line) > 0) {
1238                                scan($r, \%empty_dirs, $line) or last;
1239                        }
1240                        $gz->gzclose;
1241                }
1242        }
1243
1244        if (open my $fh, '<', "$self->{dir}/unhandled.log") {
1245                binmode $fh or croak "binmode: $!";
1246                while (<$fh>) {
1247                        scan($r, \%empty_dirs, $_) or last;
1248                }
1249                close $fh;
1250        }
1251
1252        my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
1253        foreach my $d (sort keys %empty_dirs) {
1254                $d = uri_decode($d);
1255                $d =~ s/$strip//;
1256                next unless length($d);
1257                next if -d $d;
1258                if (-e $d) {
1259                        warn "$d exists but is not a directory\n";
1260                } else {
1261                        print "creating empty directory: $d\n";
1262                        mkpath([$d]);
1263                }
1264        }
1265}
1266
1267sub get_untracked {
1268        my ($self, $ed) = @_;
1269        my @out;
1270        my $h = $ed->{empty};
1271        foreach (sort keys %$h) {
1272                my $act = $h->{$_} ? '+empty_dir' : '-empty_dir';
1273                push @out, "  $act: " . uri_encode($_);
1274                warn "W: $act: $_\n";
1275        }
1276        foreach my $t (qw/dir_prop file_prop/) {
1277                $h = $ed->{$t} or next;
1278                foreach my $path (sort keys %$h) {
1279                        my $ppath = $path eq '' ? '.' : $path;
1280                        foreach my $prop (sort keys %{$h->{$path}}) {
1281                                next if $SKIP_PROP{$prop};
1282                                my $v = $h->{$path}->{$prop};
1283                                my $t_ppath_prop = "$t: " .
1284                                                    uri_encode($ppath) . ' ' .
1285                                                    uri_encode($prop);
1286                                if (defined $v) {
1287                                        push @out, "  +$t_ppath_prop " .
1288                                                   uri_encode($v);
1289                                } else {
1290                                        push @out, "  -$t_ppath_prop";
1291                                }
1292                        }
1293                }
1294        }
1295        foreach my $t (qw/absent_file absent_directory/) {
1296                $h = $ed->{$t} or next;
1297                foreach my $parent (sort keys %$h) {
1298                        foreach my $path (sort @{$h->{$parent}}) {
1299                                push @out, "  $t: " .
1300                                           uri_encode("$parent/$path");
1301                                warn "W: $t: $parent/$path ",
1302                                     "Insufficient permissions?\n";
1303                        }
1304                }
1305        }
1306        \@out;
1307}
1308
1309# parse_svn_date(DATE)
1310# --------------------
1311# Given a date (in UTC) from Subversion, return a string in the format
1312# "<TZ Offset> <local date/time>" that Git will use.
1313#
1314# By default the parsed date will be in UTC; if $Git::SVN::_localtime
1315# is true we'll convert it to the local timezone instead.
1316sub parse_svn_date {
1317        my $date = shift || return '+0000 1970-01-01 00:00:00';
1318        my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
1319                                            (\d\d?)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
1320                                         croak "Unable to parse date: $date\n";
1321        my $parsed_date;    # Set next.
1322
1323        if ($Git::SVN::_localtime) {
1324                # Translate the Subversion datetime to an epoch time.
1325                # Begin by switching ourselves to $date's timezone, UTC.
1326                my $old_env_TZ = $ENV{TZ};
1327                $ENV{TZ} = 'UTC';
1328
1329                my $epoch_in_UTC =
1330                    Time::Local::timelocal($S, $M, $H, $d, $m - 1, $Y - 1900);
1331
1332                # Determine our local timezone (including DST) at the
1333                # time of $epoch_in_UTC.  $Git::SVN::Log::TZ stored the
1334                # value of TZ, if any, at the time we were run.
1335                if (defined $Git::SVN::Log::TZ) {
1336                        $ENV{TZ} = $Git::SVN::Log::TZ;
1337                } else {
1338                        delete $ENV{TZ};
1339                }
1340
1341                my $our_TZ = get_tz_offset();
1342
1343                # This converts $epoch_in_UTC into our local timezone.
1344                my ($sec, $min, $hour, $mday, $mon, $year,
1345                    $wday, $yday, $isdst) = localtime($epoch_in_UTC);
1346
1347                $parsed_date = sprintf('%s %04d-%02d-%02d %02d:%02d:%02d',
1348                                       $our_TZ, $year + 1900, $mon + 1,
1349                                       $mday, $hour, $min, $sec);
1350
1351                # Reset us to the timezone in effect when we entered
1352                # this routine.
1353                if (defined $old_env_TZ) {
1354                        $ENV{TZ} = $old_env_TZ;
1355                } else {
1356                        delete $ENV{TZ};
1357                }
1358        } else {
1359                $parsed_date = "+0000 $Y-$m-$d $H:$M:$S";
1360        }
1361
1362        return $parsed_date;
1363}
1364
1365sub other_gs {
1366        my ($self, $new_url, $url,
1367            $branch_from, $r, $old_ref_id) = @_;
1368        my $gs = Git::SVN->find_by_url($new_url, $url, $branch_from);
1369        unless ($gs) {
1370                my $ref_id = $old_ref_id;
1371                $ref_id =~ s/\@\d+-*$//;
1372                $ref_id .= "\@$r";
1373                # just grow a tail if we're not unique enough :x
1374                $ref_id .= '-' while find_ref($ref_id);
1375                my ($u, $p, $repo_id) = ($new_url, '', $ref_id);
1376                if ($u =~ s#^\Q$url\E(/|$)##) {
1377                        $p = $u;
1378                        $u = $url;
1379                        $repo_id = $self->{repo_id};
1380                }
1381                while (1) {
1382                        # It is possible to tag two different subdirectories at
1383                        # the same revision.  If the url for an existing ref
1384                        # does not match, we must either find a ref with a
1385                        # matching url or create a new ref by growing a tail.
1386                        $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1);
1387                        my (undef, $max_commit) = $gs->rev_map_max(1);
1388                        last if (!$max_commit);
1389                        my ($url) = ::cmt_metadata($max_commit);
1390                        last if ($url eq $gs->metadata_url);
1391                        $ref_id .= '-';
1392                }
1393                print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1;
1394        }
1395        $gs
1396}
1397
1398sub call_authors_prog {
1399        my ($orig_author) = @_;
1400        $orig_author = command_oneline('rev-parse', '--sq-quote', $orig_author);
1401        my $author = `$::_authors_prog $orig_author`;
1402        if ($? != 0) {
1403                die "$::_authors_prog failed with exit code $?\n"
1404        }
1405        if ($author =~ /^\s*(.+?)\s*<(.*)>\s*$/) {
1406                my ($name, $email) = ($1, $2);
1407                $email = undef if length $2 == 0;
1408                return [$name, $email];
1409        } else {
1410                die "Author: $orig_author: $::_authors_prog returned "
1411                        . "invalid author format: $author\n";
1412        }
1413}
1414
1415sub check_author {
1416        my ($author) = @_;
1417        if (!defined $author || length $author == 0) {
1418                $author = '(no author)';
1419        }
1420        if (!defined $::users{$author}) {
1421                if (defined $::_authors_prog) {
1422                        $::users{$author} = call_authors_prog($author);
1423                } elsif (defined $::_authors) {
1424                        die "Author: $author not defined in $::_authors file\n";
1425                }
1426        }
1427        $author;
1428}
1429
1430sub find_extra_svk_parents {
1431        my ($self, $tickets, $parents) = @_;
1432        # aha!  svk:merge property changed...
1433        my @tickets = split "\n", $tickets;
1434        my @known_parents;
1435        for my $ticket ( @tickets ) {
1436                my ($uuid, $path, $rev) = split /:/, $ticket;
1437                if ( $uuid eq $self->ra_uuid ) {
1438                        my $repos_root = $self->url;
1439                        my $branch_from = $path;
1440                        $branch_from =~ s{^/}{};
1441                        my $gs = $self->other_gs(add_path_to_url( $repos_root, $branch_from ),
1442                                                 $repos_root,
1443                                                 $branch_from,
1444                                                 $rev,
1445                                                 $self->{ref_id});
1446                        if ( my $commit = $gs->rev_map_get($rev, $uuid) ) {
1447                                # wahey!  we found it, but it might be
1448                                # an old one (!)
1449                                push @known_parents, [ $rev, $commit ];
1450                        }
1451                }
1452        }
1453        # Ordering matters; highest-numbered commit merge tickets
1454        # first, as they may account for later merge ticket additions
1455        # or changes.
1456        @known_parents = map {$_->[1]} sort {$b->[0] <=> $a->[0]} @known_parents;
1457        for my $parent ( @known_parents ) {
1458                my @cmd = ('rev-list', $parent, map { "^$_" } @$parents );
1459                my ($msg_fh, $ctx) = command_output_pipe(@cmd);
1460                my $new;
1461                while ( <$msg_fh> ) {
1462                        $new=1;last;
1463                }
1464                command_close_pipe($msg_fh, $ctx);
1465                if ( $new ) {
1466                        print STDERR
1467                            "Found merge parent (svk:merge ticket): $parent\n";
1468                        push @$parents, $parent;
1469                }
1470        }
1471}
1472
1473sub lookup_svn_merge {
1474        my $uuid = shift;
1475        my $url = shift;
1476        my $source = shift;
1477        my $revs = shift;
1478
1479        my $path = $source;
1480        $path =~ s{^/}{};
1481        my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
1482        if ( !$gs ) {
1483                warn "Couldn't find revmap for $url$source\n";
1484                return;
1485        }
1486        my @ranges = split ",", $revs;
1487        my ($tip, $tip_commit);
1488        my @merged_commit_ranges;
1489        # find the tip
1490        for my $range ( @ranges ) {
1491                if ($range =~ /[*]$/) {
1492                        warn "W: Ignoring partial merge in svn:mergeinfo "
1493                                ."dirprop: $source:$range\n";
1494                        next;
1495                }
1496                my ($bottom, $top) = split "-", $range;
1497                $top ||= $bottom;
1498                my $bottom_commit = $gs->find_rev_after( $bottom, 1, $top );
1499                my $top_commit = $gs->find_rev_before( $top, 1, $bottom );
1500
1501                unless ($top_commit and $bottom_commit) {
1502                        warn "W: unknown path/rev in svn:mergeinfo "
1503                                ."dirprop: $source:$range\n";
1504                        next;
1505                }
1506
1507                if (scalar(command('rev-parse', "$bottom_commit^@"))) {
1508                        push @merged_commit_ranges,
1509                             "$bottom_commit^..$top_commit";
1510                } else {
1511                        push @merged_commit_ranges, "$top_commit";
1512                }
1513
1514                if ( !defined $tip or $top > $tip ) {
1515                        $tip = $top;
1516                        $tip_commit = $top_commit;
1517                }
1518        }
1519        return ($tip_commit, @merged_commit_ranges);
1520}
1521
1522sub _rev_list {
1523        my ($msg_fh, $ctx) = command_output_pipe(
1524                "rev-list", @_,
1525               );
1526        my @rv;
1527        while ( <$msg_fh> ) {
1528                chomp;
1529                push @rv, $_;
1530        }
1531        command_close_pipe($msg_fh, $ctx);
1532        @rv;
1533}
1534
1535sub check_cherry_pick2 {
1536        my $base = shift;
1537        my $tip = shift;
1538        my $parents = shift;
1539        my @ranges = @_;
1540        my %commits = map { $_ => 1 }
1541                _rev_list("--no-merges", $tip, "--not", $base, @$parents, "--");
1542        for my $range ( @ranges ) {
1543                delete @commits{_rev_list($range, "--")};
1544        }
1545        for my $commit (keys %commits) {
1546                if (has_no_changes($commit)) {
1547                        delete $commits{$commit};
1548                }
1549        }
1550        my @k = (keys %commits);
1551        return (scalar @k, $k[0]);
1552}
1553
1554sub has_no_changes {
1555        my $commit = shift;
1556
1557        my @revs = split / /, command_oneline(
1558                qw(rev-list --parents -1 -m), $commit);
1559
1560        # Commits with no parents, e.g. the start of a partial branch,
1561        # have changes by definition.
1562        return 1 if (@revs < 2);
1563
1564        # Commits with multiple parents, e.g a merge, have no changes
1565        # by definition.
1566        return 0 if (@revs > 2);
1567
1568        return (command_oneline("rev-parse", "$commit^{tree}") eq
1569                command_oneline("rev-parse", "$commit~1^{tree}"));
1570}
1571
1572sub tie_for_persistent_memoization {
1573        my $hash = shift;
1574        my $path = shift;
1575
1576        unless ($memo_backend) {
1577                if (eval { require Git::SVN::Memoize::YAML; 1}) {
1578                        $memo_backend = 1;
1579                } else {
1580                        require Memoize::Storable;
1581                        $memo_backend = -1;
1582                }
1583        }
1584
1585        if ($memo_backend > 0) {
1586                tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml";
1587        } else {
1588                tie %$hash => 'Memoize::Storable', "$path.db", 'nstore';
1589        }
1590}
1591
1592# The GIT_DIR environment variable is not always set until after the command
1593# line arguments are processed, so we can't memoize in a BEGIN block.
1594{
1595        my $memoized = 0;
1596
1597        sub memoize_svn_mergeinfo_functions {
1598                return if $memoized;
1599                $memoized = 1;
1600
1601                my $cache_path = "$ENV{GIT_DIR}/svn/.caches/";
1602                mkpath([$cache_path]) unless -d $cache_path;
1603
1604                my %lookup_svn_merge_cache;
1605                my %check_cherry_pick2_cache;
1606                my %has_no_changes_cache;
1607
1608                tie_for_persistent_memoization(\%lookup_svn_merge_cache,
1609                    "$cache_path/lookup_svn_merge");
1610                memoize 'lookup_svn_merge',
1611                        SCALAR_CACHE => 'FAULT',
1612                        LIST_CACHE => ['HASH' => \%lookup_svn_merge_cache],
1613                ;
1614
1615                tie_for_persistent_memoization(\%check_cherry_pick2_cache,
1616                    "$cache_path/check_cherry_pick2");
1617                memoize 'check_cherry_pick2',
1618                        SCALAR_CACHE => 'FAULT',
1619                        LIST_CACHE => ['HASH' => \%check_cherry_pick2_cache],
1620                ;
1621
1622                tie_for_persistent_memoization(\%has_no_changes_cache,
1623                    "$cache_path/has_no_changes");
1624                memoize 'has_no_changes',
1625                        SCALAR_CACHE => ['HASH' => \%has_no_changes_cache],
1626                        LIST_CACHE => 'FAULT',
1627                ;
1628        }
1629
1630        sub unmemoize_svn_mergeinfo_functions {
1631                return if not $memoized;
1632                $memoized = 0;
1633
1634                Memoize::unmemoize 'lookup_svn_merge';
1635                Memoize::unmemoize 'check_cherry_pick2';
1636                Memoize::unmemoize 'has_no_changes';
1637        }
1638
1639        sub clear_memoized_mergeinfo_caches {
1640                die "Only call this method in non-memoized context" if ($memoized);
1641
1642                my $cache_path = "$ENV{GIT_DIR}/svn/.caches/";
1643                return unless -d $cache_path;
1644
1645                for my $cache_file (("$cache_path/lookup_svn_merge",
1646                                     "$cache_path/check_cherry_pick", # old
1647                                     "$cache_path/check_cherry_pick2",
1648                                     "$cache_path/has_no_changes")) {
1649                        for my $suffix (qw(yaml db)) {
1650                                my $file = "$cache_file.$suffix";
1651                                next unless -e $file;
1652                                unlink($file) or die "unlink($file) failed: $!\n";
1653                        }
1654                }
1655        }
1656
1657
1658        Memoize::memoize 'Git::SVN::repos_root';
1659}
1660
1661END {
1662        # Force cache writeout explicitly instead of waiting for
1663        # global destruction to avoid segfault in Storable:
1664        # http://rt.cpan.org/Public/Bug/Display.html?id=36087
1665        unmemoize_svn_mergeinfo_functions();
1666}
1667
1668sub parents_exclude {
1669        my $parents = shift;
1670        my @commits = @_;
1671        return unless @commits;
1672
1673        my @excluded;
1674        my $excluded;
1675        do {
1676                my @cmd = ('rev-list', "-1", @commits, "--not", @$parents );
1677                $excluded = command_oneline(@cmd);
1678                if ( $excluded ) {
1679                        my @new;
1680                        my $found;
1681                        for my $commit ( @commits ) {
1682                                if ( $commit eq $excluded ) {
1683                                        push @excluded, $commit;
1684                                        $found++;
1685                                }
1686                                else {
1687                                        push @new, $commit;
1688                                }
1689                        }
1690                        die "saw commit '$excluded' in rev-list output, "
1691                                ."but we didn't ask for that commit (wanted: @commits --not @$parents)"
1692                                        unless $found;
1693                        @commits = @new;
1694                }
1695        }
1696                while ($excluded and @commits);
1697
1698        return @excluded;
1699}
1700
1701# Compute what's new in svn:mergeinfo.
1702sub mergeinfo_changes {
1703        my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
1704        my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
1705        my $old_minfo = {};
1706
1707        my $ra = $self->ra;
1708        # Give up if $old_path isn't in the repo.
1709        # This is probably a merge on a subtree.
1710        if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
1711                warn "W: ignoring svn:mergeinfo on $old_path, ",
1712                        "directory didn't exist in r$old_rev\n";
1713                return {};
1714        }
1715        my (undef, undef, $props) = $ra->get_dir($old_path, $old_rev);
1716        if (defined $props->{"svn:mergeinfo"}) {
1717                my %omi = map {split ":", $_ } split "\n",
1718                        $props->{"svn:mergeinfo"};
1719                $old_minfo = \%omi;
1720        }
1721
1722        my %changes = ();
1723        foreach my $p (keys %minfo) {
1724                my $a = $old_minfo->{$p} || "";
1725                my $b = $minfo{$p};
1726                # Omit merged branches whose ranges lists are unchanged.
1727                next if $a eq $b;
1728                # Remove any common range list prefix.
1729                ($a ^ $b) =~ /^[\0]*/;
1730                my $common_prefix = rindex $b, ",", $+[0] - 1;
1731                $changes{$p} = substr $b, $common_prefix + 1;
1732        }
1733        print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
1734                scalar(keys %minfo), " sources, ",
1735                scalar(keys %changes), " changed\n";
1736
1737        return \%changes;
1738}
1739
1740# note: this function should only be called if the various dirprops
1741# have actually changed
1742sub find_extra_svn_parents {
1743        my ($self, $mergeinfo, $parents) = @_;
1744        # aha!  svk:merge property changed...
1745
1746        memoize_svn_mergeinfo_functions();
1747
1748        # We first search for merged tips which are not in our
1749        # history.  Then, we figure out which git revisions are in
1750        # that tip, but not this revision.  If all of those revisions
1751        # are now marked as merge, we can add the tip as a parent.
1752        my @merges = sort keys %$mergeinfo;
1753        my @merge_tips;
1754        my $url = $self->url;
1755        my $uuid = $self->ra_uuid;
1756        my @all_ranges;
1757        for my $merge ( @merges ) {
1758                my ($tip_commit, @ranges) =
1759                        lookup_svn_merge( $uuid, $url,
1760                                          $merge, $mergeinfo->{$merge} );
1761                unless (!$tip_commit or
1762                                grep { $_ eq $tip_commit } @$parents ) {
1763                        push @merge_tips, $tip_commit;
1764                        push @all_ranges, @ranges;
1765                } else {
1766                        push @merge_tips, undef;
1767                }
1768        }
1769
1770        my %excluded = map { $_ => 1 }
1771                parents_exclude($parents, grep { defined } @merge_tips);
1772
1773        # check merge tips for new parents
1774        my @new_parents;
1775        for my $merge_tip ( @merge_tips ) {
1776                my $merge = shift @merges;
1777                next unless $merge_tip and $excluded{$merge_tip};
1778                my $spec = "$merge:$mergeinfo->{$merge}";
1779
1780                # check out 'new' tips
1781                my $merge_base;
1782                eval {
1783                        $merge_base = command_oneline(
1784                                "merge-base",
1785                                @$parents, $merge_tip,
1786                        );
1787                };
1788                if ($@) {
1789                        die "An error occurred during merge-base"
1790                                unless $@->isa("Git::Error::Command");
1791
1792                        warn "W: Cannot find common ancestor between ".
1793                             "@$parents and $merge_tip. Ignoring merge info.\n";
1794                        next;
1795                }
1796
1797                # double check that there are no missing non-merge commits
1798                my ($ninc, $ifirst) = check_cherry_pick2(
1799                        $merge_base, $merge_tip,
1800                        $parents,
1801                        @all_ranges,
1802                       );
1803
1804                if ($ninc) {
1805                        warn "W: svn cherry-pick ignored ($spec) - missing " .
1806                                "$ninc commit(s) (eg $ifirst)\n";
1807                } else {
1808                        warn "Found merge parent ($spec): ", $merge_tip, "\n";
1809                        push @new_parents, $merge_tip;
1810                }
1811        }
1812
1813        # cater for merges which merge commits from multiple branches
1814        if ( @new_parents > 1 ) {
1815                for ( my $i = 0; $i <= $#new_parents; $i++ ) {
1816                        for ( my $j = 0; $j <= $#new_parents; $j++ ) {
1817                                next if $i == $j;
1818                                next unless $new_parents[$i];
1819                                next unless $new_parents[$j];
1820                                my $revs = command_oneline(
1821                                        "rev-list", "-1",
1822                                        "$new_parents[$i]..$new_parents[$j]",
1823                                       );
1824                                if ( !$revs ) {
1825                                        undef($new_parents[$j]);
1826                                }
1827                        }
1828                }
1829        }
1830        push @$parents, grep { defined } @new_parents;
1831}
1832
1833sub make_log_entry {
1834        my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
1835        my $untracked = $self->get_untracked($ed);
1836
1837        my @parents = @$parents;
1838        my $props = $ed->{dir_prop}{$self->path};
1839        if ( $props->{"svk:merge"} ) {
1840                $self->find_extra_svk_parents($props->{"svk:merge"}, \@parents);
1841        }
1842        if ( $props->{"svn:mergeinfo"} ) {
1843                my $mi_changes = $self->mergeinfo_changes
1844                        ($parent_path, $parent_rev,
1845                         $self->path, $rev,
1846                         $props->{"svn:mergeinfo"});
1847                $self->find_extra_svn_parents($mi_changes, \@parents);
1848        }
1849
1850        open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!;
1851        print $un "r$rev\n" or croak $!;
1852        print $un $_, "\n" foreach @$untracked;
1853        my %log_entry = ( parents => \@parents, revision => $rev,
1854                          log => '');
1855
1856        my $headrev;
1857        my $logged = delete $self->{logged_rev_props};
1858        if (!$logged || $self->{-want_revprops}) {
1859                my $rp = $self->ra->rev_proplist($rev);
1860                foreach (sort keys %$rp) {
1861                        my $v = $rp->{$_};
1862                        if (/^svn:(author|date|log)$/) {
1863                                $log_entry{$1} = $v;
1864                        } elsif ($_ eq 'svm:headrev') {
1865                                $headrev = $v;
1866                        } else {
1867                                print $un "  rev_prop: ", uri_encode($_), ' ',
1868                                          uri_encode($v), "\n";
1869                        }
1870                }
1871        } else {
1872                map { $log_entry{$_} = $logged->{$_} } keys %$logged;
1873        }
1874        close $un or croak $!;
1875
1876        $log_entry{date} = parse_svn_date($log_entry{date});
1877        $log_entry{log} .= "\n";
1878        my $author = $log_entry{author} = check_author($log_entry{author});
1879        my ($name, $email) = defined $::users{$author} ? @{$::users{$author}}
1880                                                       : ($author, undef);
1881
1882        my ($commit_name, $commit_email) = ($name, $email);
1883        if ($_use_log_author) {
1884                my $name_field;
1885                if ($log_entry{log} =~ /From:\s+(.*\S)\s*\n/i) {
1886                        $name_field = $1;
1887                } elsif ($log_entry{log} =~ /Signed-off-by:\s+(.*\S)\s*\n/i) {
1888                        $name_field = $1;
1889                }
1890                if (!defined $name_field) {
1891                        if (!defined $email) {
1892                                $email = $name;
1893                        }
1894                } elsif ($name_field =~ /(.*?)\s+<(.*)>/) {
1895                        ($name, $email) = ($1, $2);
1896                } elsif ($name_field =~ /(.*)@/) {
1897                        ($name, $email) = ($1, $name_field);
1898                } else {
1899                        ($name, $email) = ($name_field, $name_field);
1900                }
1901        }
1902        if (defined $headrev && $self->use_svm_props) {
1903                if ($self->rewrite_root) {
1904                        die "Can't have both 'useSvmProps' and 'rewriteRoot' ",
1905                            "options set!\n";
1906                }
1907                if ($self->rewrite_uuid) {
1908                        die "Can't have both 'useSvmProps' and 'rewriteUUID' ",
1909                            "options set!\n";
1910                }
1911                my ($uuid, $r) = $headrev =~ m{^([a-f\d\-]{30,}):(\d+)$}i;
1912                # we don't want "SVM: initializing mirror for junk" ...
1913                return undef if $r == 0;
1914                my $svm = $self->svm;
1915                if ($uuid ne $svm->{uuid}) {
1916                        die "UUID mismatch on SVM path:\n",
1917                            "expected: $svm->{uuid}\n",
1918                            "     got: $uuid\n";
1919                }
1920                my $full_url = $self->full_url;
1921                $full_url =~ s#^\Q$svm->{replace}\E(/|$)#$svm->{source}$1# or
1922                             die "Failed to replace '$svm->{replace}' with ",
1923                                 "'$svm->{source}' in $full_url\n";
1924                # throw away username for storing in records
1925                remove_username($full_url);
1926                $log_entry{metadata} = "$full_url\@$r $uuid";
1927                $log_entry{svm_revision} = $r;
1928                $email ||= "$author\@$uuid";
1929                $commit_email ||= "$author\@$uuid";
1930        } elsif ($self->use_svnsync_props) {
1931                my $full_url = canonicalize_url(
1932                        add_path_to_url( $self->svnsync->{url}, $self->path )
1933                );
1934                remove_username($full_url);
1935                my $uuid = $self->svnsync->{uuid};
1936                $log_entry{metadata} = "$full_url\@$rev $uuid";
1937                $email ||= "$author\@$uuid";
1938                $commit_email ||= "$author\@$uuid";
1939        } else {
1940                my $url = $self->metadata_url;
1941                remove_username($url);
1942                my $uuid = $self->rewrite_uuid || $self->ra->get_uuid;
1943                $log_entry{metadata} = "$url\@$rev " . $uuid;
1944                $email ||= "$author\@" . $uuid;
1945                $commit_email ||= "$author\@" . $uuid;
1946        }
1947        $log_entry{name} = $name;
1948        $log_entry{email} = $email;
1949        $log_entry{commit_name} = $commit_name;
1950        $log_entry{commit_email} = $commit_email;
1951        \%log_entry;
1952}
1953
1954sub fetch {
1955        my ($self, $min_rev, $max_rev, @parents) = @_;
1956        my ($last_rev, $last_commit) = $self->last_rev_commit;
1957        my ($base, $head) = $self->get_fetch_range($min_rev, $max_rev);
1958        $self->ra->gs_fetch_loop_common($base, $head, [$self]);
1959}
1960
1961sub set_tree_cb {
1962        my ($self, $log_entry, $tree, $rev, $date, $author) = @_;
1963        $self->{inject_parents} = { $rev => $tree };
1964        $self->fetch(undef, undef);
1965}
1966
1967sub set_tree {
1968        my ($self, $tree) = (shift, shift);
1969        my $log_entry = ::get_commit_entry($tree);
1970        unless ($self->{last_rev}) {
1971                fatal("Must have an existing revision to commit");
1972        }
1973        my %ed_opts = ( r => $self->{last_rev},
1974                        log => $log_entry->{log},
1975                        ra => $self->ra,
1976                        tree_a => $self->{last_commit},
1977                        tree_b => $tree,
1978                        editor_cb => sub {
1979                               $self->set_tree_cb($log_entry, $tree, @_) },
1980                        svn_path => $self->path );
1981        if (!Git::SVN::Editor->new(\%ed_opts)->apply_diff) {
1982                print "No changes\nr$self->{last_rev} = $tree\n";
1983        }
1984}
1985
1986sub rebuild_from_rev_db {
1987        my ($self, $path) = @_;
1988        my $r = -1;
1989        open my $fh, '<', $path or croak "open: $!";
1990        binmode $fh or croak "binmode: $!";
1991        while (<$fh>) {
1992                length($_) == 41 or croak "inconsistent size in ($_) != 41";
1993                chomp($_);
1994                ++$r;
1995                next if $_ eq ('0' x 40);
1996                $self->rev_map_set($r, $_);
1997                print "r$r = $_\n";
1998        }
1999        close $fh or croak "close: $!";
2000        unlink $path or croak "unlink: $!";
2001}
2002
2003#define a global associate map to record rebuild status
2004my %rebuild_status;
2005#define a global associate map to record rebuild verify status
2006my %rebuild_verify_status;
2007
2008sub rebuild {
2009        my ($self) = @_;
2010        my $map_path = $self->map_path;
2011        my $partial = (-e $map_path && ! -z $map_path);
2012        my $verify_key = $self->refname.'^0';
2013        if (!$rebuild_verify_status{$verify_key}) {
2014                my $verify_result = ::verify_ref($verify_key);
2015                if ($verify_result) {
2016                        $rebuild_verify_status{$verify_key} = 1;
2017                }
2018        }
2019        if (!$rebuild_verify_status{$verify_key}) {
2020                return;
2021        }
2022        if (!$partial && ($self->use_svm_props || $self->no_metadata)) {
2023                my $rev_db = $self->rev_db_path;
2024                $self->rebuild_from_rev_db($rev_db);
2025                if ($self->use_svm_props) {
2026                        my $svm_rev_db = $self->rev_db_path($self->svm_uuid);
2027                        $self->rebuild_from_rev_db($svm_rev_db);
2028                }
2029                $self->unlink_rev_db_symlink;
2030                return;
2031        }
2032        print "Rebuilding $map_path ...\n" if (!$partial);
2033        my ($base_rev, $head) = ($partial ? $self->rev_map_max_norebuild(1) :
2034                (undef, undef));
2035        my $key_value = ($head ? "$head.." : "") . $self->refname;
2036        if (exists $rebuild_status{$key_value}) {
2037                print "Done rebuilding $map_path\n" if (!$partial || !$head);
2038                my $rev_db_path = $self->rev_db_path;
2039                if (-f $self->rev_db_path) {
2040                        unlink $self->rev_db_path or croak "unlink: $!";
2041                }
2042                $self->unlink_rev_db_symlink;
2043                return;
2044        }
2045        my ($log, $ctx) =
2046                command_output_pipe(qw/rev-list --pretty=raw --reverse/,
2047                                $key_value,
2048                                '--');
2049        $rebuild_status{$key_value} = 1;
2050        my $metadata_url = $self->metadata_url;
2051        remove_username($metadata_url);
2052        my $svn_uuid = $self->rewrite_uuid || $self->ra_uuid;
2053        my $c;
2054        while (<$log>) {
2055                if ( m{^commit ($::sha1)$} ) {
2056                        $c = $1;
2057                        next;
2058                }
2059                next unless s{^\s*(git-svn-id:)}{$1};
2060                my ($url, $rev, $uuid) = ::extract_metadata($_);
2061                remove_username($url);
2062
2063                # ignore merges (from set-tree)
2064                next if (!defined $rev || !$uuid);
2065
2066                # if we merged or otherwise started elsewhere, this is
2067                # how we break out of it
2068                if (($uuid ne $svn_uuid) ||
2069                    ($metadata_url && $url && ($url ne $metadata_url))) {
2070                        next;
2071                }
2072                if ($partial && $head) {
2073                        print "Partial-rebuilding $map_path ...\n";
2074                        print "Currently at $base_rev = $head\n";
2075                        $head = undef;
2076                }
2077
2078                $self->rev_map_set($rev, $c);
2079                print "r$rev = $c\n";
2080        }
2081        command_close_pipe($log, $ctx);
2082        print "Done rebuilding $map_path\n" if (!$partial || !$head);
2083        my $rev_db_path = $self->rev_db_path;
2084        if (-f $self->rev_db_path) {
2085                unlink $self->rev_db_path or croak "unlink: $!";
2086        }
2087        $self->unlink_rev_db_symlink;
2088}
2089
2090# rev_map:
2091# Tie::File seems to be prone to offset errors if revisions get sparse,
2092# it's not that fast, either.  Tie::File is also not in Perl 5.6.  So
2093# one of my favorite modules is out :<  Next up would be one of the DBM
2094# modules, but I'm not sure which is most portable...
2095#
2096# This is the replacement for the rev_db format, which was too big
2097# and inefficient for large repositories with a lot of sparse history
2098# (mainly tags)
2099#
2100# The format is this:
2101#   - 24 bytes for every record,
2102#     * 4 bytes for the integer representing an SVN revision number
2103#     * 20 bytes representing the sha1 of a git commit
2104#   - No empty padding records like the old format
2105#     (except the last record, which can be overwritten)
2106#   - new records are written append-only since SVN revision numbers
2107#     increase monotonically
2108#   - lookups on SVN revision number are done via a binary search
2109#   - Piping the file to xxd -c24 is a good way of dumping it for
2110#     viewing or editing (piped back through xxd -r), should the need
2111#     ever arise.
2112#   - The last record can be padding revision with an all-zero sha1
2113#     This is used to optimize fetch performance when using multiple
2114#     "fetch" directives in .git/config
2115#
2116# These files are disposable unless noMetadata or useSvmProps is set
2117
2118sub _rev_map_set {
2119        my ($fh, $rev, $commit) = @_;
2120
2121        binmode $fh or croak "binmode: $!";
2122        my $size = (stat($fh))[7];
2123        ($size % 24) == 0 or croak "inconsistent size: $size";
2124
2125        my $wr_offset = 0;
2126        if ($size > 0) {
2127                sysseek($fh, -24, SEEK_END) or croak "seek: $!";
2128                my $read = sysread($fh, my $buf, 24) or croak "read: $!";
2129                $read == 24 or croak "read only $read bytes (!= 24)";
2130                my ($last_rev, $last_commit) = unpack(rev_map_fmt, $buf);
2131                if ($last_commit eq ('0' x40)) {
2132                        if ($size >= 48) {
2133                                sysseek($fh, -48, SEEK_END) or croak "seek: $!";
2134                                $read = sysread($fh, $buf, 24) or
2135                                    croak "read: $!";
2136                                $read == 24 or
2137                                    croak "read only $read bytes (!= 24)";
2138                                ($last_rev, $last_commit) =
2139                                    unpack(rev_map_fmt, $buf);
2140                                if ($last_commit eq ('0' x40)) {
2141                                        croak "inconsistent .rev_map\n";
2142                                }
2143                        }
2144                        if ($last_rev >= $rev) {
2145                                croak "last_rev is higher!: $last_rev >= $rev";
2146                        }
2147                        $wr_offset = -24;
2148                }
2149        }
2150        sysseek($fh, $wr_offset, SEEK_END) or croak "seek: $!";
2151        syswrite($fh, pack(rev_map_fmt, $rev, $commit), 24) == 24 or
2152          croak "write: $!";
2153}
2154
2155sub _rev_map_reset {
2156        my ($fh, $rev, $commit) = @_;
2157        my $c = _rev_map_get($fh, $rev);
2158        $c eq $commit or die "_rev_map_reset(@_) commit $c does not match!\n";
2159        my $offset = sysseek($fh, 0, SEEK_CUR) or croak "seek: $!";
2160        truncate $fh, $offset or croak "truncate: $!";
2161}
2162
2163sub mkfile {
2164        my ($path) = @_;
2165        unless (-e $path) {
2166                my ($dir, $base) = ($path =~ m#^(.*?)/?([^/]+)$#);
2167                mkpath([$dir]) unless -d $dir;
2168                open my $fh, '>>', $path or die "Couldn't create $path: $!\n";
2169                close $fh or die "Couldn't close (create) $path: $!\n";
2170        }
2171}
2172
2173sub rev_map_set {
2174        my ($self, $rev, $commit, $update_ref, $uuid) = @_;
2175        defined $commit or die "missing arg3\n";
2176        length $commit == 40 or die "arg3 must be a full SHA1 hexsum\n";
2177        my $db = $self->map_path($uuid);
2178        my $db_lock = "$db.lock";
2179        my $sigmask;
2180        $update_ref ||= 0;
2181        if ($update_ref) {
2182                $sigmask = POSIX::SigSet->new();
2183                my $signew = POSIX::SigSet->new(SIGINT, SIGHUP, SIGTERM,
2184                        SIGALRM, SIGUSR1, SIGUSR2);
2185                sigprocmask(SIG_BLOCK, $signew, $sigmask) or
2186                        croak "Can't block signals: $!";
2187        }
2188        mkfile($db);
2189
2190        $LOCKFILES{$db_lock} = 1;
2191        my $sync;
2192        # both of these options make our .rev_db file very, very important
2193        # and we can't afford to lose it because rebuild() won't work
2194        if ($self->use_svm_props || $self->no_metadata) {
2195                require File::Copy;
2196                $sync = 1;
2197                File::Copy::copy($db, $db_lock) or die "rev_map_set(@_): ",
2198                                           "Failed to copy: ",
2199                                           "$db => $db_lock ($!)\n";
2200        } else {
2201                rename $db, $db_lock or die "rev_map_set(@_): ",
2202                                            "Failed to rename: ",
2203                                            "$db => $db_lock ($!)\n";
2204        }
2205
2206        sysopen(my $fh, $db_lock, O_RDWR | O_CREAT)
2207             or croak "Couldn't open $db_lock: $!\n";
2208        if ($update_ref eq 'reset') {
2209                clear_memoized_mergeinfo_caches();
2210                _rev_map_reset($fh, $rev, $commit);
2211        } else {
2212                _rev_map_set($fh, $rev, $commit);
2213        }
2214
2215        if ($sync) {
2216                $fh->flush or die "Couldn't flush $db_lock: $!\n";
2217                $fh->sync or die "Couldn't sync $db_lock: $!\n";
2218        }
2219        close $fh or croak $!;
2220        if ($update_ref) {
2221                $_head = $self;
2222                my $note = "";
2223                $note = " ($update_ref)" if ($update_ref !~ /^\d*$/);
2224                command_noisy('update-ref', '-m', "r$rev$note",
2225                              $self->refname, $commit);
2226        }
2227        rename $db_lock, $db or die "rev_map_set(@_): ", "Failed to rename: ",
2228                                    "$db_lock => $db ($!)\n";
2229        delete $LOCKFILES{$db_lock};
2230        if ($update_ref) {
2231                sigprocmask(SIG_SETMASK, $sigmask) or
2232                        croak "Can't restore signal mask: $!";
2233        }
2234}
2235
2236# If want_commit, this will return an array of (rev, commit) where
2237# commit _must_ be a valid commit in the archive.
2238# Otherwise, it'll return the max revision (whether or not the
2239# commit is valid or just a 0x40 placeholder).
2240sub rev_map_max {
2241        my ($self, $want_commit) = @_;
2242        $self->rebuild;
2243        my ($r, $c) = $self->rev_map_max_norebuild($want_commit);
2244        $want_commit ? ($r, $c) : $r;
2245}
2246
2247sub rev_map_max_norebuild {
2248        my ($self, $want_commit) = @_;
2249        my $map_path = $self->map_path;
2250        stat $map_path or return $want_commit ? (0, undef) : 0;
2251        sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
2252        binmode $fh or croak "binmode: $!";
2253        my $size = (stat($fh))[7];
2254        ($size % 24) == 0 or croak "inconsistent size: $size";
2255
2256        if ($size == 0) {
2257                close $fh or croak "close: $!";
2258                return $want_commit ? (0, undef) : 0;
2259        }
2260
2261        sysseek($fh, -24, SEEK_END) or croak "seek: $!";
2262        sysread($fh, my $buf, 24) == 24 or croak "read: $!";
2263        my ($r, $c) = unpack(rev_map_fmt, $buf);
2264        if ($want_commit && $c eq ('0' x40)) {
2265                if ($size < 48) {
2266                        return $want_commit ? (0, undef) : 0;
2267                }
2268                sysseek($fh, -48, SEEK_END) or croak "seek: $!";
2269                sysread($fh, $buf, 24) == 24 or croak "read: $!";
2270                ($r, $c) = unpack(rev_map_fmt, $buf);
2271                if ($c eq ('0'x40)) {
2272                        croak "Penultimate record is all-zeroes in $map_path";
2273                }
2274        }
2275        close $fh or croak "close: $!";
2276        $want_commit ? ($r, $c) : $r;
2277}
2278
2279sub rev_map_get {
2280        my ($self, $rev, $uuid) = @_;
2281        my $map_path = $self->map_path($uuid);
2282        return undef unless -e $map_path;
2283
2284        sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
2285        my $c = _rev_map_get($fh, $rev);
2286        close($fh) or croak "close: $!";
2287        $c
2288}
2289
2290sub _rev_map_get {
2291        my ($fh, $rev) = @_;
2292
2293        binmode $fh or croak "binmode: $!";
2294        my $size = (stat($fh))[7];
2295        ($size % 24) == 0 or croak "inconsistent size: $size";
2296
2297        if ($size == 0) {
2298                return undef;
2299        }
2300
2301        my ($l, $u) = (0, $size - 24);
2302        my ($r, $c, $buf);
2303
2304        while ($l <= $u) {
2305                my $i = int(($l/24 + $u/24) / 2) * 24;
2306                sysseek($fh, $i, SEEK_SET) or croak "seek: $!";
2307                sysread($fh, my $buf, 24) == 24 or croak "read: $!";
2308                my ($r, $c) = unpack(rev_map_fmt, $buf);
2309
2310                if ($r < $rev) {
2311                        $l = $i + 24;
2312                } elsif ($r > $rev) {
2313                        $u = $i - 24;
2314                } else { # $r == $rev
2315                        return $c eq ('0' x 40) ? undef : $c;
2316                }
2317        }
2318        undef;
2319}
2320
2321# Finds the first svn revision that exists on (if $eq_ok is true) or
2322# before $rev for the current branch.  It will not search any lower
2323# than $min_rev.  Returns the git commit hash and svn revision number
2324# if found, else (undef, undef).
2325sub find_rev_before {
2326        my ($self, $rev, $eq_ok, $min_rev) = @_;
2327        --$rev unless $eq_ok;
2328        $min_rev ||= 1;
2329        my $max_rev = $self->rev_map_max;
2330        $rev = $max_rev if ($rev > $max_rev);
2331        while ($rev >= $min_rev) {
2332                if (my $c = $self->rev_map_get($rev)) {
2333                        return ($rev, $c);
2334                }
2335                --$rev;
2336        }
2337        return (undef, undef);
2338}
2339
2340# Finds the first svn revision that exists on (if $eq_ok is true) or
2341# after $rev for the current branch.  It will not search any higher
2342# than $max_rev.  Returns the git commit hash and svn revision number
2343# if found, else (undef, undef).
2344sub find_rev_after {
2345        my ($self, $rev, $eq_ok, $max_rev) = @_;
2346        ++$rev unless $eq_ok;
2347        $max_rev ||= $self->rev_map_max;
2348        while ($rev <= $max_rev) {
2349                if (my $c = $self->rev_map_get($rev)) {
2350                        return ($rev, $c);
2351                }
2352                ++$rev;
2353        }
2354        return (undef, undef);
2355}
2356
2357sub _new {
2358        my ($class, $repo_id, $ref_id, $path) = @_;
2359        unless (defined $repo_id && length $repo_id) {
2360                $repo_id = $default_repo_id;
2361        }
2362        unless (defined $ref_id && length $ref_id) {
2363                # Access the prefix option from the git-svn main program if it's loaded.
2364                my $prefix = defined &::opt_prefix ? ::opt_prefix() : "";
2365                $_[2] = $ref_id =
2366                             "refs/remotes/$prefix$default_ref_id";
2367        }
2368        $_[1] = $repo_id;
2369        my $dir = "$ENV{GIT_DIR}/svn/$ref_id";
2370
2371        # Older repos imported by us used $GIT_DIR/svn/foo instead of
2372        # $GIT_DIR/svn/refs/remotes/foo when tracking refs/remotes/foo
2373        if ($ref_id =~ m{^refs/remotes/(.+)}) {
2374                my $old_dir = "$ENV{GIT_DIR}/svn/$1";
2375                if (-d $old_dir && ! -d $dir) {
2376                        $dir = $old_dir;
2377                }
2378        }
2379
2380        $_[3] = $path = '' unless (defined $path);
2381        mkpath([$dir]);
2382        my $obj = bless {
2383                ref_id => $ref_id, dir => $dir, index => "$dir/index",
2384                config => "$ENV{GIT_DIR}/svn/config",
2385                map_root => "$dir/.rev_map", repo_id => $repo_id }, $class;
2386
2387        # Ensure it gets canonicalized
2388        $obj->path($path);
2389
2390        return $obj;
2391}
2392
2393sub path {
2394        my $self = shift;
2395
2396        if (@_) {
2397                my $path = shift;
2398                $self->{_path} = canonicalize_path($path);
2399                return;
2400        }
2401
2402        return $self->{_path};
2403}
2404
2405sub url {
2406        my $self = shift;
2407
2408        if (@_) {
2409                my $url = shift;
2410                $self->{url} = canonicalize_url($url);
2411                return;
2412        }
2413
2414        return $self->{url};
2415}
2416
2417# for read-only access of old .rev_db formats
2418sub unlink_rev_db_symlink {
2419        my ($self) = @_;
2420        my $link = $self->rev_db_path;
2421        $link =~ s/\.[\w-]+$// or croak "missing UUID at the end of $link";
2422        if (-l $link) {
2423                unlink $link or croak "unlink: $link failed!";
2424        }
2425}
2426
2427sub rev_db_path {
2428        my ($self, $uuid) = @_;
2429        my $db_path = $self->map_path($uuid);
2430        $db_path =~ s{/\.rev_map\.}{/\.rev_db\.}
2431            or croak "map_path: $db_path does not contain '/.rev_map.' !";
2432        $db_path;
2433}
2434
2435# the new replacement for .rev_db
2436sub map_path {
2437        my ($self, $uuid) = @_;
2438        $uuid ||= $self->ra_uuid;
2439        "$self->{map_root}.$uuid";
2440}
2441
2442sub uri_encode {
2443        my ($f) = @_;
2444        $f =~ s#([^a-zA-Z0-9\*!\:_\./\-])#sprintf("%%%02X",ord($1))#eg;
2445        $f
2446}
2447
2448sub uri_decode {
2449        my ($f) = @_;
2450        $f =~ s#%([0-9a-fA-F]{2})#chr(hex($1))#eg;
2451        $f
2452}
2453
2454sub remove_username {
2455        $_[0] =~ s{^([^:]*://)[^@]+@}{$1};
2456}
2457
24581;