041791b3ce729dd0d0cd10fce7302d77f9278ecc
   1#!/usr/bin/env perl
   2# Copyright (C) 2006, Eric Wong <normalperson@yhbt.net>
   3# License: GPL v2 or later
   4use warnings;
   5use strict;
   6use vars qw/    $AUTHOR $VERSION
   7                $SVN_URL $SVN_INFO $SVN_WC $SVN_UUID
   8                $GIT_SVN_INDEX $GIT_SVN
   9                $GIT_DIR $REV_DIR/;
  10$AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
  11$VERSION = '0.10.0';
  12$GIT_DIR = $ENV{GIT_DIR} || "$ENV{PWD}/.git";
  13$GIT_SVN = $ENV{GIT_SVN_ID} || 'git-svn';
  14$GIT_SVN_INDEX = "$GIT_DIR/$GIT_SVN/index";
  15$ENV{GIT_DIR} ||= $GIT_DIR;
  16$SVN_URL = undef;
  17$REV_DIR = "$GIT_DIR/$GIT_SVN/revs";
  18$SVN_WC = "$GIT_DIR/$GIT_SVN/tree";
  19
  20# make sure the svn binary gives consistent output between locales and TZs:
  21$ENV{TZ} = 'UTC';
  22$ENV{LC_ALL} = 'C';
  23
  24# If SVN:: library support is added, please make the dependencies
  25# optional and preserve the capability to use the command-line client.
  26# use eval { require SVN::... } to make it lazy load
  27# We don't use any modules not in the standard Perl distribution:
  28use Carp qw/croak/;
  29use IO::File qw//;
  30use File::Basename qw/dirname basename/;
  31use File::Path qw/mkpath/;
  32use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
  33use File::Spec qw//;
  34use POSIX qw/strftime/;
  35my $sha1 = qr/[a-f\d]{40}/;
  36my $sha1_short = qr/[a-f\d]{4,40}/;
  37my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
  38        $_find_copies_harder, $_l, $_version, $_upgrade, $_authors);
  39my (@_branch_from, %tree_map, %users);
  40
  41my %fc_opts = ( 'no-ignore-externals' => \$_no_ignore_ext,
  42                'branch|b=s' => \@_branch_from,
  43                'authors-file|A=s' => \$_authors );
  44my %cmd = (
  45        fetch => [ \&fetch, "Download new revisions from SVN",
  46                        { 'revision|r=s' => \$_revision, %fc_opts } ],
  47        init => [ \&init, "Initialize and fetch (import)", { } ],
  48        commit => [ \&commit, "Commit git revisions to SVN",
  49                        {       'stdin|' => \$_stdin,
  50                                'edit|e' => \$_edit,
  51                                'rmdir' => \$_rmdir,
  52                                'find-copies-harder' => \$_find_copies_harder,
  53                                'l=i' => \$_l,
  54                                %fc_opts,
  55                        } ],
  56        'show-ignore' => [ \&show_ignore, "Show svn:ignore listings", { } ],
  57        rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)",
  58                        { 'no-ignore-externals' => \$_no_ignore_ext,
  59                          'upgrade' => \$_upgrade } ],
  60);
  61my $cmd;
  62for (my $i = 0; $i < @ARGV; $i++) {
  63        if (defined $cmd{$ARGV[$i]}) {
  64                $cmd = $ARGV[$i];
  65                splice @ARGV, $i, 1;
  66                last;
  67        }
  68};
  69
  70# we may be called as git-svn-(command), or git-svn(command).
  71foreach (keys %cmd) {
  72        if (/git\-svn\-?($_)(?:\.\w+)?$/) {
  73                $cmd = $1;
  74                last;
  75        }
  76}
  77
  78my %opts;
  79%opts = %{$cmd{$cmd}->[2]} if (defined $cmd);
  80
  81GetOptions(%opts, 'help|H|h' => \$_help, 'version|V' => \$_version ) or exit 1;
  82usage(0) if $_help;
  83version() if $_version;
  84usage(1) unless defined $cmd;
  85load_authors() if $_authors;
  86svn_check_ignore_externals();
  87$cmd{$cmd}->[0]->(@ARGV);
  88exit 0;
  89
  90####################### primary functions ######################
  91sub usage {
  92        my $exit = shift || 0;
  93        my $fd = $exit ? \*STDERR : \*STDOUT;
  94        print $fd <<"";
  95git-svn - bidirectional operations between a single Subversion tree and git
  96Usage: $0 <command> [options] [arguments]\n
  97Available commands:
  98
  99        foreach (sort keys %cmd) {
 100                print $fd '  ',pack('A13',$_),$cmd{$_}->[1],"\n";
 101        }
 102        print $fd <<"";
 103\nGIT_SVN_ID may be set in the environment to an arbitrary identifier if
 104you're tracking multiple SVN branches/repositories in one git repository
 105and want to keep them separate.  See git-svn(1) for more information.
 106
 107        exit $exit;
 108}
 109
 110sub version {
 111        print "git-svn version $VERSION\n";
 112        exit 0;
 113}
 114
 115sub rebuild {
 116        $SVN_URL = shift or undef;
 117        my $newest_rev = 0;
 118        if ($_upgrade) {
 119                sys('git-update-ref',"refs/remotes/$GIT_SVN","$GIT_SVN-HEAD");
 120        } else {
 121                check_upgrade_needed();
 122        }
 123
 124        my $pid = open(my $rev_list,'-|');
 125        defined $pid or croak $!;
 126        if ($pid == 0) {
 127                exec("git-rev-list","refs/remotes/$GIT_SVN") or croak $!;
 128        }
 129        my $latest;
 130        while (<$rev_list>) {
 131                chomp;
 132                my $c = $_;
 133                croak "Non-SHA1: $c\n" unless $c =~ /^$sha1$/o;
 134                my @commit = grep(/^git-svn-id: /,`git-cat-file commit $c`);
 135                next if (!@commit); # skip merges
 136                my $id = $commit[$#commit];
 137                my ($url, $rev, $uuid) = ($id =~ /^git-svn-id:\s(\S+?)\@(\d+)
 138                                                \s([a-f\d\-]+)$/x);
 139                if (!$rev || !$uuid || !$url) {
 140                        # some of the original repositories I made had
 141                        # indentifiers like this:
 142                        ($rev, $uuid) = ($id =~/^git-svn-id:\s(\d+)
 143                                                        \@([a-f\d\-]+)/x);
 144                        if (!$rev || !$uuid) {
 145                                croak "Unable to extract revision or UUID from ",
 146                                        "$c, $id\n";
 147                        }
 148                }
 149
 150                # if we merged or otherwise started elsewhere, this is
 151                # how we break out of it
 152                next if (defined $SVN_UUID && ($uuid ne $SVN_UUID));
 153                next if (defined $SVN_URL && ($url ne $SVN_URL));
 154
 155                print "r$rev = $c\n";
 156                unless (defined $latest) {
 157                        if (!$SVN_URL && !$url) {
 158                                croak "SVN repository location required: $url\n";
 159                        }
 160                        $SVN_URL ||= $url;
 161                        $SVN_UUID ||= setup_git_svn();
 162                        $latest = $rev;
 163                }
 164                assert_revision_eq_or_unknown($rev, $c);
 165                sys('git-update-ref',"$GIT_SVN/revs/$rev",$c);
 166                $newest_rev = $rev if ($rev > $newest_rev);
 167        }
 168        close $rev_list or croak $?;
 169        if (!chdir $SVN_WC) {
 170                my @svn_co = ('svn','co',"-r$latest");
 171                push @svn_co, '--ignore-externals' unless $_no_ignore_ext;
 172                sys(@svn_co, $SVN_URL, $SVN_WC);
 173                chdir $SVN_WC or croak $!;
 174        }
 175
 176        $pid = fork;
 177        defined $pid or croak $!;
 178        if ($pid == 0) {
 179                my @svn_up = qw(svn up);
 180                push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 181                sys(@svn_up,"-r$newest_rev");
 182                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
 183                git_addremove();
 184                exec('git-write-tree');
 185        }
 186        waitpid $pid, 0;
 187
 188        if ($_upgrade) {
 189                print STDERR <<"";
 190Keeping deprecated refs/head/$GIT_SVN-HEAD for now.  Please remove it
 191when you have upgraded your tools and habits to use refs/remotes/$GIT_SVN
 192
 193        }
 194}
 195
 196sub init {
 197        $SVN_URL = shift or croak "SVN repository location required\n";
 198        unless (-d $GIT_DIR) {
 199                sys('git-init-db');
 200        }
 201        setup_git_svn();
 202}
 203
 204sub fetch {
 205        my (@parents) = @_;
 206        check_upgrade_needed();
 207        $SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
 208        my @log_args = -d $SVN_WC ? ($SVN_WC) : ($SVN_URL);
 209        unless ($_revision) {
 210                $_revision = -d $SVN_WC ? 'BASE:HEAD' : '0:HEAD';
 211        }
 212        push @log_args, "-r$_revision";
 213        push @log_args, '--stop-on-copy' unless $_no_stop_copy;
 214
 215        my $svn_log = svn_log_raw(@log_args);
 216        @$svn_log = sort { $a->{revision} <=> $b->{revision} } @$svn_log;
 217
 218        my $base = shift @$svn_log or croak "No base revision!\n";
 219        my $last_commit = undef;
 220        unless (-d $SVN_WC) {
 221                my @svn_co = ('svn','co',"-r$base->{revision}");
 222                push @svn_co,'--ignore-externals' unless $_no_ignore_ext;
 223                sys(@svn_co, $SVN_URL, $SVN_WC);
 224                chdir $SVN_WC or croak $!;
 225                $last_commit = git_commit($base, @parents);
 226                assert_svn_wc_clean($base->{revision}, $last_commit);
 227        } else {
 228                chdir $SVN_WC or croak $!;
 229                $last_commit = file_to_s("$REV_DIR/$base->{revision}");
 230        }
 231        my @svn_up = qw(svn up);
 232        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 233        my $last_rev = $base->{revision};
 234        foreach my $log_msg (@$svn_log) {
 235                assert_svn_wc_clean($last_rev, $last_commit);
 236                $last_rev = $log_msg->{revision};
 237                sys(@svn_up,"-r$last_rev");
 238                $last_commit = git_commit($log_msg, $last_commit, @parents);
 239        }
 240        assert_svn_wc_clean($last_rev, $last_commit);
 241        unless (-e "$GIT_DIR/refs/heads/master") {
 242                sys(qw(git-update-ref refs/heads/master),$last_commit);
 243        }
 244        return pop @$svn_log;
 245}
 246
 247sub commit {
 248        my (@commits) = @_;
 249        check_upgrade_needed();
 250        if ($_stdin || !@commits) {
 251                print "Reading from stdin...\n";
 252                @commits = ();
 253                while (<STDIN>) {
 254                        if (/\b($sha1_short)\b/o) {
 255                                unshift @commits, $1;
 256                        }
 257                }
 258        }
 259        my @revs;
 260        foreach my $c (@commits) {
 261                chomp(my @tmp = safe_qx('git-rev-parse',$c));
 262                if (scalar @tmp == 1) {
 263                        push @revs, $tmp[0];
 264                } elsif (scalar @tmp > 1) {
 265                        push @revs, reverse (safe_qx('git-rev-list',@tmp));
 266                } else {
 267                        die "Failed to rev-parse $c\n";
 268                }
 269        }
 270        chomp @revs;
 271
 272        fetch();
 273        chdir $SVN_WC or croak $!;
 274        my $svn_current_rev =  svn_info('.')->{'Last Changed Rev'};
 275        foreach my $c (@revs) {
 276                my $mods = svn_checkout_tree($svn_current_rev, $c);
 277                if (scalar @$mods == 0) {
 278                        print "Skipping, no changes detected\n";
 279                        next;
 280                }
 281                $svn_current_rev = svn_commit_tree($svn_current_rev, $c);
 282        }
 283        print "Done committing ",scalar @revs," revisions to SVN\n";
 284
 285}
 286
 287sub show_ignore {
 288        require File::Find or die $!;
 289        my $exclude_file = "$GIT_DIR/info/exclude";
 290        open my $fh, '<', $exclude_file or croak $!;
 291        chomp(my @excludes = (<$fh>));
 292        close $fh or croak $!;
 293
 294        $SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
 295        chdir $SVN_WC or croak $!;
 296        my %ign;
 297        File::Find::find({wanted=>sub{if(lstat $_ && -d _ && -d "$_/.svn"){
 298                s#^\./##;
 299                @{$ign{$_}} = safe_qx(qw(svn propget svn:ignore),$_);
 300                }}, no_chdir=>1},'.');
 301
 302        print "\n# /\n";
 303        foreach (@{$ign{'.'}}) { print '/',$_ if /\S/ }
 304        delete $ign{'.'};
 305        foreach my $i (sort keys %ign) {
 306                print "\n# ",$i,"\n";
 307                foreach (@{$ign{$i}}) { print '/',$i,'/',$_ if /\S/ }
 308        }
 309}
 310
 311########################### utility functions #########################
 312
 313sub setup_git_svn {
 314        defined $SVN_URL or croak "SVN repository location required\n";
 315        unless (-d $GIT_DIR) {
 316                croak "GIT_DIR=$GIT_DIR does not exist!\n";
 317        }
 318        mkpath(["$GIT_DIR/$GIT_SVN"]);
 319        mkpath(["$GIT_DIR/$GIT_SVN/info"]);
 320        mkpath([$REV_DIR]);
 321        s_to_file($SVN_URL,"$GIT_DIR/$GIT_SVN/info/url");
 322        $SVN_UUID = svn_info($SVN_URL)->{'Repository UUID'} or
 323                                        croak "Repository UUID unreadable\n";
 324        s_to_file($SVN_UUID,"$GIT_DIR/$GIT_SVN/info/uuid");
 325
 326        open my $fd, '>>', "$GIT_DIR/$GIT_SVN/info/exclude" or croak $!;
 327        print $fd '.svn',"\n";
 328        close $fd or croak $!;
 329        return $SVN_UUID;
 330}
 331
 332sub assert_svn_wc_clean {
 333        my ($svn_rev, $treeish) = @_;
 334        croak "$svn_rev is not an integer!\n" unless ($svn_rev =~ /^\d+$/);
 335        croak "$treeish is not a sha1!\n" unless ($treeish =~ /^$sha1$/o);
 336        my $svn_info = svn_info('.');
 337        if ($svn_rev != $svn_info->{'Last Changed Rev'}) {
 338                croak "Expected r$svn_rev, got r",
 339                                $svn_info->{'Last Changed Rev'},"\n";
 340        }
 341        my @status = grep(!/^Performing status on external/,(`svn status`));
 342        @status = grep(!/^\s*$/,@status);
 343        if (scalar @status) {
 344                print STDERR "Tree ($SVN_WC) is not clean:\n";
 345                print STDERR $_ foreach @status;
 346                croak;
 347        }
 348        assert_tree($treeish);
 349}
 350
 351sub assert_tree {
 352        my ($treeish) = @_;
 353        croak "Not a sha1: $treeish\n" unless $treeish =~ /^$sha1$/o;
 354        chomp(my $type = `git-cat-file -t $treeish`);
 355        my $expected;
 356        while ($type eq 'tag') {
 357                chomp(($treeish, $type) = `git-cat-file tag $treeish`);
 358        }
 359        if ($type eq 'commit') {
 360                $expected = (grep /^tree /,`git-cat-file commit $treeish`)[0];
 361                ($expected) = ($expected =~ /^tree ($sha1)$/);
 362                die "Unable to get tree from $treeish\n" unless $expected;
 363        } elsif ($type eq 'tree') {
 364                $expected = $treeish;
 365        } else {
 366                die "$treeish is a $type, expected tree, tag or commit\n";
 367        }
 368
 369        my $old_index = $ENV{GIT_INDEX_FILE};
 370        my $tmpindex = $GIT_SVN_INDEX.'.assert-tmp';
 371        if (-e $tmpindex) {
 372                unlink $tmpindex or croak $!;
 373        }
 374        $ENV{GIT_INDEX_FILE} = $tmpindex;
 375        git_addremove();
 376        chomp(my $tree = `git-write-tree`);
 377        if ($old_index) {
 378                $ENV{GIT_INDEX_FILE} = $old_index;
 379        } else {
 380                delete $ENV{GIT_INDEX_FILE};
 381        }
 382        if ($tree ne $expected) {
 383                croak "Tree mismatch, Got: $tree, Expected: $expected\n";
 384        }
 385}
 386
 387sub parse_diff_tree {
 388        my $diff_fh = shift;
 389        local $/ = "\0";
 390        my $state = 'meta';
 391        my @mods;
 392        while (<$diff_fh>) {
 393                chomp $_; # this gets rid of the trailing "\0"
 394                if ($state eq 'meta' && /^:(\d{6})\s(\d{6})\s
 395                                        $sha1\s($sha1)\s([MTCRAD])\d*$/xo) {
 396                        push @mods, {   mode_a => $1, mode_b => $2,
 397                                        sha1_b => $3, chg => $4 };
 398                        if ($4 =~ /^(?:C|R)$/) {
 399                                $state = 'file_a';
 400                        } else {
 401                                $state = 'file_b';
 402                        }
 403                } elsif ($state eq 'file_a') {
 404                        my $x = $mods[$#mods] or croak "Empty array\n";
 405                        if ($x->{chg} !~ /^(?:C|R)$/) {
 406                                croak "Error parsing $_, $x->{chg}\n";
 407                        }
 408                        $x->{file_a} = $_;
 409                        $state = 'file_b';
 410                } elsif ($state eq 'file_b') {
 411                        my $x = $mods[$#mods] or croak "Empty array\n";
 412                        if (exists $x->{file_a} && $x->{chg} !~ /^(?:C|R)$/) {
 413                                croak "Error parsing $_, $x->{chg}\n";
 414                        }
 415                        if (!exists $x->{file_a} && $x->{chg} =~ /^(?:C|R)$/) {
 416                                croak "Error parsing $_, $x->{chg}\n";
 417                        }
 418                        $x->{file_b} = $_;
 419                        $state = 'meta';
 420                } else {
 421                        croak "Error parsing $_\n";
 422                }
 423        }
 424        close $diff_fh or croak $!;
 425
 426        return \@mods;
 427}
 428
 429sub svn_check_prop_executable {
 430        my $m = shift;
 431        return if -l $m->{file_b};
 432        if ($m->{mode_b} =~ /755$/) {
 433                chmod((0755 &~ umask),$m->{file_b}) or croak $!;
 434                if ($m->{mode_a} !~ /755$/) {
 435                        sys(qw(svn propset svn:executable 1), $m->{file_b});
 436                }
 437                -x $m->{file_b} or croak "$m->{file_b} is not executable!\n";
 438        } elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
 439                sys(qw(svn propdel svn:executable), $m->{file_b});
 440                chmod((0644 &~ umask),$m->{file_b}) or croak $!;
 441                -x $m->{file_b} and croak "$m->{file_b} is executable!\n";
 442        }
 443}
 444
 445sub svn_ensure_parent_path {
 446        my $dir_b = dirname(shift);
 447        svn_ensure_parent_path($dir_b) if ($dir_b ne File::Spec->curdir);
 448        mkpath([$dir_b]) unless (-d $dir_b);
 449        sys(qw(svn add -N), $dir_b) unless (-d "$dir_b/.svn");
 450}
 451
 452sub precommit_check {
 453        my $mods = shift;
 454        my (%rm_file, %rmdir_check, %added_check);
 455
 456        my %o = ( D => 0, R => 1, C => 2, A => 3, M => 3, T => 3 );
 457        foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
 458                if ($m->{chg} eq 'R') {
 459                        if (-d $m->{file_b}) {
 460                                err_dir_to_file("$m->{file_a} => $m->{file_b}");
 461                        }
 462                        # dir/$file => dir/file/$file
 463                        my $dirname = dirname($m->{file_b});
 464                        while ($dirname ne File::Spec->curdir) {
 465                                if ($dirname ne $m->{file_a}) {
 466                                        $dirname = dirname($dirname);
 467                                        next;
 468                                }
 469                                err_file_to_dir("$m->{file_a} => $m->{file_b}");
 470                        }
 471                        # baz/zzz => baz (baz is a file)
 472                        $dirname = dirname($m->{file_a});
 473                        while ($dirname ne File::Spec->curdir) {
 474                                if ($dirname ne $m->{file_b}) {
 475                                        $dirname = dirname($dirname);
 476                                        next;
 477                                }
 478                                err_dir_to_file("$m->{file_a} => $m->{file_b}");
 479                        }
 480                }
 481                if ($m->{chg} =~ /^(D|R)$/) {
 482                        my $t = $1 eq 'D' ? 'file_b' : 'file_a';
 483                        $rm_file{ $m->{$t} } = 1;
 484                        my $dirname = dirname( $m->{$t} );
 485                        my $basename = basename( $m->{$t} );
 486                        $rmdir_check{$dirname}->{$basename} = 1;
 487                } elsif ($m->{chg} =~ /^(?:A|C)$/) {
 488                        if (-d $m->{file_b}) {
 489                                err_dir_to_file($m->{file_b});
 490                        }
 491                        my $dirname = dirname( $m->{file_b} );
 492                        my $basename = basename( $m->{file_b} );
 493                        $added_check{$dirname}->{$basename} = 1;
 494                        while ($dirname ne File::Spec->curdir) {
 495                                if ($rm_file{$dirname}) {
 496                                        err_file_to_dir($m->{file_b});
 497                                }
 498                                $dirname = dirname $dirname;
 499                        }
 500                }
 501        }
 502        return (\%rmdir_check, \%added_check);
 503
 504        sub err_dir_to_file {
 505                my $file = shift;
 506                print STDERR "Node change from directory to file ",
 507                                "is not supported by Subversion: ",$file,"\n";
 508                exit 1;
 509        }
 510        sub err_file_to_dir {
 511                my $file = shift;
 512                print STDERR "Node change from file to directory ",
 513                                "is not supported by Subversion: ",$file,"\n";
 514                exit 1;
 515        }
 516}
 517
 518sub svn_checkout_tree {
 519        my ($svn_rev, $treeish) = @_;
 520        my $from = file_to_s("$REV_DIR/$svn_rev");
 521        assert_svn_wc_clean($svn_rev,$from);
 522        print "diff-tree $from $treeish\n";
 523        my $pid = open my $diff_fh, '-|';
 524        defined $pid or croak $!;
 525        if ($pid == 0) {
 526                my @diff_tree = qw(git-diff-tree -z -r -C);
 527                push @diff_tree, '--find-copies-harder' if $_find_copies_harder;
 528                push @diff_tree, "-l$_l" if defined $_l;
 529                exec(@diff_tree, $from, $treeish) or croak $!;
 530        }
 531        my $mods = parse_diff_tree($diff_fh);
 532        unless (@$mods) {
 533                # git can do empty commits, but SVN doesn't allow it...
 534                return $mods;
 535        }
 536        my ($rm, $add) = precommit_check($mods);
 537
 538        my %o = ( D => 1, R => 0, C => -1, A => 3, M => 3, T => 3 );
 539        foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
 540                if ($m->{chg} eq 'C') {
 541                        svn_ensure_parent_path( $m->{file_b} );
 542                        sys(qw(svn cp),         $m->{file_a}, $m->{file_b});
 543                        apply_mod_line_blob($m);
 544                        svn_check_prop_executable($m);
 545                } elsif ($m->{chg} eq 'D') {
 546                        sys(qw(svn rm --force), $m->{file_b});
 547                } elsif ($m->{chg} eq 'R') {
 548                        svn_ensure_parent_path( $m->{file_b} );
 549                        sys(qw(svn mv --force), $m->{file_a}, $m->{file_b});
 550                        apply_mod_line_blob($m);
 551                        svn_check_prop_executable($m);
 552                } elsif ($m->{chg} eq 'M') {
 553                        apply_mod_line_blob($m);
 554                        svn_check_prop_executable($m);
 555                } elsif ($m->{chg} eq 'T') {
 556                        sys(qw(svn rm --force),$m->{file_b});
 557                        apply_mod_line_blob($m);
 558                        sys(qw(svn add --force), $m->{file_b});
 559                        svn_check_prop_executable($m);
 560                } elsif ($m->{chg} eq 'A') {
 561                        svn_ensure_parent_path( $m->{file_b} );
 562                        apply_mod_line_blob($m);
 563                        sys(qw(svn add --force), $m->{file_b});
 564                        svn_check_prop_executable($m);
 565                } else {
 566                        croak "Invalid chg: $m->{chg}\n";
 567                }
 568        }
 569
 570        assert_tree($treeish);
 571        if ($_rmdir) { # remove empty directories
 572                handle_rmdir($rm, $add);
 573        }
 574        assert_tree($treeish);
 575        return $mods;
 576}
 577
 578# svn ls doesn't work with respect to the current working tree, but what's
 579# in the repository.  There's not even an option for it... *sigh*
 580# (added files don't show up and removed files remain in the ls listing)
 581sub svn_ls_current {
 582        my ($dir, $rm, $add) = @_;
 583        chomp(my @ls = safe_qx('svn','ls',$dir));
 584        my @ret = ();
 585        foreach (@ls) {
 586                s#/$##; # trailing slashes are evil
 587                push @ret, $_ unless $rm->{$dir}->{$_};
 588        }
 589        if (exists $add->{$dir}) {
 590                push @ret, keys %{$add->{$dir}};
 591        }
 592        return \@ret;
 593}
 594
 595sub handle_rmdir {
 596        my ($rm, $add) = @_;
 597
 598        foreach my $dir (sort {length $b <=> length $a} keys %$rm) {
 599                my $ls = svn_ls_current($dir, $rm, $add);
 600                next if (scalar @$ls);
 601                sys(qw(svn rm --force),$dir);
 602
 603                my $dn = dirname $dir;
 604                $rm->{ $dn }->{ basename $dir } = 1;
 605                $ls = svn_ls_current($dn, $rm, $add);
 606                while (scalar @$ls == 0 && $dn ne File::Spec->curdir) {
 607                        sys(qw(svn rm --force),$dn);
 608                        $dir = basename $dn;
 609                        $dn = dirname $dn;
 610                        $rm->{ $dn }->{ $dir } = 1;
 611                        $ls = svn_ls_current($dn, $rm, $add);
 612                }
 613        }
 614}
 615
 616sub svn_commit_tree {
 617        my ($svn_rev, $commit) = @_;
 618        my $commit_msg = "$GIT_DIR/$GIT_SVN/.svn-commit.tmp.$$";
 619        my %log_msg = ( msg => '' );
 620        open my $msg, '>', $commit_msg or croak $!;
 621
 622        chomp(my $type = `git-cat-file -t $commit`);
 623        if ($type eq 'commit') {
 624                my $pid = open my $msg_fh, '-|';
 625                defined $pid or croak $!;
 626
 627                if ($pid == 0) {
 628                        exec(qw(git-cat-file commit), $commit) or croak $!;
 629                }
 630                my $in_msg = 0;
 631                while (<$msg_fh>) {
 632                        if (!$in_msg) {
 633                                $in_msg = 1 if (/^\s*$/);
 634                        } elsif (/^git-svn-id: /) {
 635                                # skip this, we regenerate the correct one
 636                                # on re-fetch anyways
 637                        } else {
 638                                print $msg $_ or croak $!;
 639                        }
 640                }
 641                close $msg_fh or croak $!;
 642        }
 643        close $msg or croak $!;
 644
 645        if ($_edit || ($type eq 'tree')) {
 646                my $editor = $ENV{VISUAL} || $ENV{EDITOR} || 'vi';
 647                system($editor, $commit_msg);
 648        }
 649
 650        # file_to_s removes all trailing newlines, so just use chomp() here:
 651        open $msg, '<', $commit_msg or croak $!;
 652        { local $/; chomp($log_msg{msg} = <$msg>); }
 653        close $msg or croak $!;
 654
 655        my ($oneline) = ($log_msg{msg} =~ /([^\n\r]+)/);
 656        print "Committing $commit: $oneline\n";
 657
 658        my @ci_output = safe_qx(qw(svn commit -F),$commit_msg);
 659        my ($committed) = grep(/^Committed revision \d+\./,@ci_output);
 660        unlink $commit_msg;
 661        defined $committed or croak
 662                        "Commit output failed to parse committed revision!\n",
 663                        join("\n",@ci_output),"\n";
 664        my ($rev_committed) = ($committed =~ /^Committed revision (\d+)\./);
 665
 666        my @svn_up = qw(svn up);
 667        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 668        if ($rev_committed == ($svn_rev + 1)) {
 669                push @svn_up, "-r$rev_committed";
 670                sys(@svn_up);
 671                my $info = svn_info('.');
 672                my $date = $info->{'Last Changed Date'} or die "Missing date\n";
 673                if ($info->{'Last Changed Rev'} != $rev_committed) {
 674                        croak "$info->{'Last Changed Rev'} != $rev_committed\n"
 675                }
 676                my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
 677                                        /(\d{4})\-(\d\d)\-(\d\d)\s
 678                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 679                                         or croak "Failed to parse date: $date\n";
 680                $log_msg{date} = "$tz $Y-$m-$d $H:$M:$S";
 681                $log_msg{author} = $info->{'Last Changed Author'};
 682                $log_msg{revision} = $rev_committed;
 683                $log_msg{msg} .= "\n";
 684                my $parent = file_to_s("$REV_DIR/$svn_rev");
 685                git_commit(\%log_msg, $parent, $commit);
 686                return $rev_committed;
 687        }
 688        # resync immediately
 689        push @svn_up, "-r$svn_rev";
 690        sys(@svn_up);
 691        return fetch("$rev_committed=$commit")->{revision};
 692}
 693
 694sub svn_log_raw {
 695        my (@log_args) = @_;
 696        my $pid = open my $log_fh,'-|';
 697        defined $pid or croak $!;
 698
 699        if ($pid == 0) {
 700                exec (qw(svn log), @log_args) or croak $!
 701        }
 702
 703        my @svn_log;
 704        my $state = 'sep';
 705        while (<$log_fh>) {
 706                chomp;
 707                if (/^\-{72}$/) {
 708                        if ($state eq 'msg') {
 709                                if ($svn_log[$#svn_log]->{lines}) {
 710                                        $svn_log[$#svn_log]->{msg} .= $_."\n";
 711                                        unless(--$svn_log[$#svn_log]->{lines}) {
 712                                                $state = 'sep';
 713                                        }
 714                                } else {
 715                                        croak "Log parse error at: $_\n",
 716                                                $svn_log[$#svn_log]->{revision},
 717                                                "\n";
 718                                }
 719                                next;
 720                        }
 721                        if ($state ne 'sep') {
 722                                croak "Log parse error at: $_\n",
 723                                        "state: $state\n",
 724                                        $svn_log[$#svn_log]->{revision},
 725                                        "\n";
 726                        }
 727                        $state = 'rev';
 728
 729                        # if we have an empty log message, put something there:
 730                        if (@svn_log) {
 731                                $svn_log[$#svn_log]->{msg} ||= "\n";
 732                                delete $svn_log[$#svn_log]->{lines};
 733                        }
 734                        next;
 735                }
 736                if ($state eq 'rev' && s/^r(\d+)\s*\|\s*//) {
 737                        my $rev = $1;
 738                        my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3);
 739                        ($lines) = ($lines =~ /(\d+)/);
 740                        my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
 741                                        /(\d{4})\-(\d\d)\-(\d\d)\s
 742                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 743                                         or croak "Failed to parse date: $date\n";
 744                        my %log_msg = ( revision => $rev,
 745                                        date => "$tz $Y-$m-$d $H:$M:$S",
 746                                        author => $author,
 747                                        lines => $lines,
 748                                        msg => '' );
 749                        if (defined $_authors && ! defined $users{$author}) {
 750                                die "Author: $author not defined in ",
 751                                                "$_authors file\n";
 752                        }
 753                        push @svn_log, \%log_msg;
 754                        $state = 'msg_start';
 755                        next;
 756                }
 757                # skip the first blank line of the message:
 758                if ($state eq 'msg_start' && /^$/) {
 759                        $state = 'msg';
 760                } elsif ($state eq 'msg') {
 761                        if ($svn_log[$#svn_log]->{lines}) {
 762                                $svn_log[$#svn_log]->{msg} .= $_."\n";
 763                                unless (--$svn_log[$#svn_log]->{lines}) {
 764                                        $state = 'sep';
 765                                }
 766                        } else {
 767                                croak "Log parse error at: $_\n",
 768                                        $svn_log[$#svn_log]->{revision},"\n";
 769                        }
 770                }
 771        }
 772        close $log_fh or croak $?;
 773        return \@svn_log;
 774}
 775
 776sub svn_info {
 777        my $url = shift || $SVN_URL;
 778
 779        my $pid = open my $info_fh, '-|';
 780        defined $pid or croak $!;
 781
 782        if ($pid == 0) {
 783                exec(qw(svn info),$url) or croak $!;
 784        }
 785
 786        my $ret = {};
 787        # only single-lines seem to exist in svn info output
 788        while (<$info_fh>) {
 789                chomp $_;
 790                if (m#^([^:]+)\s*:\s*(\S.*)$#) {
 791                        $ret->{$1} = $2;
 792                        push @{$ret->{-order}}, $1;
 793                }
 794        }
 795        close $info_fh or croak $!;
 796        return $ret;
 797}
 798
 799sub sys { system(@_) == 0 or croak $? }
 800
 801sub git_addremove {
 802        system( "git-diff-files --name-only -z ".
 803                                " | git-update-index --remove -z --stdin && ".
 804                "git-ls-files -z --others ".
 805                        "'--exclude-from=$GIT_DIR/$GIT_SVN/info/exclude'".
 806                                " | git-update-index --add -z --stdin"
 807                ) == 0 or croak $?
 808}
 809
 810sub s_to_file {
 811        my ($str, $file, $mode) = @_;
 812        open my $fd,'>',$file or croak $!;
 813        print $fd $str,"\n" or croak $!;
 814        close $fd or croak $!;
 815        chmod ($mode &~ umask, $file) if (defined $mode);
 816}
 817
 818sub file_to_s {
 819        my $file = shift;
 820        open my $fd,'<',$file or croak "$!: file: $file\n";
 821        local $/;
 822        my $ret = <$fd>;
 823        close $fd or croak $!;
 824        $ret =~ s/\s*$//s;
 825        return $ret;
 826}
 827
 828sub assert_revision_unknown {
 829        my $revno = shift;
 830        if (-f "$REV_DIR/$revno") {
 831                croak "$REV_DIR/$revno already exists! ",
 832                                "Why are we refetching it?";
 833        }
 834}
 835
 836sub assert_revision_eq_or_unknown {
 837        my ($revno, $commit) = @_;
 838        if (-f "$REV_DIR/$revno") {
 839                my $current = file_to_s("$REV_DIR/$revno");
 840                if ($commit ne $current) {
 841                        croak "$REV_DIR/$revno already exists!\n",
 842                                "current: $current\nexpected: $commit\n";
 843                }
 844                return;
 845        }
 846}
 847
 848sub git_commit {
 849        my ($log_msg, @parents) = @_;
 850        assert_revision_unknown($log_msg->{revision});
 851        my $out_fh = IO::File->new_tmpfile or croak $!;
 852        $SVN_UUID ||= svn_info('.')->{'Repository UUID'};
 853
 854        map_tree_joins() if (@_branch_from && !%tree_map);
 855
 856        # commit parents can be conditionally bound to a particular
 857        # svn revision via: "svn_revno=commit_sha1", filter them out here:
 858        my @exec_parents;
 859        foreach my $p (@parents) {
 860                next unless defined $p;
 861                if ($p =~ /^(\d+)=($sha1_short)$/o) {
 862                        if ($1 == $log_msg->{revision}) {
 863                                push @exec_parents, $2;
 864                        }
 865                } else {
 866                        push @exec_parents, $p if $p =~ /$sha1_short/o;
 867                }
 868        }
 869
 870        my $pid = fork;
 871        defined $pid or croak $!;
 872        if ($pid == 0) {
 873                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
 874                git_addremove();
 875                chomp(my $tree = `git-write-tree`);
 876                croak if $?;
 877                if (exists $tree_map{$tree}) {
 878                        my %seen_parent = map { $_ => 1 } @exec_parents;
 879                        foreach (@{$tree_map{$tree}}) {
 880                                # MAXPARENT is defined to 16 in commit-tree.c:
 881                                if ($seen_parent{$_} || @exec_parents > 16) {
 882                                        next;
 883                                }
 884                                push @exec_parents, $_;
 885                                $seen_parent{$_} = 1;
 886                        }
 887                }
 888                my $msg_fh = IO::File->new_tmpfile or croak $!;
 889                print $msg_fh $log_msg->{msg}, "\ngit-svn-id: ",
 890                                        "$SVN_URL\@$log_msg->{revision}",
 891                                        " $SVN_UUID\n" or croak $!;
 892                $msg_fh->flush == 0 or croak $!;
 893                seek $msg_fh, 0, 0 or croak $!;
 894
 895                set_commit_env($log_msg);
 896
 897                my @exec = ('git-commit-tree',$tree);
 898                push @exec, '-p', $_  foreach @exec_parents;
 899                open STDIN, '<&', $msg_fh or croak $!;
 900                open STDOUT, '>&', $out_fh or croak $!;
 901                exec @exec or croak $!;
 902        }
 903        waitpid($pid,0);
 904        croak if $?;
 905
 906        $out_fh->flush == 0 or croak $!;
 907        seek $out_fh, 0, 0 or croak $!;
 908        chomp(my $commit = do { local $/; <$out_fh> });
 909        if ($commit !~ /^$sha1$/o) {
 910                croak "Failed to commit, invalid sha1: $commit\n";
 911        }
 912        my @update_ref = ('git-update-ref',"refs/remotes/$GIT_SVN",$commit);
 913        if (my $primary_parent = shift @exec_parents) {
 914                push @update_ref, $primary_parent;
 915        }
 916        sys(@update_ref);
 917        sys('git-update-ref',"$GIT_SVN/revs/$log_msg->{revision}",$commit);
 918        print "r$log_msg->{revision} = $commit\n";
 919        return $commit;
 920}
 921
 922sub set_commit_env {
 923        my ($log_msg) = @_;
 924        my $author = $log_msg->{author};
 925        my ($name,$email) = defined $users{$author} ?  @{$users{$author}}
 926                                : ($author,"$author\@$SVN_UUID");
 927        $ENV{GIT_AUTHOR_NAME} = $ENV{GIT_COMMITTER_NAME} = $name;
 928        $ENV{GIT_AUTHOR_EMAIL} = $ENV{GIT_COMMITTER_EMAIL} = $email;
 929        $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_msg->{date};
 930}
 931
 932sub apply_mod_line_blob {
 933        my $m = shift;
 934        if ($m->{mode_b} =~ /^120/) {
 935                blob_to_symlink($m->{sha1_b}, $m->{file_b});
 936        } else {
 937                blob_to_file($m->{sha1_b}, $m->{file_b});
 938        }
 939}
 940
 941sub blob_to_symlink {
 942        my ($blob, $link) = @_;
 943        defined $link or croak "\$link not defined!\n";
 944        croak "Not a sha1: $blob\n" unless $blob =~ /^$sha1$/o;
 945        if (-l $link || -f _) {
 946                unlink $link or croak $!;
 947        }
 948
 949        my $dest = `git-cat-file blob $blob`; # no newline, so no chomp
 950        symlink $dest, $link or croak $!;
 951}
 952
 953sub blob_to_file {
 954        my ($blob, $file) = @_;
 955        defined $file or croak "\$file not defined!\n";
 956        croak "Not a sha1: $blob\n" unless $blob =~ /^$sha1$/o;
 957        if (-l $file || -f _) {
 958                unlink $file or croak $!;
 959        }
 960
 961        open my $blob_fh, '>', $file or croak "$!: $file\n";
 962        my $pid = fork;
 963        defined $pid or croak $!;
 964
 965        if ($pid == 0) {
 966                open STDOUT, '>&', $blob_fh or croak $!;
 967                exec('git-cat-file','blob',$blob);
 968        }
 969        waitpid $pid, 0;
 970        croak $? if $?;
 971
 972        close $blob_fh or croak $!;
 973}
 974
 975sub safe_qx {
 976        my $pid = open my $child, '-|';
 977        defined $pid or croak $!;
 978        if ($pid == 0) {
 979                exec(@_) or croak $?;
 980        }
 981        my @ret = (<$child>);
 982        close $child or croak $?;
 983        die $? if $?; # just in case close didn't error out
 984        return wantarray ? @ret : join('',@ret);
 985}
 986
 987sub svn_check_ignore_externals {
 988        return if $_no_ignore_ext;
 989        unless (grep /ignore-externals/,(safe_qx(qw(svn co -h)))) {
 990                print STDERR "W: Installed svn version does not support ",
 991                                "--ignore-externals\n";
 992                $_no_ignore_ext = 1;
 993        }
 994}
 995
 996sub check_upgrade_needed {
 997        my $old = eval {
 998                my $pid = open my $child, '-|';
 999                defined $pid or croak $!;
1000                if ($pid == 0) {
1001                        close STDERR;
1002                        exec('git-rev-parse',"$GIT_SVN-HEAD") or croak $?;
1003                }
1004                my @ret = (<$child>);
1005                close $child or croak $?;
1006                die $? if $?; # just in case close didn't error out
1007                return wantarray ? @ret : join('',@ret);
1008        };
1009        return unless $old;
1010        my $head = eval { safe_qx('git-rev-parse',"refs/remotes/$GIT_SVN") };
1011        if ($@ || !$head) {
1012                print STDERR "Please run: $0 rebuild --upgrade\n";
1013                exit 1;
1014        }
1015}
1016
1017# fills %tree_map with a reverse mapping of trees to commits.  Useful
1018# for finding parents to commit on.
1019sub map_tree_joins {
1020        foreach my $br (@_branch_from) {
1021                my $pid = open my $pipe, '-|';
1022                defined $pid or croak $!;
1023                if ($pid == 0) {
1024                        exec(qw(git-rev-list --pretty=raw), $br) or croak $?;
1025                }
1026                while (<$pipe>) {
1027                        if (/^commit ($sha1)$/o) {
1028                                my $commit = $1;
1029                                my ($tree) = (<$pipe> =~ /^tree ($sha1)$/o);
1030                                unless (defined $tree) {
1031                                        die "Failed to parse commit $commit\n";
1032                                }
1033                                push @{$tree_map{$tree}}, $commit;
1034                        }
1035                }
1036                close $pipe or croak $?;
1037        }
1038}
1039
1040# '<svn username> = real-name <email address>' mapping based on git-svnimport:
1041sub load_authors {
1042        open my $authors, '<', $_authors or die "Can't open $_authors $!\n";
1043        while (<$authors>) {
1044                chomp;
1045                next unless /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/;
1046                my ($user, $name, $email) = ($1, $2, $3);
1047                $users{$user} = [$name, $email];
1048        }
1049        close $authors or croak $!;
1050}
1051
1052__END__
1053
1054Data structures:
1055
1056@svn_log = array of log_msg hashes
1057
1058$log_msg hash
1059{
1060        msg => 'whitespace-formatted log entry
1061',                                              # trailing newline is preserved
1062        revision => '8',                        # integer
1063        date => '2004-02-24T17:01:44.108345Z',  # commit date
1064        author => 'committer name'
1065};
1066
1067
1068@mods = array of diff-index line hashes, each element represents one line
1069        of diff-index output
1070
1071diff-index line ($m hash)
1072{
1073        mode_a => first column of diff-index output, no leading ':',
1074        mode_b => second column of diff-index output,
1075        sha1_b => sha1sum of the final blob,
1076        chg => change type [MCRADT],
1077        file_a => original file name of a file (iff chg is 'C' or 'R')
1078        file_b => new/current file name of a file (any chg)
1079}
1080;