contrib / git-svn / git-svn.perl on commit contrib/git-svn: make sure our git-svn is up-to-date for test (5f2f424)
   1#!/usr/bin/env perl
   2# Copyright (C) 2006, Eric Wong <normalperson@yhbt.net>
   3# License: GPL v2 or later
   4use warnings;
   5use strict;
   6use vars qw/    $AUTHOR $VERSION
   7                $SVN_URL $SVN_INFO $SVN_WC $SVN_UUID
   8                $GIT_SVN_INDEX $GIT_SVN
   9                $GIT_DIR $REV_DIR/;
  10$AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
  11$VERSION = '0.11.0';
  12
  13use Cwd qw/abs_path/;
  14$GIT_DIR = abs_path($ENV{GIT_DIR} || '.git');
  15$ENV{GIT_DIR} = $GIT_DIR;
  16
  17# make sure the svn binary gives consistent output between locales and TZs:
  18$ENV{TZ} = 'UTC';
  19$ENV{LC_ALL} = 'C';
  20
  21# If SVN:: library support is added, please make the dependencies
  22# optional and preserve the capability to use the command-line client.
  23# use eval { require SVN::... } to make it lazy load
  24# We don't use any modules not in the standard Perl distribution:
  25use Carp qw/croak/;
  26use IO::File qw//;
  27use File::Basename qw/dirname basename/;
  28use File::Path qw/mkpath/;
  29use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
  30use File::Spec qw//;
  31use POSIX qw/strftime/;
  32my $sha1 = qr/[a-f\d]{40}/;
  33my $sha1_short = qr/[a-f\d]{4,40}/;
  34my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
  35        $_find_copies_harder, $_l, $_version, $_upgrade, $_authors);
  36my (@_branch_from, %tree_map, %users);
  37my $_svn_co_url_revs;
  38
  39my %fc_opts = ( 'no-ignore-externals' => \$_no_ignore_ext,
  40                'branch|b=s' => \@_branch_from,
  41                'authors-file|A=s' => \$_authors );
  42my %cmd = (
  43        fetch => [ \&fetch, "Download new revisions from SVN",
  44                        { 'revision|r=s' => \$_revision, %fc_opts } ],
  45        init => [ \&init, "Initialize and fetch (import)", { } ],
  46        commit => [ \&commit, "Commit git revisions to SVN",
  47                        {       'stdin|' => \$_stdin,
  48                                'edit|e' => \$_edit,
  49                                'rmdir' => \$_rmdir,
  50                                'find-copies-harder' => \$_find_copies_harder,
  51                                'l=i' => \$_l,
  52                                %fc_opts,
  53                        } ],
  54        'show-ignore' => [ \&show_ignore, "Show svn:ignore listings", { } ],
  55        rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)",
  56                        { 'no-ignore-externals' => \$_no_ignore_ext,
  57                          'upgrade' => \$_upgrade } ],
  58);
  59my $cmd;
  60for (my $i = 0; $i < @ARGV; $i++) {
  61        if (defined $cmd{$ARGV[$i]}) {
  62                $cmd = $ARGV[$i];
  63                splice @ARGV, $i, 1;
  64                last;
  65        }
  66};
  67
  68my %opts = %{$cmd{$cmd}->[2]} if (defined $cmd);
  69
  70# convert GetOpt::Long specs for use by git-repo-config
  71foreach my $o (keys %opts) {
  72        my $v = $opts{$o};
  73        my ($key) = ($o =~ /^([a-z\-]+)/);
  74        $key =~ s/-//g;
  75        my $arg = 'git-repo-config';
  76        $arg .= ' --int' if ($o =~ /=i$/);
  77        $arg .= ' --bool' if ($o !~ /=[sfi]$/);
  78        $arg .= " svn.$key"; # $key only matches [a-z\-], always shell-safe
  79        if (ref $v eq 'ARRAY') {
  80                chomp(my @tmp = `$arg`);
  81                @$v = @tmp if @tmp;
  82        } else {
  83                chomp(my $tmp = `$arg`);
  84                if ($tmp && !($arg =~ / --bool / && $tmp eq 'false')) {
  85                        $$v = $tmp;
  86                }
  87        }
  88}
  89
  90GetOptions(%opts, 'help|H|h' => \$_help,
  91                'version|V' => \$_version,
  92                'id|i=s' => \$GIT_SVN) or exit 1;
  93
  94$GIT_SVN ||= $ENV{GIT_SVN_ID} || 'git-svn';
  95$GIT_SVN_INDEX = "$GIT_DIR/$GIT_SVN/index";
  96$SVN_URL = undef;
  97$REV_DIR = "$GIT_DIR/$GIT_SVN/revs";
  98$SVN_WC = "$GIT_DIR/$GIT_SVN/tree";
  99
 100usage(0) if $_help;
 101version() if $_version;
 102usage(1) unless defined $cmd;
 103load_authors() if $_authors;
 104svn_compat_check();
 105$cmd{$cmd}->[0]->(@ARGV);
 106exit 0;
 107
 108####################### primary functions ######################
 109sub usage {
 110        my $exit = shift || 0;
 111        my $fd = $exit ? \*STDERR : \*STDOUT;
 112        print $fd <<"";
 113git-svn - bidirectional operations between a single Subversion tree and git
 114Usage: $0 <command> [options] [arguments]\n
 115
 116        print $fd "Available commands:\n" unless $cmd;
 117
 118        foreach (sort keys %cmd) {
 119                next if $cmd && $cmd ne $_;
 120                print $fd '  ',pack('A13',$_),$cmd{$_}->[1],"\n";
 121                foreach (keys %{$cmd{$_}->[2]}) {
 122                        # prints out arguments as they should be passed:
 123                        my $x = s#=s$## ? '<arg>' : s#=i$## ? '<num>' : '';
 124                        print $fd ' ' x 17, join(', ', map { length $_ > 1 ?
 125                                                        "--$_" : "-$_" }
 126                                                split /\|/,$_)," $x\n";
 127                }
 128        }
 129        print $fd <<"";
 130\nGIT_SVN_ID may be set in the environment or via the --id/-i switch to an
 131arbitrary identifier if you're tracking multiple SVN branches/repositories in
 132one git repository and want to keep them separate.  See git-svn(1) for more
 133information.
 134
 135        exit $exit;
 136}
 137
 138sub version {
 139        print "git-svn version $VERSION\n";
 140        exit 0;
 141}
 142
 143sub rebuild {
 144        $SVN_URL = shift or undef;
 145        my $newest_rev = 0;
 146        if ($_upgrade) {
 147                sys('git-update-ref',"refs/remotes/$GIT_SVN","$GIT_SVN-HEAD");
 148        } else {
 149                check_upgrade_needed();
 150        }
 151
 152        my $pid = open(my $rev_list,'-|');
 153        defined $pid or croak $!;
 154        if ($pid == 0) {
 155                exec("git-rev-list","refs/remotes/$GIT_SVN") or croak $!;
 156        }
 157        my $latest;
 158        while (<$rev_list>) {
 159                chomp;
 160                my $c = $_;
 161                croak "Non-SHA1: $c\n" unless $c =~ /^$sha1$/o;
 162                my @commit = grep(/^git-svn-id: /,`git-cat-file commit $c`);
 163                next if (!@commit); # skip merges
 164                my $id = $commit[$#commit];
 165                my ($url, $rev, $uuid) = ($id =~ /^git-svn-id:\s(\S+?)\@(\d+)
 166                                                \s([a-f\d\-]+)$/x);
 167                if (!$rev || !$uuid || !$url) {
 168                        # some of the original repositories I made had
 169                        # indentifiers like this:
 170                        ($rev, $uuid) = ($id =~/^git-svn-id:\s(\d+)
 171                                                        \@([a-f\d\-]+)/x);
 172                        if (!$rev || !$uuid) {
 173                                croak "Unable to extract revision or UUID from ",
 174                                        "$c, $id\n";
 175                        }
 176                }
 177
 178                # if we merged or otherwise started elsewhere, this is
 179                # how we break out of it
 180                next if (defined $SVN_UUID && ($uuid ne $SVN_UUID));
 181                next if (defined $SVN_URL && defined $url && ($url ne $SVN_URL));
 182
 183                print "r$rev = $c\n";
 184                unless (defined $latest) {
 185                        if (!$SVN_URL && !$url) {
 186                                croak "SVN repository location required: $url\n";
 187                        }
 188                        $SVN_URL ||= $url;
 189                        $SVN_UUID ||= $uuid;
 190                        setup_git_svn();
 191                        $latest = $rev;
 192                }
 193                assert_revision_eq_or_unknown($rev, $c);
 194                sys('git-update-ref',"$GIT_SVN/revs/$rev",$c);
 195                $newest_rev = $rev if ($rev > $newest_rev);
 196        }
 197        close $rev_list or croak $?;
 198        if (!chdir $SVN_WC) {
 199                svn_cmd_checkout($SVN_URL, $latest, $SVN_WC);
 200                chdir $SVN_WC or croak $!;
 201        }
 202
 203        $pid = fork;
 204        defined $pid or croak $!;
 205        if ($pid == 0) {
 206                my @svn_up = qw(svn up);
 207                push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 208                sys(@svn_up,"-r$newest_rev");
 209                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
 210                git_addremove();
 211                exec('git-write-tree');
 212        }
 213        waitpid $pid, 0;
 214
 215        if ($_upgrade) {
 216                print STDERR <<"";
 217Keeping deprecated refs/head/$GIT_SVN-HEAD for now.  Please remove it
 218when you have upgraded your tools and habits to use refs/remotes/$GIT_SVN
 219
 220        }
 221}
 222
 223sub init {
 224        $SVN_URL = shift or croak "SVN repository location required\n";
 225        unless (-d $GIT_DIR) {
 226                sys('git-init-db');
 227        }
 228        setup_git_svn();
 229}
 230
 231sub fetch {
 232        my (@parents) = @_;
 233        check_upgrade_needed();
 234        $SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
 235        my @log_args = -d $SVN_WC ? ($SVN_WC) : ($SVN_URL);
 236        unless ($_revision) {
 237                $_revision = -d $SVN_WC ? 'BASE:HEAD' : '0:HEAD';
 238        }
 239        push @log_args, "-r$_revision";
 240        push @log_args, '--stop-on-copy' unless $_no_stop_copy;
 241
 242        my $svn_log = svn_log_raw(@log_args);
 243
 244        my $base = next_log_entry($svn_log) or croak "No base revision!\n";
 245        my $last_commit = undef;
 246        unless (-d $SVN_WC) {
 247                svn_cmd_checkout($SVN_URL,$base->{revision},$SVN_WC);
 248                chdir $SVN_WC or croak $!;
 249                read_uuid();
 250                $last_commit = git_commit($base, @parents);
 251                assert_svn_wc_clean($base->{revision}, $last_commit);
 252        } else {
 253                chdir $SVN_WC or croak $!;
 254                read_uuid();
 255                $last_commit = file_to_s("$REV_DIR/$base->{revision}");
 256        }
 257        my @svn_up = qw(svn up);
 258        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 259        my $last = $base;
 260        while (my $log_msg = next_log_entry($svn_log)) {
 261                assert_svn_wc_clean($last->{revision}, $last_commit);
 262                if ($last->{revision} >= $log_msg->{revision}) {
 263                        croak "Out of order: last >= current: ",
 264                                "$last->{revision} >= $log_msg->{revision}\n";
 265                }
 266                sys(@svn_up,"-r$log_msg->{revision}");
 267                $last_commit = git_commit($log_msg, $last_commit, @parents);
 268                $last = $log_msg;
 269        }
 270        assert_svn_wc_clean($last->{revision}, $last_commit);
 271        unless (-e "$GIT_DIR/refs/heads/master") {
 272                sys(qw(git-update-ref refs/heads/master),$last_commit);
 273        }
 274        return $last;
 275}
 276
 277sub commit {
 278        my (@commits) = @_;
 279        check_upgrade_needed();
 280        if ($_stdin || !@commits) {
 281                print "Reading from stdin...\n";
 282                @commits = ();
 283                while (<STDIN>) {
 284                        if (/\b($sha1_short)\b/o) {
 285                                unshift @commits, $1;
 286                        }
 287                }
 288        }
 289        my @revs;
 290        foreach my $c (@commits) {
 291                chomp(my @tmp = safe_qx('git-rev-parse',$c));
 292                if (scalar @tmp == 1) {
 293                        push @revs, $tmp[0];
 294                } elsif (scalar @tmp > 1) {
 295                        push @revs, reverse (safe_qx('git-rev-list',@tmp));
 296                } else {
 297                        die "Failed to rev-parse $c\n";
 298                }
 299        }
 300        chomp @revs;
 301
 302        fetch();
 303        chdir $SVN_WC or croak $!;
 304        my $info = svn_info('.');
 305        read_uuid($info);
 306        my $svn_current_rev =  $info->{'Last Changed Rev'};
 307        foreach my $c (@revs) {
 308                my $mods = svn_checkout_tree($svn_current_rev, $c);
 309                if (scalar @$mods == 0) {
 310                        print "Skipping, no changes detected\n";
 311                        next;
 312                }
 313                $svn_current_rev = svn_commit_tree($svn_current_rev, $c);
 314        }
 315        print "Done committing ",scalar @revs," revisions to SVN\n";
 316
 317}
 318
 319sub show_ignore {
 320        require File::Find or die $!;
 321        my $exclude_file = "$GIT_DIR/info/exclude";
 322        open my $fh, '<', $exclude_file or croak $!;
 323        chomp(my @excludes = (<$fh>));
 324        close $fh or croak $!;
 325
 326        $SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
 327        chdir $SVN_WC or croak $!;
 328        my %ign;
 329        File::Find::find({wanted=>sub{if(lstat $_ && -d _ && -d "$_/.svn"){
 330                s#^\./##;
 331                @{$ign{$_}} = safe_qx(qw(svn propget svn:ignore),$_);
 332                }}, no_chdir=>1},'.');
 333
 334        print "\n# /\n";
 335        foreach (@{$ign{'.'}}) { print '/',$_ if /\S/ }
 336        delete $ign{'.'};
 337        foreach my $i (sort keys %ign) {
 338                print "\n# ",$i,"\n";
 339                foreach (@{$ign{$i}}) { print '/',$i,'/',$_ if /\S/ }
 340        }
 341}
 342
 343########################### utility functions #########################
 344
 345sub read_uuid {
 346        return if $SVN_UUID;
 347        my $info = shift || svn_info('.');
 348        $SVN_UUID = $info->{'Repository UUID'} or
 349                                        croak "Repository UUID unreadable\n";
 350        s_to_file($SVN_UUID,"$GIT_DIR/$GIT_SVN/info/uuid");
 351}
 352
 353sub setup_git_svn {
 354        defined $SVN_URL or croak "SVN repository location required\n";
 355        unless (-d $GIT_DIR) {
 356                croak "GIT_DIR=$GIT_DIR does not exist!\n";
 357        }
 358        mkpath(["$GIT_DIR/$GIT_SVN"]);
 359        mkpath(["$GIT_DIR/$GIT_SVN/info"]);
 360        mkpath([$REV_DIR]);
 361        s_to_file($SVN_URL,"$GIT_DIR/$GIT_SVN/info/url");
 362
 363        open my $fd, '>>', "$GIT_DIR/$GIT_SVN/info/exclude" or croak $!;
 364        print $fd '.svn',"\n";
 365        close $fd or croak $!;
 366}
 367
 368sub assert_svn_wc_clean {
 369        my ($svn_rev, $treeish) = @_;
 370        croak "$svn_rev is not an integer!\n" unless ($svn_rev =~ /^\d+$/);
 371        croak "$treeish is not a sha1!\n" unless ($treeish =~ /^$sha1$/o);
 372        my $lcr = svn_info('.')->{'Last Changed Rev'};
 373        if ($svn_rev != $lcr) {
 374                print STDERR "Checking for copy-tree ... ";
 375                # use
 376                my @diff = grep(/^Index: /,(safe_qx(qw(svn diff),
 377                                                "-r$lcr:$svn_rev")));
 378                if (@diff) {
 379                        croak "Nope!  Expected r$svn_rev, got r$lcr\n";
 380                } else {
 381                        print STDERR "OK!\n";
 382                }
 383        }
 384        my @status = grep(!/^Performing status on external/,(`svn status`));
 385        @status = grep(!/^\s*$/,@status);
 386        if (scalar @status) {
 387                print STDERR "Tree ($SVN_WC) is not clean:\n";
 388                print STDERR $_ foreach @status;
 389                croak;
 390        }
 391        assert_tree($treeish);
 392}
 393
 394sub assert_tree {
 395        my ($treeish) = @_;
 396        croak "Not a sha1: $treeish\n" unless $treeish =~ /^$sha1$/o;
 397        chomp(my $type = `git-cat-file -t $treeish`);
 398        my $expected;
 399        while ($type eq 'tag') {
 400                chomp(($treeish, $type) = `git-cat-file tag $treeish`);
 401        }
 402        if ($type eq 'commit') {
 403                $expected = (grep /^tree /,`git-cat-file commit $treeish`)[0];
 404                ($expected) = ($expected =~ /^tree ($sha1)$/);
 405                die "Unable to get tree from $treeish\n" unless $expected;
 406        } elsif ($type eq 'tree') {
 407                $expected = $treeish;
 408        } else {
 409                die "$treeish is a $type, expected tree, tag or commit\n";
 410        }
 411
 412        my $old_index = $ENV{GIT_INDEX_FILE};
 413        my $tmpindex = $GIT_SVN_INDEX.'.assert-tmp';
 414        if (-e $tmpindex) {
 415                unlink $tmpindex or croak $!;
 416        }
 417        $ENV{GIT_INDEX_FILE} = $tmpindex;
 418        git_addremove();
 419        chomp(my $tree = `git-write-tree`);
 420        if ($old_index) {
 421                $ENV{GIT_INDEX_FILE} = $old_index;
 422        } else {
 423                delete $ENV{GIT_INDEX_FILE};
 424        }
 425        if ($tree ne $expected) {
 426                croak "Tree mismatch, Got: $tree, Expected: $expected\n";
 427        }
 428}
 429
 430sub parse_diff_tree {
 431        my $diff_fh = shift;
 432        local $/ = "\0";
 433        my $state = 'meta';
 434        my @mods;
 435        while (<$diff_fh>) {
 436                chomp $_; # this gets rid of the trailing "\0"
 437                if ($state eq 'meta' && /^:(\d{6})\s(\d{6})\s
 438                                        $sha1\s($sha1)\s([MTCRAD])\d*$/xo) {
 439                        push @mods, {   mode_a => $1, mode_b => $2,
 440                                        sha1_b => $3, chg => $4 };
 441                        if ($4 =~ /^(?:C|R)$/) {
 442                                $state = 'file_a';
 443                        } else {
 444                                $state = 'file_b';
 445                        }
 446                } elsif ($state eq 'file_a') {
 447                        my $x = $mods[$#mods] or croak "Empty array\n";
 448                        if ($x->{chg} !~ /^(?:C|R)$/) {
 449                                croak "Error parsing $_, $x->{chg}\n";
 450                        }
 451                        $x->{file_a} = $_;
 452                        $state = 'file_b';
 453                } elsif ($state eq 'file_b') {
 454                        my $x = $mods[$#mods] or croak "Empty array\n";
 455                        if (exists $x->{file_a} && $x->{chg} !~ /^(?:C|R)$/) {
 456                                croak "Error parsing $_, $x->{chg}\n";
 457                        }
 458                        if (!exists $x->{file_a} && $x->{chg} =~ /^(?:C|R)$/) {
 459                                croak "Error parsing $_, $x->{chg}\n";
 460                        }
 461                        $x->{file_b} = $_;
 462                        $state = 'meta';
 463                } else {
 464                        croak "Error parsing $_\n";
 465                }
 466        }
 467        close $diff_fh or croak $!;
 468
 469        return \@mods;
 470}
 471
 472sub svn_check_prop_executable {
 473        my $m = shift;
 474        return if -l $m->{file_b};
 475        if ($m->{mode_b} =~ /755$/) {
 476                chmod((0755 &~ umask),$m->{file_b}) or croak $!;
 477                if ($m->{mode_a} !~ /755$/) {
 478                        sys(qw(svn propset svn:executable 1), $m->{file_b});
 479                }
 480                -x $m->{file_b} or croak "$m->{file_b} is not executable!\n";
 481        } elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
 482                sys(qw(svn propdel svn:executable), $m->{file_b});
 483                chmod((0644 &~ umask),$m->{file_b}) or croak $!;
 484                -x $m->{file_b} and croak "$m->{file_b} is executable!\n";
 485        }
 486}
 487
 488sub svn_ensure_parent_path {
 489        my $dir_b = dirname(shift);
 490        svn_ensure_parent_path($dir_b) if ($dir_b ne File::Spec->curdir);
 491        mkpath([$dir_b]) unless (-d $dir_b);
 492        sys(qw(svn add -N), $dir_b) unless (-d "$dir_b/.svn");
 493}
 494
 495sub precommit_check {
 496        my $mods = shift;
 497        my (%rm_file, %rmdir_check, %added_check);
 498
 499        my %o = ( D => 0, R => 1, C => 2, A => 3, M => 3, T => 3 );
 500        foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
 501                if ($m->{chg} eq 'R') {
 502                        if (-d $m->{file_b}) {
 503                                err_dir_to_file("$m->{file_a} => $m->{file_b}");
 504                        }
 505                        # dir/$file => dir/file/$file
 506                        my $dirname = dirname($m->{file_b});
 507                        while ($dirname ne File::Spec->curdir) {
 508                                if ($dirname ne $m->{file_a}) {
 509                                        $dirname = dirname($dirname);
 510                                        next;
 511                                }
 512                                err_file_to_dir("$m->{file_a} => $m->{file_b}");
 513                        }
 514                        # baz/zzz => baz (baz is a file)
 515                        $dirname = dirname($m->{file_a});
 516                        while ($dirname ne File::Spec->curdir) {
 517                                if ($dirname ne $m->{file_b}) {
 518                                        $dirname = dirname($dirname);
 519                                        next;
 520                                }
 521                                err_dir_to_file("$m->{file_a} => $m->{file_b}");
 522                        }
 523                }
 524                if ($m->{chg} =~ /^(D|R)$/) {
 525                        my $t = $1 eq 'D' ? 'file_b' : 'file_a';
 526                        $rm_file{ $m->{$t} } = 1;
 527                        my $dirname = dirname( $m->{$t} );
 528                        my $basename = basename( $m->{$t} );
 529                        $rmdir_check{$dirname}->{$basename} = 1;
 530                } elsif ($m->{chg} =~ /^(?:A|C)$/) {
 531                        if (-d $m->{file_b}) {
 532                                err_dir_to_file($m->{file_b});
 533                        }
 534                        my $dirname = dirname( $m->{file_b} );
 535                        my $basename = basename( $m->{file_b} );
 536                        $added_check{$dirname}->{$basename} = 1;
 537                        while ($dirname ne File::Spec->curdir) {
 538                                if ($rm_file{$dirname}) {
 539                                        err_file_to_dir($m->{file_b});
 540                                }
 541                                $dirname = dirname $dirname;
 542                        }
 543                }
 544        }
 545        return (\%rmdir_check, \%added_check);
 546
 547        sub err_dir_to_file {
 548                my $file = shift;
 549                print STDERR "Node change from directory to file ",
 550                                "is not supported by Subversion: ",$file,"\n";
 551                exit 1;
 552        }
 553        sub err_file_to_dir {
 554                my $file = shift;
 555                print STDERR "Node change from file to directory ",
 556                                "is not supported by Subversion: ",$file,"\n";
 557                exit 1;
 558        }
 559}
 560
 561sub svn_checkout_tree {
 562        my ($svn_rev, $treeish) = @_;
 563        my $from = file_to_s("$REV_DIR/$svn_rev");
 564        assert_svn_wc_clean($svn_rev,$from);
 565        print "diff-tree $from $treeish\n";
 566        my $pid = open my $diff_fh, '-|';
 567        defined $pid or croak $!;
 568        if ($pid == 0) {
 569                my @diff_tree = qw(git-diff-tree -z -r -C);
 570                push @diff_tree, '--find-copies-harder' if $_find_copies_harder;
 571                push @diff_tree, "-l$_l" if defined $_l;
 572                exec(@diff_tree, $from, $treeish) or croak $!;
 573        }
 574        my $mods = parse_diff_tree($diff_fh);
 575        unless (@$mods) {
 576                # git can do empty commits, but SVN doesn't allow it...
 577                return $mods;
 578        }
 579        my ($rm, $add) = precommit_check($mods);
 580
 581        my %o = ( D => 1, R => 0, C => -1, A => 3, M => 3, T => 3 );
 582        foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
 583                if ($m->{chg} eq 'C') {
 584                        svn_ensure_parent_path( $m->{file_b} );
 585                        sys(qw(svn cp),         $m->{file_a}, $m->{file_b});
 586                        apply_mod_line_blob($m);
 587                        svn_check_prop_executable($m);
 588                } elsif ($m->{chg} eq 'D') {
 589                        sys(qw(svn rm --force), $m->{file_b});
 590                } elsif ($m->{chg} eq 'R') {
 591                        svn_ensure_parent_path( $m->{file_b} );
 592                        sys(qw(svn mv --force), $m->{file_a}, $m->{file_b});
 593                        apply_mod_line_blob($m);
 594                        svn_check_prop_executable($m);
 595                } elsif ($m->{chg} eq 'M') {
 596                        apply_mod_line_blob($m);
 597                        svn_check_prop_executable($m);
 598                } elsif ($m->{chg} eq 'T') {
 599                        sys(qw(svn rm --force),$m->{file_b});
 600                        apply_mod_line_blob($m);
 601                        sys(qw(svn add --force), $m->{file_b});
 602                        svn_check_prop_executable($m);
 603                } elsif ($m->{chg} eq 'A') {
 604                        svn_ensure_parent_path( $m->{file_b} );
 605                        apply_mod_line_blob($m);
 606                        sys(qw(svn add --force), $m->{file_b});
 607                        svn_check_prop_executable($m);
 608                } else {
 609                        croak "Invalid chg: $m->{chg}\n";
 610                }
 611        }
 612
 613        assert_tree($treeish);
 614        if ($_rmdir) { # remove empty directories
 615                handle_rmdir($rm, $add);
 616        }
 617        assert_tree($treeish);
 618        return $mods;
 619}
 620
 621# svn ls doesn't work with respect to the current working tree, but what's
 622# in the repository.  There's not even an option for it... *sigh*
 623# (added files don't show up and removed files remain in the ls listing)
 624sub svn_ls_current {
 625        my ($dir, $rm, $add) = @_;
 626        chomp(my @ls = safe_qx('svn','ls',$dir));
 627        my @ret = ();
 628        foreach (@ls) {
 629                s#/$##; # trailing slashes are evil
 630                push @ret, $_ unless $rm->{$dir}->{$_};
 631        }
 632        if (exists $add->{$dir}) {
 633                push @ret, keys %{$add->{$dir}};
 634        }
 635        return \@ret;
 636}
 637
 638sub handle_rmdir {
 639        my ($rm, $add) = @_;
 640
 641        foreach my $dir (sort {length $b <=> length $a} keys %$rm) {
 642                my $ls = svn_ls_current($dir, $rm, $add);
 643                next if (scalar @$ls);
 644                sys(qw(svn rm --force),$dir);
 645
 646                my $dn = dirname $dir;
 647                $rm->{ $dn }->{ basename $dir } = 1;
 648                $ls = svn_ls_current($dn, $rm, $add);
 649                while (scalar @$ls == 0 && $dn ne File::Spec->curdir) {
 650                        sys(qw(svn rm --force),$dn);
 651                        $dir = basename $dn;
 652                        $dn = dirname $dn;
 653                        $rm->{ $dn }->{ $dir } = 1;
 654                        $ls = svn_ls_current($dn, $rm, $add);
 655                }
 656        }
 657}
 658
 659sub svn_commit_tree {
 660        my ($svn_rev, $commit) = @_;
 661        my $commit_msg = "$GIT_DIR/$GIT_SVN/.svn-commit.tmp.$$";
 662        my %log_msg = ( msg => '' );
 663        open my $msg, '>', $commit_msg or croak $!;
 664
 665        chomp(my $type = `git-cat-file -t $commit`);
 666        if ($type eq 'commit') {
 667                my $pid = open my $msg_fh, '-|';
 668                defined $pid or croak $!;
 669
 670                if ($pid == 0) {
 671                        exec(qw(git-cat-file commit), $commit) or croak $!;
 672                }
 673                my $in_msg = 0;
 674                while (<$msg_fh>) {
 675                        if (!$in_msg) {
 676                                $in_msg = 1 if (/^\s*$/);
 677                        } elsif (/^git-svn-id: /) {
 678                                # skip this, we regenerate the correct one
 679                                # on re-fetch anyways
 680                        } else {
 681                                print $msg $_ or croak $!;
 682                        }
 683                }
 684                close $msg_fh or croak $!;
 685        }
 686        close $msg or croak $!;
 687
 688        if ($_edit || ($type eq 'tree')) {
 689                my $editor = $ENV{VISUAL} || $ENV{EDITOR} || 'vi';
 690                system($editor, $commit_msg);
 691        }
 692
 693        # file_to_s removes all trailing newlines, so just use chomp() here:
 694        open $msg, '<', $commit_msg or croak $!;
 695        { local $/; chomp($log_msg{msg} = <$msg>); }
 696        close $msg or croak $!;
 697
 698        my ($oneline) = ($log_msg{msg} =~ /([^\n\r]+)/);
 699        print "Committing $commit: $oneline\n";
 700
 701        my @ci_output = safe_qx(qw(svn commit -F),$commit_msg);
 702        my ($committed) = grep(/^Committed revision \d+\./,@ci_output);
 703        unlink $commit_msg;
 704        defined $committed or croak
 705                        "Commit output failed to parse committed revision!\n",
 706                        join("\n",@ci_output),"\n";
 707        my ($rev_committed) = ($committed =~ /^Committed revision (\d+)\./);
 708
 709        my @svn_up = qw(svn up);
 710        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 711        if ($rev_committed == ($svn_rev + 1)) {
 712                push @svn_up, "-r$rev_committed";
 713                sys(@svn_up);
 714                my $info = svn_info('.');
 715                my $date = $info->{'Last Changed Date'} or die "Missing date\n";
 716                if ($info->{'Last Changed Rev'} != $rev_committed) {
 717                        croak "$info->{'Last Changed Rev'} != $rev_committed\n"
 718                }
 719                my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
 720                                        /(\d{4})\-(\d\d)\-(\d\d)\s
 721                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 722                                         or croak "Failed to parse date: $date\n";
 723                $log_msg{date} = "$tz $Y-$m-$d $H:$M:$S";
 724                $log_msg{author} = $info->{'Last Changed Author'};
 725                $log_msg{revision} = $rev_committed;
 726                $log_msg{msg} .= "\n";
 727                my $parent = file_to_s("$REV_DIR/$svn_rev");
 728                git_commit(\%log_msg, $parent, $commit);
 729                return $rev_committed;
 730        }
 731        # resync immediately
 732        push @svn_up, "-r$svn_rev";
 733        sys(@svn_up);
 734        return fetch("$rev_committed=$commit")->{revision};
 735}
 736
 737# read the entire log into a temporary file (which is removed ASAP)
 738# and store the file handle + parser state
 739sub svn_log_raw {
 740        my (@log_args) = @_;
 741        my $log_fh = IO::File->new_tmpfile or croak $!;
 742        my $pid = fork;
 743        defined $pid or croak $!;
 744        if (!$pid) {
 745                open STDOUT, '>&', $log_fh or croak $!;
 746                exec (qw(svn log), @log_args) or croak $!
 747        }
 748        waitpid $pid, 0;
 749        croak if $?;
 750        seek $log_fh, 0, 0 or croak $!;
 751        return { state => 'sep', fh => $log_fh };
 752}
 753
 754sub next_log_entry {
 755        my $log = shift; # retval of svn_log_raw()
 756        my $ret = undef;
 757        my $fh = $log->{fh};
 758
 759        while (<$fh>) {
 760                chomp;
 761                if (/^\-{72}$/) {
 762                        if ($log->{state} eq 'msg') {
 763                                if ($ret->{lines}) {
 764                                        $ret->{msg} .= $_."\n";
 765                                        unless(--$ret->{lines}) {
 766                                                $log->{state} = 'sep';
 767                                        }
 768                                } else {
 769                                        croak "Log parse error at: $_\n",
 770                                                $ret->{revision},
 771                                                "\n";
 772                                }
 773                                next;
 774                        }
 775                        if ($log->{state} ne 'sep') {
 776                                croak "Log parse error at: $_\n",
 777                                        "state: $log->{state}\n",
 778                                        $ret->{revision},
 779                                        "\n";
 780                        }
 781                        $log->{state} = 'rev';
 782
 783                        # if we have an empty log message, put something there:
 784                        if ($ret) {
 785                                $ret->{msg} ||= "\n";
 786                                delete $ret->{lines};
 787                                return $ret;
 788                        }
 789                        next;
 790                }
 791                if ($log->{state} eq 'rev' && s/^r(\d+)\s*\|\s*//) {
 792                        my $rev = $1;
 793                        my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3);
 794                        ($lines) = ($lines =~ /(\d+)/);
 795                        my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
 796                                        /(\d{4})\-(\d\d)\-(\d\d)\s
 797                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 798                                         or croak "Failed to parse date: $date\n";
 799                        $ret = {        revision => $rev,
 800                                        date => "$tz $Y-$m-$d $H:$M:$S",
 801                                        author => $author,
 802                                        lines => $lines,
 803                                        msg => '' };
 804                        if (defined $_authors && ! defined $users{$author}) {
 805                                die "Author: $author not defined in ",
 806                                                "$_authors file\n";
 807                        }
 808                        $log->{state} = 'msg_start';
 809                        next;
 810                }
 811                # skip the first blank line of the message:
 812                if ($log->{state} eq 'msg_start' && /^$/) {
 813                        $log->{state} = 'msg';
 814                } elsif ($log->{state} eq 'msg') {
 815                        if ($ret->{lines}) {
 816                                $ret->{msg} .= $_."\n";
 817                                unless (--$ret->{lines}) {
 818                                        $log->{state} = 'sep';
 819                                }
 820                        } else {
 821                                croak "Log parse error at: $_\n",
 822                                        $ret->{revision},"\n";
 823                        }
 824                }
 825        }
 826        return $ret;
 827}
 828
 829sub svn_info {
 830        my $url = shift || $SVN_URL;
 831
 832        my $pid = open my $info_fh, '-|';
 833        defined $pid or croak $!;
 834
 835        if ($pid == 0) {
 836                exec(qw(svn info),$url) or croak $!;
 837        }
 838
 839        my $ret = {};
 840        # only single-lines seem to exist in svn info output
 841        while (<$info_fh>) {
 842                chomp $_;
 843                if (m#^([^:]+)\s*:\s*(\S.*)$#) {
 844                        $ret->{$1} = $2;
 845                        push @{$ret->{-order}}, $1;
 846                }
 847        }
 848        close $info_fh or croak $!;
 849        return $ret;
 850}
 851
 852sub sys { system(@_) == 0 or croak $? }
 853
 854sub git_addremove {
 855        system( "git-diff-files --name-only -z ".
 856                                " | git-update-index --remove -z --stdin && ".
 857                "git-ls-files -z --others ".
 858                        "'--exclude-from=$GIT_DIR/$GIT_SVN/info/exclude'".
 859                                " | git-update-index --add -z --stdin"
 860                ) == 0 or croak $?
 861}
 862
 863sub s_to_file {
 864        my ($str, $file, $mode) = @_;
 865        open my $fd,'>',$file or croak $!;
 866        print $fd $str,"\n" or croak $!;
 867        close $fd or croak $!;
 868        chmod ($mode &~ umask, $file) if (defined $mode);
 869}
 870
 871sub file_to_s {
 872        my $file = shift;
 873        open my $fd,'<',$file or croak "$!: file: $file\n";
 874        local $/;
 875        my $ret = <$fd>;
 876        close $fd or croak $!;
 877        $ret =~ s/\s*$//s;
 878        return $ret;
 879}
 880
 881sub assert_revision_unknown {
 882        my $revno = shift;
 883        if (-f "$REV_DIR/$revno") {
 884                croak "$REV_DIR/$revno already exists! ",
 885                                "Why are we refetching it?";
 886        }
 887}
 888
 889sub trees_eq {
 890        my ($x, $y) = @_;
 891        my @x = safe_qx('git-cat-file','commit',$x);
 892        my @y = safe_qx('git-cat-file','commit',$y);
 893        if (($y[0] ne $x[0]) || $x[0] !~ /^tree $sha1\n$/
 894                                || $y[0] !~ /^tree $sha1\n$/) {
 895                print STDERR "Trees not equal: $y[0] != $x[0]\n";
 896                return 0
 897        }
 898        return 1;
 899}
 900
 901sub assert_revision_eq_or_unknown {
 902        my ($revno, $commit) = @_;
 903        if (-f "$REV_DIR/$revno") {
 904                my $current = file_to_s("$REV_DIR/$revno");
 905                if (($commit ne $current) && !trees_eq($commit, $current)) {
 906                        croak "$REV_DIR/$revno already exists!\n",
 907                                "current: $current\nexpected: $commit\n";
 908                }
 909                return;
 910        }
 911}
 912
 913sub git_commit {
 914        my ($log_msg, @parents) = @_;
 915        assert_revision_unknown($log_msg->{revision});
 916        my $out_fh = IO::File->new_tmpfile or croak $!;
 917
 918        map_tree_joins() if (@_branch_from && !%tree_map);
 919
 920        # commit parents can be conditionally bound to a particular
 921        # svn revision via: "svn_revno=commit_sha1", filter them out here:
 922        my @exec_parents;
 923        foreach my $p (@parents) {
 924                next unless defined $p;
 925                if ($p =~ /^(\d+)=($sha1_short)$/o) {
 926                        if ($1 == $log_msg->{revision}) {
 927                                push @exec_parents, $2;
 928                        }
 929                } else {
 930                        push @exec_parents, $p if $p =~ /$sha1_short/o;
 931                }
 932        }
 933
 934        my $pid = fork;
 935        defined $pid or croak $!;
 936        if ($pid == 0) {
 937                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
 938                git_addremove();
 939                chomp(my $tree = `git-write-tree`);
 940                croak if $?;
 941                if (exists $tree_map{$tree}) {
 942                        my %seen_parent = map { $_ => 1 } @exec_parents;
 943                        foreach (@{$tree_map{$tree}}) {
 944                                # MAXPARENT is defined to 16 in commit-tree.c:
 945                                if ($seen_parent{$_} || @exec_parents > 16) {
 946                                        next;
 947                                }
 948                                push @exec_parents, $_;
 949                                $seen_parent{$_} = 1;
 950                        }
 951                }
 952                my $msg_fh = IO::File->new_tmpfile or croak $!;
 953                print $msg_fh $log_msg->{msg}, "\ngit-svn-id: ",
 954                                        "$SVN_URL\@$log_msg->{revision}",
 955                                        " $SVN_UUID\n" or croak $!;
 956                $msg_fh->flush == 0 or croak $!;
 957                seek $msg_fh, 0, 0 or croak $!;
 958
 959                set_commit_env($log_msg);
 960
 961                my @exec = ('git-commit-tree',$tree);
 962                push @exec, '-p', $_  foreach @exec_parents;
 963                open STDIN, '<&', $msg_fh or croak $!;
 964                open STDOUT, '>&', $out_fh or croak $!;
 965                exec @exec or croak $!;
 966        }
 967        waitpid($pid,0);
 968        croak if $?;
 969
 970        $out_fh->flush == 0 or croak $!;
 971        seek $out_fh, 0, 0 or croak $!;
 972        chomp(my $commit = do { local $/; <$out_fh> });
 973        if ($commit !~ /^$sha1$/o) {
 974                croak "Failed to commit, invalid sha1: $commit\n";
 975        }
 976        my @update_ref = ('git-update-ref',"refs/remotes/$GIT_SVN",$commit);
 977        if (my $primary_parent = shift @exec_parents) {
 978                $pid = fork;
 979                defined $pid or croak $!;
 980                if (!$pid) {
 981                        close STDERR;
 982                        close STDOUT;
 983                        exec 'git-rev-parse','--verify',
 984                                                "refs/remotes/$GIT_SVN^0";
 985                }
 986                waitpid $pid, 0;
 987                push @update_ref, $primary_parent unless $?;
 988        }
 989        sys(@update_ref);
 990        sys('git-update-ref',"$GIT_SVN/revs/$log_msg->{revision}",$commit);
 991        print "r$log_msg->{revision} = $commit\n";
 992        return $commit;
 993}
 994
 995sub set_commit_env {
 996        my ($log_msg) = @_;
 997        my $author = $log_msg->{author};
 998        my ($name,$email) = defined $users{$author} ?  @{$users{$author}}
 999                                : ($author,"$author\@$SVN_UUID");
1000        $ENV{GIT_AUTHOR_NAME} = $ENV{GIT_COMMITTER_NAME} = $name;
1001        $ENV{GIT_AUTHOR_EMAIL} = $ENV{GIT_COMMITTER_EMAIL} = $email;
1002        $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_msg->{date};
1003}
1004
# Materialize the final blob of a diff-index line hash ($m) at its file_b
# path: as a symlink when mode_b starts with 120, as a regular file
# otherwise.
sub apply_mod_line_blob {
        my ($mod) = @_;
        my $writer = $mod->{mode_b} =~ /^120/ ? \&blob_to_symlink
                                              : \&blob_to_file;
        return $writer->($mod->{sha1_b}, $mod->{file_b});
}
1013
# Create $link as a symlink whose target is the content of blob $blob.
# An existing symlink or regular file at $link is removed first.
# Croaks when $link is undefined, $blob is not a full sha1, or the
# unlink/symlink calls fail.
sub blob_to_symlink {
        my ($blob, $link) = @_;
        croak "\$link not defined!\n" unless defined $link;
        $blob =~ /^$sha1$/o or croak "Not a sha1: $blob\n";
        if (-l $link or -f _) {
                unlink($link) or croak $!;
        }

        # the blob content is used verbatim as the target (no chomp)
        my $target = qx(git-cat-file blob $blob);
        symlink($target, $link) or croak $!;
}
1025
# Write the content of blob $blob to $file, replacing an existing symlink
# or regular file at that path.  The content is produced by a forked
# "git-cat-file blob" whose stdout is redirected into the file.
# Croaks when $file is undefined, $blob is not a full sha1, the file
# cannot be replaced/opened/closed, or git-cat-file fails.
sub blob_to_file {
        my ($blob, $file) = @_;
        defined $file or croak "\$file not defined!\n";
        croak "Not a sha1: $blob\n" unless $blob =~ /^$sha1$/o;
        if (-l $file || -f _) {
                unlink $file or croak $!;
        }

        open my $blob_fh, '>', $file or croak "$!: $file\n";
        my $pid = fork;
        defined $pid or croak $!;

        if ($pid == 0) {
                open STDOUT, '>&', $blob_fh or croak $!;
                # if exec fails the child has to stop here; otherwise it
                # would carry on executing the parent's code below
                exec('git-cat-file','blob',$blob) or do {
                        print STDERR "exec git-cat-file: $!\n";
                        POSIX::_exit(1);
                };
        }
        waitpid $pid, 0;
        croak $? if $?;

        close $blob_fh or croak $!;
}
1047
# Backtick replacement: run the command given in @_ via exec() in a forked
# child (no shell when more than one argument is passed) and capture its
# stdout.  Returns the list of output lines in list context and the
# concatenated output in scalar context.
# Croaks if the fork fails or the command exits non-zero.
sub safe_qx {
        my $pid = open my $child, '-|';
        defined $pid or croak $!;
        if ($pid == 0) {
                # The child must never die here: callers wrap safe_qx() in
                # eval {}, which would catch the exception and let the child
                # continue running the program.  Report errno and leave.
                exec(@_) or do {
                        print STDERR "exec @_: $!\n";
                        POSIX::_exit(127);
                };
        }
        my @ret = (<$child>);
        close $child or croak $?;
        die $? if $?; # just in case close didn't error out
        return wantarray ? @ret : join('',@ret);
}
1059
# Probe the help output of the installed svn client and set feature flags:
#   $_no_ignore_ext   - set when "svn co -h" does not mention
#                       ignore-externals
#   $_svn_co_url_revs - set when "svn co -h" shows the URL[@REV] usage
#   $_no_stop_copy    - set when "svn log -h" does not mention stop-on-copy
# A warning goes to STDERR for each missing feature.
sub svn_compat_check {
        my @co_help = safe_qx(qw(svn co -h));
        if (!grep { /ignore-externals/ } @co_help) {
                print STDERR "W: Installed svn version does not support ",
                                "--ignore-externals\n";
                $_no_ignore_ext = 1;
        }
        $_svn_co_url_revs = 1
                if grep { /usage: checkout URL\[\@REV\]/ } @co_help;

        # I really, really hope nobody hits this...
        my @log_help = safe_qx(qw(svn log -h));
        return if grep { /stop-on-copy/ } @log_help;

        print STDERR <<'EOW';
W: The installed svn version does not support the --stop-on-copy flag in
   the log command.
   Lets hope the directory you're tracking is not a branch or tag
   and was never moved within the repository...
EOW
        $_no_stop_copy = 1;
}
1082
# Check out revision $rev of $url into $dir with "svn co".
# Newer svn clients only honor -r<rev> together with URL@<rev> (and the
# other way round), so the revision is also appended to the URL whenever
# svn_compat_check() detected that syntax ($_svn_co_url_revs).
# --ignore-externals is passed unless $_no_ignore_ext is set.
sub svn_cmd_checkout {
        my ($url, $rev, $dir) = @_;
        my @options = ("-r$rev");
        push @options, '--ignore-externals' if !$_no_ignore_ext;
        my $source = $_svn_co_url_revs ? "$url\@$rev" : $url;
        sys('svn', 'co', @options, $source, $dir);
}
1092
# Detect a repository that still uses the old "$GIT_SVN-HEAD" ref.
# When that ref resolves but "refs/remotes/$GIT_SVN" does not, tell the
# user to run "rebuild --upgrade" and exit with status 1.  Returns quietly
# when the old ref does not resolve.
sub check_upgrade_needed {
        # same as safe_qx(), but with the child's stderr silenced
        my $old = eval {
                my $pid = open my $child, '-|';
                defined $pid or croak $!;
                if ($pid == 0) {
                        close STDERR;
                        # Do not die on exec failure: the surrounding eval
                        # would catch it and the child would keep running
                        # the rest of the program.
                        exec('git-rev-parse',"$GIT_SVN-HEAD")
                                or POSIX::_exit(1);
                }
                my @ret = (<$child>);
                close $child or croak $?;
                die $? if $?; # just in case close didn't error out
                join('',@ret);
        };
        return unless $old;
        my $head = eval { safe_qx('git-rev-parse',"refs/remotes/$GIT_SVN") };
        if ($@ || !$head) {
                print STDERR "Please run: $0 rebuild --upgrade\n";
                exit 1;
        }
}
1113
# Fill %tree_map with a reverse mapping of tree sha1 => [commit sha1, ...]
# for every commit listed by "git-rev-list --pretty=raw" on each branch in
# @_branch_from.  Useful for finding parents to commit on.
# Dies when a "commit" line is not directly followed by a "tree" line.
sub map_tree_joins {
        for my $branch (@_branch_from) {
                my $pid = open(my $rev_list, '-|');
                croak $! unless defined $pid;
                if ($pid == 0) {
                        exec('git-rev-list', '--pretty=raw', $branch)
                                or croak $?;
                }
                while (<$rev_list>) {
                        next unless /^commit ($sha1)$/o;
                        my $commit = $1;
                        # the tree line comes right after the commit line
                        my ($tree) = (<$rev_list> =~ /^tree ($sha1)$/o);
                        defined $tree
                                or die "Failed to parse commit $commit\n";
                        push @{$tree_map{$tree}}, $commit;
                }
                close($rev_list) or croak $?;
        }
}
1136
# Load the authors file named by $_authors into %users.  Each useful line
# has the form '<svn username> = real-name <email address>' (mapping based
# on git-svnimport) and is stored as $users{username} = [name, email].
# Lines that do not match are skipped.  Dies if the file cannot be opened.
sub load_authors {
        open(my $authors_fh, '<', $_authors)
                or die "Can't open $_authors $!\n";
        while (<$authors_fh>) {
                chomp;
                if (my ($user, $name, $email) =
                                /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
                        $users{$user} = [$name, $email];
                }
        }
        close($authors_fh) or croak $!;
}
1148
1149__END__
1150
1151Data structures:
1152
1153$svn_log hashref (as returned by svn_log_raw)
1154{
1155        fh => file handle of the log file,
1156        state => state of the log file parser (sep/msg/rev/msg_start...)
1157}
1158
1159$log_msg hashref as returned by next_log_entry($svn_log)
1160{
1161        msg => 'whitespace-formatted log entry
1162',                                              # trailing newline is preserved
1163        revision => '8',                        # integer
 1164        date => '+0000 2004-02-24 17:01:44',    # commit date: "<tz> Y-m-d H:M:S"
1165        author => 'committer name'
1166};
1167
1168
1169@mods = array of diff-index line hashes, each element represents one line
1170        of diff-index output
1171
1172diff-index line ($m hash)
1173{
1174        mode_a => first column of diff-index output, no leading ':',
1175        mode_b => second column of diff-index output,
1176        sha1_b => sha1sum of the final blob,
1177        chg => change type [MCRADT],
1178        file_a => original file name of a file (iff chg is 'C' or 'R')
1179        file_b => new/current file name of a file (any chg)
1180}
1181;