contrib / git-svn / git-svn.perlon commit More accurately detect header lines in read_one_header_line (f30b202)
   1#!/usr/bin/env perl
   2# Copyright (C) 2006, Eric Wong <normalperson@yhbt.net>
   3# License: GPL v2 or later
   4use warnings;
   5use strict;
   6use vars qw/    $AUTHOR $VERSION
   7                $SVN_URL $SVN_INFO $SVN_WC $SVN_UUID
   8                $GIT_SVN_INDEX $GIT_SVN
   9                $GIT_DIR $REV_DIR/;
  10$AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
  11$VERSION = '1.0.0';
  12
  13use Cwd qw/abs_path/;
  14$GIT_DIR = abs_path($ENV{GIT_DIR} || '.git');
  15$ENV{GIT_DIR} = $GIT_DIR;
  16
  17# make sure the svn binary gives consistent output between locales and TZs:
  18$ENV{TZ} = 'UTC';
  19$ENV{LC_ALL} = 'C';
  20
  21# If SVN:: library support is added, please make the dependencies
  22# optional and preserve the capability to use the command-line client.
  23# use eval { require SVN::... } to make it lazy load
  24# We don't use any modules not in the standard Perl distribution:
  25use Carp qw/croak/;
  26use IO::File qw//;
  27use File::Basename qw/dirname basename/;
  28use File::Path qw/mkpath/;
  29use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
  30use File::Spec qw//;
  31use POSIX qw/strftime/;
  32my $sha1 = qr/[a-f\d]{40}/;
  33my $sha1_short = qr/[a-f\d]{4,40}/;
  34my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
  35        $_find_copies_harder, $_l, $_version, $_upgrade, $_authors);
  36my (@_branch_from, %tree_map, %users);
  37my $_svn_co_url_revs;
  38
  39my %fc_opts = ( 'no-ignore-externals' => \$_no_ignore_ext,
  40                'branch|b=s' => \@_branch_from,
  41                'authors-file|A=s' => \$_authors );
  42my %cmd = (
  43        fetch => [ \&fetch, "Download new revisions from SVN",
  44                        { 'revision|r=s' => \$_revision, %fc_opts } ],
  45        init => [ \&init, "Initialize a repo for tracking" .
  46                          " (requires URL argument)", { } ],
  47        commit => [ \&commit, "Commit git revisions to SVN",
  48                        {       'stdin|' => \$_stdin,
  49                                'edit|e' => \$_edit,
  50                                'rmdir' => \$_rmdir,
  51                                'find-copies-harder' => \$_find_copies_harder,
  52                                'l=i' => \$_l,
  53                                %fc_opts,
  54                        } ],
  55        'show-ignore' => [ \&show_ignore, "Show svn:ignore listings", { } ],
  56        rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)",
  57                        { 'no-ignore-externals' => \$_no_ignore_ext,
  58                          'upgrade' => \$_upgrade } ],
  59);
  60my $cmd;
  61for (my $i = 0; $i < @ARGV; $i++) {
  62        if (defined $cmd{$ARGV[$i]}) {
  63                $cmd = $ARGV[$i];
  64                splice @ARGV, $i, 1;
  65                last;
  66        }
  67};
  68
  69my %opts = %{$cmd{$cmd}->[2]} if (defined $cmd);
  70
  71# convert GetOpt::Long specs for use by git-repo-config
  72foreach my $o (keys %opts) {
  73        my $v = $opts{$o};
  74        my ($key) = ($o =~ /^([a-z\-]+)/);
  75        $key =~ s/-//g;
  76        my $arg = 'git-repo-config';
  77        $arg .= ' --int' if ($o =~ /=i$/);
  78        $arg .= ' --bool' if ($o !~ /=[sfi]$/);
  79        if (ref $v eq 'ARRAY') {
  80                chomp(my @tmp = `$arg --get-all svn.$key`);
  81                @$v = @tmp if @tmp;
  82        } else {
  83                chomp(my $tmp = `$arg --get svn.$key`);
  84                if ($tmp && !($arg =~ / --bool / && $tmp eq 'false')) {
  85                        $$v = $tmp;
  86                }
  87        }
  88}
  89
  90GetOptions(%opts, 'help|H|h' => \$_help,
  91                'version|V' => \$_version,
  92                'id|i=s' => \$GIT_SVN) or exit 1;
  93
  94$GIT_SVN ||= $ENV{GIT_SVN_ID} || 'git-svn';
  95$GIT_SVN_INDEX = "$GIT_DIR/$GIT_SVN/index";
  96$SVN_URL = undef;
  97$REV_DIR = "$GIT_DIR/$GIT_SVN/revs";
  98$SVN_WC = "$GIT_DIR/$GIT_SVN/tree";
  99
 100usage(0) if $_help;
 101version() if $_version;
 102usage(1) unless defined $cmd;
 103load_authors() if $_authors;
 104svn_compat_check();
 105$cmd{$cmd}->[0]->(@ARGV);
 106exit 0;
 107
 108####################### primary functions ######################
 109sub usage {
 110        my $exit = shift || 0;
 111        my $fd = $exit ? \*STDERR : \*STDOUT;
 112        print $fd <<"";
 113git-svn - bidirectional operations between a single Subversion tree and git
 114Usage: $0 <command> [options] [arguments]\n
 115
 116        print $fd "Available commands:\n" unless $cmd;
 117
 118        foreach (sort keys %cmd) {
 119                next if $cmd && $cmd ne $_;
 120                print $fd '  ',pack('A13',$_),$cmd{$_}->[1],"\n";
 121                foreach (keys %{$cmd{$_}->[2]}) {
 122                        # prints out arguments as they should be passed:
 123                        my $x = s#=s$## ? '<arg>' : s#=i$## ? '<num>' : '';
 124                        print $fd ' ' x 17, join(', ', map { length $_ > 1 ?
 125                                                        "--$_" : "-$_" }
 126                                                split /\|/,$_)," $x\n";
 127                }
 128        }
 129        print $fd <<"";
 130\nGIT_SVN_ID may be set in the environment or via the --id/-i switch to an
 131arbitrary identifier if you're tracking multiple SVN branches/repositories in
 132one git repository and want to keep them separate.  See git-svn(1) for more
 133information.
 134
 135        exit $exit;
 136}
 137
 138sub version {
 139        print "git-svn version $VERSION\n";
 140        exit 0;
 141}
 142
 143sub rebuild {
 144        $SVN_URL = shift or undef;
 145        my $newest_rev = 0;
 146        if ($_upgrade) {
 147                sys('git-update-ref',"refs/remotes/$GIT_SVN","$GIT_SVN-HEAD");
 148        } else {
 149                check_upgrade_needed();
 150        }
 151
 152        my $pid = open(my $rev_list,'-|');
 153        defined $pid or croak $!;
 154        if ($pid == 0) {
 155                exec("git-rev-list","refs/remotes/$GIT_SVN") or croak $!;
 156        }
 157        my $latest;
 158        while (<$rev_list>) {
 159                chomp;
 160                my $c = $_;
 161                croak "Non-SHA1: $c\n" unless $c =~ /^$sha1$/o;
 162                my @commit = grep(/^git-svn-id: /,`git-cat-file commit $c`);
 163                next if (!@commit); # skip merges
 164                my $id = $commit[$#commit];
 165                my ($url, $rev, $uuid) = ($id =~ /^git-svn-id:\s(\S+?)\@(\d+)
 166                                                \s([a-f\d\-]+)$/x);
 167                if (!$rev || !$uuid || !$url) {
 168                        # some of the original repositories I made had
 169                        # indentifiers like this:
 170                        ($rev, $uuid) = ($id =~/^git-svn-id:\s(\d+)
 171                                                        \@([a-f\d\-]+)/x);
 172                        if (!$rev || !$uuid) {
 173                                croak "Unable to extract revision or UUID from ",
 174                                        "$c, $id\n";
 175                        }
 176                }
 177
 178                # if we merged or otherwise started elsewhere, this is
 179                # how we break out of it
 180                next if (defined $SVN_UUID && ($uuid ne $SVN_UUID));
 181                next if (defined $SVN_URL && defined $url && ($url ne $SVN_URL));
 182
 183                print "r$rev = $c\n";
 184                unless (defined $latest) {
 185                        if (!$SVN_URL && !$url) {
 186                                croak "SVN repository location required: $url\n";
 187                        }
 188                        $SVN_URL ||= $url;
 189                        $SVN_UUID ||= $uuid;
 190                        setup_git_svn();
 191                        $latest = $rev;
 192                }
 193                assert_revision_eq_or_unknown($rev, $c);
 194                sys('git-update-ref',"$GIT_SVN/revs/$rev",$c);
 195                $newest_rev = $rev if ($rev > $newest_rev);
 196        }
 197        close $rev_list or croak $?;
 198        if (!chdir $SVN_WC) {
 199                svn_cmd_checkout($SVN_URL, $latest, $SVN_WC);
 200                chdir $SVN_WC or croak $!;
 201        }
 202
 203        $pid = fork;
 204        defined $pid or croak $!;
 205        if ($pid == 0) {
 206                my @svn_up = qw(svn up);
 207                push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 208                sys(@svn_up,"-r$newest_rev");
 209                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
 210                git_addremove();
 211                exec('git-write-tree');
 212        }
 213        waitpid $pid, 0;
 214
 215        if ($_upgrade) {
 216                print STDERR <<"";
 217Keeping deprecated refs/head/$GIT_SVN-HEAD for now.  Please remove it
 218when you have upgraded your tools and habits to use refs/remotes/$GIT_SVN
 219
 220        }
 221}
 222
 223sub init {
 224        $SVN_URL = shift or die "SVN repository location required " .
 225                                "as a command-line argument\n";
 226        unless (-d $GIT_DIR) {
 227                sys('git-init-db');
 228        }
 229        setup_git_svn();
 230}
 231
 232sub fetch {
 233        my (@parents) = @_;
 234        check_upgrade_needed();
 235        $SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
 236        my @log_args = -d $SVN_WC ? ($SVN_WC) : ($SVN_URL);
 237        unless ($_revision) {
 238                $_revision = -d $SVN_WC ? 'BASE:HEAD' : '0:HEAD';
 239        }
 240        push @log_args, "-r$_revision";
 241        push @log_args, '--stop-on-copy' unless $_no_stop_copy;
 242
 243        my $svn_log = svn_log_raw(@log_args);
 244
 245        my $base = next_log_entry($svn_log) or croak "No base revision!\n";
 246        my $last_commit = undef;
 247        unless (-d $SVN_WC) {
 248                svn_cmd_checkout($SVN_URL,$base->{revision},$SVN_WC);
 249                chdir $SVN_WC or croak $!;
 250                read_uuid();
 251                $last_commit = git_commit($base, @parents);
 252                assert_svn_wc_clean($base->{revision}, $last_commit);
 253        } else {
 254                chdir $SVN_WC or croak $!;
 255                read_uuid();
 256                $last_commit = file_to_s("$REV_DIR/$base->{revision}");
 257        }
 258        my @svn_up = qw(svn up);
 259        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 260        my $last = $base;
 261        while (my $log_msg = next_log_entry($svn_log)) {
 262                assert_svn_wc_clean($last->{revision}, $last_commit);
 263                if ($last->{revision} >= $log_msg->{revision}) {
 264                        croak "Out of order: last >= current: ",
 265                                "$last->{revision} >= $log_msg->{revision}\n";
 266                }
 267                sys(@svn_up,"-r$log_msg->{revision}");
 268                $last_commit = git_commit($log_msg, $last_commit, @parents);
 269                $last = $log_msg;
 270        }
 271        assert_svn_wc_clean($last->{revision}, $last_commit);
 272        unless (-e "$GIT_DIR/refs/heads/master") {
 273                sys(qw(git-update-ref refs/heads/master),$last_commit);
 274        }
 275        return $last;
 276}
 277
 278sub commit {
 279        my (@commits) = @_;
 280        check_upgrade_needed();
 281        if ($_stdin || !@commits) {
 282                print "Reading from stdin...\n";
 283                @commits = ();
 284                while (<STDIN>) {
 285                        if (/\b($sha1_short)\b/o) {
 286                                unshift @commits, $1;
 287                        }
 288                }
 289        }
 290        my @revs;
 291        foreach my $c (@commits) {
 292                chomp(my @tmp = safe_qx('git-rev-parse',$c));
 293                if (scalar @tmp == 1) {
 294                        push @revs, $tmp[0];
 295                } elsif (scalar @tmp > 1) {
 296                        push @revs, reverse (safe_qx('git-rev-list',@tmp));
 297                } else {
 298                        die "Failed to rev-parse $c\n";
 299                }
 300        }
 301        chomp @revs;
 302
 303        fetch();
 304        chdir $SVN_WC or croak $!;
 305        my $info = svn_info('.');
 306        read_uuid($info);
 307        my $svn_current_rev =  $info->{'Last Changed Rev'};
 308        foreach my $c (@revs) {
 309                my $mods = svn_checkout_tree($svn_current_rev, $c);
 310                if (scalar @$mods == 0) {
 311                        print "Skipping, no changes detected\n";
 312                        next;
 313                }
 314                $svn_current_rev = svn_commit_tree($svn_current_rev, $c);
 315        }
 316        print "Done committing ",scalar @revs," revisions to SVN\n";
 317
 318}
 319
 320sub show_ignore {
 321        require File::Find or die $!;
 322        my $exclude_file = "$GIT_DIR/info/exclude";
 323        open my $fh, '<', $exclude_file or croak $!;
 324        chomp(my @excludes = (<$fh>));
 325        close $fh or croak $!;
 326
 327        $SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
 328        chdir $SVN_WC or croak $!;
 329        my %ign;
 330        File::Find::find({wanted=>sub{if(lstat $_ && -d _ && -d "$_/.svn"){
 331                s#^\./##;
 332                @{$ign{$_}} = safe_qx(qw(svn propget svn:ignore),$_);
 333                }}, no_chdir=>1},'.');
 334
 335        print "\n# /\n";
 336        foreach (@{$ign{'.'}}) { print '/',$_ if /\S/ }
 337        delete $ign{'.'};
 338        foreach my $i (sort keys %ign) {
 339                print "\n# ",$i,"\n";
 340                foreach (@{$ign{$i}}) { print '/',$i,'/',$_ if /\S/ }
 341        }
 342}
 343
 344########################### utility functions #########################
 345
 346sub read_uuid {
 347        return if $SVN_UUID;
 348        my $info = shift || svn_info('.');
 349        $SVN_UUID = $info->{'Repository UUID'} or
 350                                        croak "Repository UUID unreadable\n";
 351        s_to_file($SVN_UUID,"$GIT_DIR/$GIT_SVN/info/uuid");
 352}
 353
 354sub setup_git_svn {
 355        defined $SVN_URL or croak "SVN repository location required\n";
 356        unless (-d $GIT_DIR) {
 357                croak "GIT_DIR=$GIT_DIR does not exist!\n";
 358        }
 359        mkpath(["$GIT_DIR/$GIT_SVN"]);
 360        mkpath(["$GIT_DIR/$GIT_SVN/info"]);
 361        mkpath([$REV_DIR]);
 362        s_to_file($SVN_URL,"$GIT_DIR/$GIT_SVN/info/url");
 363
 364        open my $fd, '>>', "$GIT_DIR/$GIT_SVN/info/exclude" or croak $!;
 365        print $fd '.svn',"\n";
 366        close $fd or croak $!;
 367}
 368
 369sub assert_svn_wc_clean {
 370        my ($svn_rev, $treeish) = @_;
 371        croak "$svn_rev is not an integer!\n" unless ($svn_rev =~ /^\d+$/);
 372        croak "$treeish is not a sha1!\n" unless ($treeish =~ /^$sha1$/o);
 373        my $lcr = svn_info('.')->{'Last Changed Rev'};
 374        if ($svn_rev != $lcr) {
 375                print STDERR "Checking for copy-tree ... ";
 376                # use
 377                my @diff = grep(/^Index: /,(safe_qx(qw(svn diff),
 378                                                "-r$lcr:$svn_rev")));
 379                if (@diff) {
 380                        croak "Nope!  Expected r$svn_rev, got r$lcr\n";
 381                } else {
 382                        print STDERR "OK!\n";
 383                }
 384        }
 385        my @status = grep(!/^Performing status on external/,(`svn status`));
 386        @status = grep(!/^\s*$/,@status);
 387        if (scalar @status) {
 388                print STDERR "Tree ($SVN_WC) is not clean:\n";
 389                print STDERR $_ foreach @status;
 390                croak;
 391        }
 392        assert_tree($treeish);
 393}
 394
 395sub assert_tree {
 396        my ($treeish) = @_;
 397        croak "Not a sha1: $treeish\n" unless $treeish =~ /^$sha1$/o;
 398        chomp(my $type = `git-cat-file -t $treeish`);
 399        my $expected;
 400        while ($type eq 'tag') {
 401                chomp(($treeish, $type) = `git-cat-file tag $treeish`);
 402        }
 403        if ($type eq 'commit') {
 404                $expected = (grep /^tree /,`git-cat-file commit $treeish`)[0];
 405                ($expected) = ($expected =~ /^tree ($sha1)$/);
 406                die "Unable to get tree from $treeish\n" unless $expected;
 407        } elsif ($type eq 'tree') {
 408                $expected = $treeish;
 409        } else {
 410                die "$treeish is a $type, expected tree, tag or commit\n";
 411        }
 412
 413        my $old_index = $ENV{GIT_INDEX_FILE};
 414        my $tmpindex = $GIT_SVN_INDEX.'.assert-tmp';
 415        if (-e $tmpindex) {
 416                unlink $tmpindex or croak $!;
 417        }
 418        $ENV{GIT_INDEX_FILE} = $tmpindex;
 419        git_addremove();
 420        chomp(my $tree = `git-write-tree`);
 421        if ($old_index) {
 422                $ENV{GIT_INDEX_FILE} = $old_index;
 423        } else {
 424                delete $ENV{GIT_INDEX_FILE};
 425        }
 426        if ($tree ne $expected) {
 427                croak "Tree mismatch, Got: $tree, Expected: $expected\n";
 428        }
 429}
 430
 431sub parse_diff_tree {
 432        my $diff_fh = shift;
 433        local $/ = "\0";
 434        my $state = 'meta';
 435        my @mods;
 436        while (<$diff_fh>) {
 437                chomp $_; # this gets rid of the trailing "\0"
 438                if ($state eq 'meta' && /^:(\d{6})\s(\d{6})\s
 439                                        $sha1\s($sha1)\s([MTCRAD])\d*$/xo) {
 440                        push @mods, {   mode_a => $1, mode_b => $2,
 441                                        sha1_b => $3, chg => $4 };
 442                        if ($4 =~ /^(?:C|R)$/) {
 443                                $state = 'file_a';
 444                        } else {
 445                                $state = 'file_b';
 446                        }
 447                } elsif ($state eq 'file_a') {
 448                        my $x = $mods[$#mods] or croak "Empty array\n";
 449                        if ($x->{chg} !~ /^(?:C|R)$/) {
 450                                croak "Error parsing $_, $x->{chg}\n";
 451                        }
 452                        $x->{file_a} = $_;
 453                        $state = 'file_b';
 454                } elsif ($state eq 'file_b') {
 455                        my $x = $mods[$#mods] or croak "Empty array\n";
 456                        if (exists $x->{file_a} && $x->{chg} !~ /^(?:C|R)$/) {
 457                                croak "Error parsing $_, $x->{chg}\n";
 458                        }
 459                        if (!exists $x->{file_a} && $x->{chg} =~ /^(?:C|R)$/) {
 460                                croak "Error parsing $_, $x->{chg}\n";
 461                        }
 462                        $x->{file_b} = $_;
 463                        $state = 'meta';
 464                } else {
 465                        croak "Error parsing $_\n";
 466                }
 467        }
 468        close $diff_fh or croak $!;
 469
 470        return \@mods;
 471}
 472
 473sub svn_check_prop_executable {
 474        my $m = shift;
 475        return if -l $m->{file_b};
 476        if ($m->{mode_b} =~ /755$/) {
 477                chmod((0755 &~ umask),$m->{file_b}) or croak $!;
 478                if ($m->{mode_a} !~ /755$/) {
 479                        sys(qw(svn propset svn:executable 1), $m->{file_b});
 480                }
 481                -x $m->{file_b} or croak "$m->{file_b} is not executable!\n";
 482        } elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
 483                sys(qw(svn propdel svn:executable), $m->{file_b});
 484                chmod((0644 &~ umask),$m->{file_b}) or croak $!;
 485                -x $m->{file_b} and croak "$m->{file_b} is executable!\n";
 486        }
 487}
 488
 489sub svn_ensure_parent_path {
 490        my $dir_b = dirname(shift);
 491        svn_ensure_parent_path($dir_b) if ($dir_b ne File::Spec->curdir);
 492        mkpath([$dir_b]) unless (-d $dir_b);
 493        sys(qw(svn add -N), $dir_b) unless (-d "$dir_b/.svn");
 494}
 495
 496sub precommit_check {
 497        my $mods = shift;
 498        my (%rm_file, %rmdir_check, %added_check);
 499
 500        my %o = ( D => 0, R => 1, C => 2, A => 3, M => 3, T => 3 );
 501        foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
 502                if ($m->{chg} eq 'R') {
 503                        if (-d $m->{file_b}) {
 504                                err_dir_to_file("$m->{file_a} => $m->{file_b}");
 505                        }
 506                        # dir/$file => dir/file/$file
 507                        my $dirname = dirname($m->{file_b});
 508                        while ($dirname ne File::Spec->curdir) {
 509                                if ($dirname ne $m->{file_a}) {
 510                                        $dirname = dirname($dirname);
 511                                        next;
 512                                }
 513                                err_file_to_dir("$m->{file_a} => $m->{file_b}");
 514                        }
 515                        # baz/zzz => baz (baz is a file)
 516                        $dirname = dirname($m->{file_a});
 517                        while ($dirname ne File::Spec->curdir) {
 518                                if ($dirname ne $m->{file_b}) {
 519                                        $dirname = dirname($dirname);
 520                                        next;
 521                                }
 522                                err_dir_to_file("$m->{file_a} => $m->{file_b}");
 523                        }
 524                }
 525                if ($m->{chg} =~ /^(D|R)$/) {
 526                        my $t = $1 eq 'D' ? 'file_b' : 'file_a';
 527                        $rm_file{ $m->{$t} } = 1;
 528                        my $dirname = dirname( $m->{$t} );
 529                        my $basename = basename( $m->{$t} );
 530                        $rmdir_check{$dirname}->{$basename} = 1;
 531                } elsif ($m->{chg} =~ /^(?:A|C)$/) {
 532                        if (-d $m->{file_b}) {
 533                                err_dir_to_file($m->{file_b});
 534                        }
 535                        my $dirname = dirname( $m->{file_b} );
 536                        my $basename = basename( $m->{file_b} );
 537                        $added_check{$dirname}->{$basename} = 1;
 538                        while ($dirname ne File::Spec->curdir) {
 539                                if ($rm_file{$dirname}) {
 540                                        err_file_to_dir($m->{file_b});
 541                                }
 542                                $dirname = dirname $dirname;
 543                        }
 544                }
 545        }
 546        return (\%rmdir_check, \%added_check);
 547
 548        sub err_dir_to_file {
 549                my $file = shift;
 550                print STDERR "Node change from directory to file ",
 551                                "is not supported by Subversion: ",$file,"\n";
 552                exit 1;
 553        }
 554        sub err_file_to_dir {
 555                my $file = shift;
 556                print STDERR "Node change from file to directory ",
 557                                "is not supported by Subversion: ",$file,"\n";
 558                exit 1;
 559        }
 560}
 561
 562sub svn_checkout_tree {
 563        my ($svn_rev, $treeish) = @_;
 564        my $from = file_to_s("$REV_DIR/$svn_rev");
 565        assert_svn_wc_clean($svn_rev,$from);
 566        print "diff-tree $from $treeish\n";
 567        my $pid = open my $diff_fh, '-|';
 568        defined $pid or croak $!;
 569        if ($pid == 0) {
 570                my @diff_tree = qw(git-diff-tree -z -r -C);
 571                push @diff_tree, '--find-copies-harder' if $_find_copies_harder;
 572                push @diff_tree, "-l$_l" if defined $_l;
 573                exec(@diff_tree, $from, $treeish) or croak $!;
 574        }
 575        my $mods = parse_diff_tree($diff_fh);
 576        unless (@$mods) {
 577                # git can do empty commits, but SVN doesn't allow it...
 578                return $mods;
 579        }
 580        my ($rm, $add) = precommit_check($mods);
 581
 582        my %o = ( D => 1, R => 0, C => -1, A => 3, M => 3, T => 3 );
 583        foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
 584                if ($m->{chg} eq 'C') {
 585                        svn_ensure_parent_path( $m->{file_b} );
 586                        sys(qw(svn cp),         $m->{file_a}, $m->{file_b});
 587                        apply_mod_line_blob($m);
 588                        svn_check_prop_executable($m);
 589                } elsif ($m->{chg} eq 'D') {
 590                        sys(qw(svn rm --force), $m->{file_b});
 591                } elsif ($m->{chg} eq 'R') {
 592                        svn_ensure_parent_path( $m->{file_b} );
 593                        sys(qw(svn mv --force), $m->{file_a}, $m->{file_b});
 594                        apply_mod_line_blob($m);
 595                        svn_check_prop_executable($m);
 596                } elsif ($m->{chg} eq 'M') {
 597                        apply_mod_line_blob($m);
 598                        svn_check_prop_executable($m);
 599                } elsif ($m->{chg} eq 'T') {
 600                        sys(qw(svn rm --force),$m->{file_b});
 601                        apply_mod_line_blob($m);
 602                        sys(qw(svn add --force), $m->{file_b});
 603                        svn_check_prop_executable($m);
 604                } elsif ($m->{chg} eq 'A') {
 605                        svn_ensure_parent_path( $m->{file_b} );
 606                        apply_mod_line_blob($m);
 607                        sys(qw(svn add --force), $m->{file_b});
 608                        svn_check_prop_executable($m);
 609                } else {
 610                        croak "Invalid chg: $m->{chg}\n";
 611                }
 612        }
 613
 614        assert_tree($treeish);
 615        if ($_rmdir) { # remove empty directories
 616                handle_rmdir($rm, $add);
 617        }
 618        assert_tree($treeish);
 619        return $mods;
 620}
 621
 622# svn ls doesn't work with respect to the current working tree, but what's
 623# in the repository.  There's not even an option for it... *sigh*
 624# (added files don't show up and removed files remain in the ls listing)
 625sub svn_ls_current {
 626        my ($dir, $rm, $add) = @_;
 627        chomp(my @ls = safe_qx('svn','ls',$dir));
 628        my @ret = ();
 629        foreach (@ls) {
 630                s#/$##; # trailing slashes are evil
 631                push @ret, $_ unless $rm->{$dir}->{$_};
 632        }
 633        if (exists $add->{$dir}) {
 634                push @ret, keys %{$add->{$dir}};
 635        }
 636        return \@ret;
 637}
 638
 639sub handle_rmdir {
 640        my ($rm, $add) = @_;
 641
 642        foreach my $dir (sort {length $b <=> length $a} keys %$rm) {
 643                my $ls = svn_ls_current($dir, $rm, $add);
 644                next if (scalar @$ls);
 645                sys(qw(svn rm --force),$dir);
 646
 647                my $dn = dirname $dir;
 648                $rm->{ $dn }->{ basename $dir } = 1;
 649                $ls = svn_ls_current($dn, $rm, $add);
 650                while (scalar @$ls == 0 && $dn ne File::Spec->curdir) {
 651                        sys(qw(svn rm --force),$dn);
 652                        $dir = basename $dn;
 653                        $dn = dirname $dn;
 654                        $rm->{ $dn }->{ $dir } = 1;
 655                        $ls = svn_ls_current($dn, $rm, $add);
 656                }
 657        }
 658}
 659
 660sub svn_commit_tree {
 661        my ($svn_rev, $commit) = @_;
 662        my $commit_msg = "$GIT_DIR/$GIT_SVN/.svn-commit.tmp.$$";
 663        my %log_msg = ( msg => '' );
 664        open my $msg, '>', $commit_msg or croak $!;
 665
 666        chomp(my $type = `git-cat-file -t $commit`);
 667        if ($type eq 'commit') {
 668                my $pid = open my $msg_fh, '-|';
 669                defined $pid or croak $!;
 670
 671                if ($pid == 0) {
 672                        exec(qw(git-cat-file commit), $commit) or croak $!;
 673                }
 674                my $in_msg = 0;
 675                while (<$msg_fh>) {
 676                        if (!$in_msg) {
 677                                $in_msg = 1 if (/^\s*$/);
 678                        } elsif (/^git-svn-id: /) {
 679                                # skip this, we regenerate the correct one
 680                                # on re-fetch anyways
 681                        } else {
 682                                print $msg $_ or croak $!;
 683                        }
 684                }
 685                close $msg_fh or croak $!;
 686        }
 687        close $msg or croak $!;
 688
 689        if ($_edit || ($type eq 'tree')) {
 690                my $editor = $ENV{VISUAL} || $ENV{EDITOR} || 'vi';
 691                system($editor, $commit_msg);
 692        }
 693
 694        # file_to_s removes all trailing newlines, so just use chomp() here:
 695        open $msg, '<', $commit_msg or croak $!;
 696        { local $/; chomp($log_msg{msg} = <$msg>); }
 697        close $msg or croak $!;
 698
 699        my ($oneline) = ($log_msg{msg} =~ /([^\n\r]+)/);
 700        print "Committing $commit: $oneline\n";
 701
 702        my @ci_output = safe_qx(qw(svn commit -F),$commit_msg);
 703        my ($committed) = grep(/^Committed revision \d+\./,@ci_output);
 704        unlink $commit_msg;
 705        defined $committed or croak
 706                        "Commit output failed to parse committed revision!\n",
 707                        join("\n",@ci_output),"\n";
 708        my ($rev_committed) = ($committed =~ /^Committed revision (\d+)\./);
 709
 710        my @svn_up = qw(svn up);
 711        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 712        if ($rev_committed == ($svn_rev + 1)) {
 713                push @svn_up, "-r$rev_committed";
 714                sys(@svn_up);
 715                my $info = svn_info('.');
 716                my $date = $info->{'Last Changed Date'} or die "Missing date\n";
 717                if ($info->{'Last Changed Rev'} != $rev_committed) {
 718                        croak "$info->{'Last Changed Rev'} != $rev_committed\n"
 719                }
 720                my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
 721                                        /(\d{4})\-(\d\d)\-(\d\d)\s
 722                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 723                                         or croak "Failed to parse date: $date\n";
 724                $log_msg{date} = "$tz $Y-$m-$d $H:$M:$S";
 725                $log_msg{author} = $info->{'Last Changed Author'};
 726                $log_msg{revision} = $rev_committed;
 727                $log_msg{msg} .= "\n";
 728                my $parent = file_to_s("$REV_DIR/$svn_rev");
 729                git_commit(\%log_msg, $parent, $commit);
 730                return $rev_committed;
 731        }
 732        # resync immediately
 733        push @svn_up, "-r$svn_rev";
 734        sys(@svn_up);
 735        return fetch("$rev_committed=$commit")->{revision};
 736}
 737
 738# read the entire log into a temporary file (which is removed ASAP)
 739# and store the file handle + parser state
 740sub svn_log_raw {
 741        my (@log_args) = @_;
 742        my $log_fh = IO::File->new_tmpfile or croak $!;
 743        my $pid = fork;
 744        defined $pid or croak $!;
 745        if (!$pid) {
 746                open STDOUT, '>&', $log_fh or croak $!;
 747                exec (qw(svn log), @log_args) or croak $!
 748        }
 749        waitpid $pid, 0;
 750        croak if $?;
 751        seek $log_fh, 0, 0 or croak $!;
 752        return { state => 'sep', fh => $log_fh };
 753}
 754
 755sub next_log_entry {
 756        my $log = shift; # retval of svn_log_raw()
 757        my $ret = undef;
 758        my $fh = $log->{fh};
 759
 760        while (<$fh>) {
 761                chomp;
 762                if (/^\-{72}$/) {
 763                        if ($log->{state} eq 'msg') {
 764                                if ($ret->{lines}) {
 765                                        $ret->{msg} .= $_."\n";
 766                                        unless(--$ret->{lines}) {
 767                                                $log->{state} = 'sep';
 768                                        }
 769                                } else {
 770                                        croak "Log parse error at: $_\n",
 771                                                $ret->{revision},
 772                                                "\n";
 773                                }
 774                                next;
 775                        }
 776                        if ($log->{state} ne 'sep') {
 777                                croak "Log parse error at: $_\n",
 778                                        "state: $log->{state}\n",
 779                                        $ret->{revision},
 780                                        "\n";
 781                        }
 782                        $log->{state} = 'rev';
 783
 784                        # if we have an empty log message, put something there:
 785                        if ($ret) {
 786                                $ret->{msg} ||= "\n";
 787                                delete $ret->{lines};
 788                                return $ret;
 789                        }
 790                        next;
 791                }
 792                if ($log->{state} eq 'rev' && s/^r(\d+)\s*\|\s*//) {
 793                        my $rev = $1;
 794                        my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3);
 795                        ($lines) = ($lines =~ /(\d+)/);
 796                        my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
 797                                        /(\d{4})\-(\d\d)\-(\d\d)\s
 798                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 799                                         or croak "Failed to parse date: $date\n";
 800                        $ret = {        revision => $rev,
 801                                        date => "$tz $Y-$m-$d $H:$M:$S",
 802                                        author => $author,
 803                                        lines => $lines,
 804                                        msg => '' };
 805                        if (defined $_authors && ! defined $users{$author}) {
 806                                die "Author: $author not defined in ",
 807                                                "$_authors file\n";
 808                        }
 809                        $log->{state} = 'msg_start';
 810                        next;
 811                }
 812                # skip the first blank line of the message:
 813                if ($log->{state} eq 'msg_start' && /^$/) {
 814                        $log->{state} = 'msg';
 815                } elsif ($log->{state} eq 'msg') {
 816                        if ($ret->{lines}) {
 817                                $ret->{msg} .= $_."\n";
 818                                unless (--$ret->{lines}) {
 819                                        $log->{state} = 'sep';
 820                                }
 821                        } else {
 822                                croak "Log parse error at: $_\n",
 823                                        $ret->{revision},"\n";
 824                        }
 825                }
 826        }
 827        return $ret;
 828}
 829
 830sub svn_info {
 831        my $url = shift || $SVN_URL;
 832
 833        my $pid = open my $info_fh, '-|';
 834        defined $pid or croak $!;
 835
 836        if ($pid == 0) {
 837                exec(qw(svn info),$url) or croak $!;
 838        }
 839
 840        my $ret = {};
 841        # only single-lines seem to exist in svn info output
 842        while (<$info_fh>) {
 843                chomp $_;
 844                if (m#^([^:]+)\s*:\s*(\S.*)$#) {
 845                        $ret->{$1} = $2;
 846                        push @{$ret->{-order}}, $1;
 847                }
 848        }
 849        close $info_fh or croak $!;
 850        return $ret;
 851}
 852
 853sub sys { system(@_) == 0 or croak $? }
 854
 855sub git_addremove {
 856        system( "git-diff-files --name-only -z ".
 857                                " | git-update-index --remove -z --stdin && ".
 858                "git-ls-files -z --others ".
 859                        "'--exclude-from=$GIT_DIR/$GIT_SVN/info/exclude'".
 860                                " | git-update-index --add -z --stdin"
 861                ) == 0 or croak $?
 862}
 863
 864sub s_to_file {
 865        my ($str, $file, $mode) = @_;
 866        open my $fd,'>',$file or croak $!;
 867        print $fd $str,"\n" or croak $!;
 868        close $fd or croak $!;
 869        chmod ($mode &~ umask, $file) if (defined $mode);
 870}
 871
 872sub file_to_s {
 873        my $file = shift;
 874        open my $fd,'<',$file or croak "$!: file: $file\n";
 875        local $/;
 876        my $ret = <$fd>;
 877        close $fd or croak $!;
 878        $ret =~ s/\s*$//s;
 879        return $ret;
 880}
 881
 882sub assert_revision_unknown {
 883        my $revno = shift;
 884        if (-f "$REV_DIR/$revno") {
 885                croak "$REV_DIR/$revno already exists! ",
 886                                "Why are we refetching it?";
 887        }
 888}
 889
 890sub trees_eq {
 891        my ($x, $y) = @_;
 892        my @x = safe_qx('git-cat-file','commit',$x);
 893        my @y = safe_qx('git-cat-file','commit',$y);
 894        if (($y[0] ne $x[0]) || $x[0] !~ /^tree $sha1\n$/
 895                                || $y[0] !~ /^tree $sha1\n$/) {
 896                print STDERR "Trees not equal: $y[0] != $x[0]\n";
 897                return 0
 898        }
 899        return 1;
 900}
 901
 902sub assert_revision_eq_or_unknown {
 903        my ($revno, $commit) = @_;
 904        if (-f "$REV_DIR/$revno") {
 905                my $current = file_to_s("$REV_DIR/$revno");
 906                if (($commit ne $current) && !trees_eq($commit, $current)) {
 907                        croak "$REV_DIR/$revno already exists!\n",
 908                                "current: $current\nexpected: $commit\n";
 909                }
 910                return;
 911        }
 912}
 913
 914sub git_commit {
 915        my ($log_msg, @parents) = @_;
 916        assert_revision_unknown($log_msg->{revision});
 917        my $out_fh = IO::File->new_tmpfile or croak $!;
 918
 919        map_tree_joins() if (@_branch_from && !%tree_map);
 920
 921        # commit parents can be conditionally bound to a particular
 922        # svn revision via: "svn_revno=commit_sha1", filter them out here:
 923        my @exec_parents;
 924        foreach my $p (@parents) {
 925                next unless defined $p;
 926                if ($p =~ /^(\d+)=($sha1_short)$/o) {
 927                        if ($1 == $log_msg->{revision}) {
 928                                push @exec_parents, $2;
 929                        }
 930                } else {
 931                        push @exec_parents, $p if $p =~ /$sha1_short/o;
 932                }
 933        }
 934
 935        my $pid = fork;
 936        defined $pid or croak $!;
 937        if ($pid == 0) {
 938                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
 939                git_addremove();
 940                chomp(my $tree = `git-write-tree`);
 941                croak if $?;
 942                if (exists $tree_map{$tree}) {
 943                        my %seen_parent = map { $_ => 1 } @exec_parents;
 944                        foreach (@{$tree_map{$tree}}) {
 945                                # MAXPARENT is defined to 16 in commit-tree.c:
 946                                if ($seen_parent{$_} || @exec_parents > 16) {
 947                                        next;
 948                                }
 949                                push @exec_parents, $_;
 950                                $seen_parent{$_} = 1;
 951                        }
 952                }
 953                my $msg_fh = IO::File->new_tmpfile or croak $!;
 954                print $msg_fh $log_msg->{msg}, "\ngit-svn-id: ",
 955                                        "$SVN_URL\@$log_msg->{revision}",
 956                                        " $SVN_UUID\n" or croak $!;
 957                $msg_fh->flush == 0 or croak $!;
 958                seek $msg_fh, 0, 0 or croak $!;
 959
 960                set_commit_env($log_msg);
 961
 962                my @exec = ('git-commit-tree',$tree);
 963                push @exec, '-p', $_  foreach @exec_parents;
 964                open STDIN, '<&', $msg_fh or croak $!;
 965                open STDOUT, '>&', $out_fh or croak $!;
 966                exec @exec or croak $!;
 967        }
 968        waitpid($pid,0);
 969        croak if $?;
 970
 971        $out_fh->flush == 0 or croak $!;
 972        seek $out_fh, 0, 0 or croak $!;
 973        chomp(my $commit = do { local $/; <$out_fh> });
 974        if ($commit !~ /^$sha1$/o) {
 975                croak "Failed to commit, invalid sha1: $commit\n";
 976        }
 977        my @update_ref = ('git-update-ref',"refs/remotes/$GIT_SVN",$commit);
 978        if (my $primary_parent = shift @exec_parents) {
 979                $pid = fork;
 980                defined $pid or croak $!;
 981                if (!$pid) {
 982                        close STDERR;
 983                        close STDOUT;
 984                        exec 'git-rev-parse','--verify',
 985                                                "refs/remotes/$GIT_SVN^0";
 986                }
 987                waitpid $pid, 0;
 988                push @update_ref, $primary_parent unless $?;
 989        }
 990        sys(@update_ref);
 991        sys('git-update-ref',"$GIT_SVN/revs/$log_msg->{revision}",$commit);
 992        print "r$log_msg->{revision} = $commit\n";
 993        return $commit;
 994}
 995
 996sub set_commit_env {
 997        my ($log_msg) = @_;
 998        my $author = $log_msg->{author};
 999        my ($name,$email) = defined $users{$author} ?  @{$users{$author}}
1000                                : ($author,"$author\@$SVN_UUID");
1001        $ENV{GIT_AUTHOR_NAME} = $ENV{GIT_COMMITTER_NAME} = $name;
1002        $ENV{GIT_AUTHOR_EMAIL} = $ENV{GIT_COMMITTER_EMAIL} = $email;
1003        $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_msg->{date};
1004}
1005
1006sub apply_mod_line_blob {
1007        my $m = shift;
1008        if ($m->{mode_b} =~ /^120/) {
1009                blob_to_symlink($m->{sha1_b}, $m->{file_b});
1010        } else {
1011                blob_to_file($m->{sha1_b}, $m->{file_b});
1012        }
1013}
1014
1015sub blob_to_symlink {
1016        my ($blob, $link) = @_;
1017        defined $link or croak "\$link not defined!\n";
1018        croak "Not a sha1: $blob\n" unless $blob =~ /^$sha1$/o;
1019        if (-l $link || -f _) {
1020                unlink $link or croak $!;
1021        }
1022
1023        my $dest = `git-cat-file blob $blob`; # no newline, so no chomp
1024        symlink $dest, $link or croak $!;
1025}
1026
1027sub blob_to_file {
1028        my ($blob, $file) = @_;
1029        defined $file or croak "\$file not defined!\n";
1030        croak "Not a sha1: $blob\n" unless $blob =~ /^$sha1$/o;
1031        if (-l $file || -f _) {
1032                unlink $file or croak $!;
1033        }
1034
1035        open my $blob_fh, '>', $file or croak "$!: $file\n";
1036        my $pid = fork;
1037        defined $pid or croak $!;
1038
1039        if ($pid == 0) {
1040                open STDOUT, '>&', $blob_fh or croak $!;
1041                exec('git-cat-file','blob',$blob);
1042        }
1043        waitpid $pid, 0;
1044        croak $? if $?;
1045
1046        close $blob_fh or croak $!;
1047}
1048
1049sub safe_qx {
1050        my $pid = open my $child, '-|';
1051        defined $pid or croak $!;
1052        if ($pid == 0) {
1053                exec(@_) or croak $?;
1054        }
1055        my @ret = (<$child>);
1056        close $child or croak $?;
1057        die $? if $?; # just in case close didn't error out
1058        return wantarray ? @ret : join('',@ret);
1059}
1060
1061sub svn_compat_check {
1062        my @co_help = safe_qx(qw(svn co -h));
1063        unless (grep /ignore-externals/,@co_help) {
1064                print STDERR "W: Installed svn version does not support ",
1065                                "--ignore-externals\n";
1066                $_no_ignore_ext = 1;
1067        }
1068        if (grep /usage: checkout URL\[\@REV\]/,@co_help) {
1069                $_svn_co_url_revs = 1;
1070        }
1071
1072        # I really, really hope nobody hits this...
1073        unless (grep /stop-on-copy/, (safe_qx(qw(svn log -h)))) {
1074                print STDERR <<'';
1075W: The installed svn version does not support the --stop-on-copy flag in
1076   the log command.
1077   Lets hope the directory you're tracking is not a branch or tag
1078   and was never moved within the repository...
1079
1080                $_no_stop_copy = 1;
1081        }
1082}
1083
1084# *sigh*, new versions of svn won't honor -r<rev> without URL@<rev>,
1085# (and they won't honor URL@<rev> without -r<rev>, too!)
1086sub svn_cmd_checkout {
1087        my ($url, $rev, $dir) = @_;
1088        my @cmd = ('svn','co', "-r$rev");
1089        push @cmd, '--ignore-externals' unless $_no_ignore_ext;
1090        $url .= "\@$rev" if $_svn_co_url_revs;
1091        sys(@cmd, $url, $dir);
1092}
1093
1094sub check_upgrade_needed {
1095        my $old = eval {
1096                my $pid = open my $child, '-|';
1097                defined $pid or croak $!;
1098                if ($pid == 0) {
1099                        close STDERR;
1100                        exec('git-rev-parse',"$GIT_SVN-HEAD") or croak $?;
1101                }
1102                my @ret = (<$child>);
1103                close $child or croak $?;
1104                die $? if $?; # just in case close didn't error out
1105                return wantarray ? @ret : join('',@ret);
1106        };
1107        return unless $old;
1108        my $head = eval { safe_qx('git-rev-parse',"refs/remotes/$GIT_SVN") };
1109        if ($@ || !$head) {
1110                print STDERR "Please run: $0 rebuild --upgrade\n";
1111                exit 1;
1112        }
1113}
1114
1115# fills %tree_map with a reverse mapping of trees to commits.  Useful
1116# for finding parents to commit on.
1117sub map_tree_joins {
1118        foreach my $br (@_branch_from) {
1119                my $pid = open my $pipe, '-|';
1120                defined $pid or croak $!;
1121                if ($pid == 0) {
1122                        exec(qw(git-rev-list --pretty=raw), $br) or croak $?;
1123                }
1124                while (<$pipe>) {
1125                        if (/^commit ($sha1)$/o) {
1126                                my $commit = $1;
1127                                my ($tree) = (<$pipe> =~ /^tree ($sha1)$/o);
1128                                unless (defined $tree) {
1129                                        die "Failed to parse commit $commit\n";
1130                                }
1131                                push @{$tree_map{$tree}}, $commit;
1132                        }
1133                }
1134                close $pipe or croak $?;
1135        }
1136}
1137
1138# '<svn username> = real-name <email address>' mapping based on git-svnimport:
1139sub load_authors {
1140        open my $authors, '<', $_authors or die "Can't open $_authors $!\n";
1141        while (<$authors>) {
1142                chomp;
1143                next unless /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/;
1144                my ($user, $name, $email) = ($1, $2, $3);
1145                $users{$user} = [$name, $email];
1146        }
1147        close $authors or croak $!;
1148}
1149
1150__END__
1151
1152Data structures:
1153
1154$svn_log hashref (as returned by svn_log_raw)
1155{
1156        fh => file handle of the log file,
1157        state => state of the log file parser (sep/msg/rev/msg_start...)
1158}
1159
1160$log_msg hashref as returned by next_log_entry($svn_log)
1161{
1162        msg => 'whitespace-formatted log entry
1163',                                              # trailing newline is preserved
1164        revision => '8',                        # integer
1165        date => '2004-02-24T17:01:44.108345Z',  # commit date
1166        author => 'committer name'
1167};
1168
1169
1170@mods = array of diff-index line hashes, each element represents one line
1171        of diff-index output
1172
1173diff-index line ($m hash)
1174{
1175        mode_a => first column of diff-index output, no leading ':',
1176        mode_b => second column of diff-index output,
1177        sha1_b => sha1sum of the final blob,
1178        chg => change type [MCRADT],
1179        file_a => original file name of a file (iff chg is 'C' or 'R')
1180        file_b => new/current file name of a file (any chg)
1181}
1182;