edfb19c39e969c71c270c3e14378b80780ab438f
   1#!/usr/bin/env perl
   2# Copyright (C) 2006, Eric Wong <normalperson@yhbt.net>
   3# License: GPL v2 or later
   4use warnings;
   5use strict;
   6use vars qw/    $AUTHOR $VERSION
   7                $SVN_URL $SVN_INFO $SVN_WC $SVN_UUID
   8                $GIT_SVN_INDEX $GIT_SVN
   9                $GIT_DIR $REV_DIR/;
  10$AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
  11$VERSION = '0.11.0';
  12
  13use Cwd qw/abs_path/;
  14$GIT_DIR = abs_path($ENV{GIT_DIR} || '.git');
  15$ENV{GIT_DIR} = $GIT_DIR;
  16
  17# make sure the svn binary gives consistent output between locales and TZs:
  18$ENV{TZ} = 'UTC';
  19$ENV{LC_ALL} = 'C';
  20
  21# If SVN:: library support is added, please make the dependencies
  22# optional and preserve the capability to use the command-line client.
  23# use eval { require SVN::... } to make it lazy load
  24# We don't use any modules not in the standard Perl distribution:
  25use Carp qw/croak/;
  26use IO::File qw//;
  27use File::Basename qw/dirname basename/;
  28use File::Path qw/mkpath/;
  29use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
  30use File::Spec qw//;
  31use POSIX qw/strftime/;
  32my $sha1 = qr/[a-f\d]{40}/;
  33my $sha1_short = qr/[a-f\d]{4,40}/;
  34my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
  35        $_find_copies_harder, $_l, $_version, $_upgrade, $_authors);
  36my (@_branch_from, %tree_map, %users);
  37my $_svn_co_url_revs;
  38
  39my %fc_opts = ( 'no-ignore-externals' => \$_no_ignore_ext,
  40                'branch|b=s' => \@_branch_from,
  41                'authors-file|A=s' => \$_authors );
  42my %cmd = (
  43        fetch => [ \&fetch, "Download new revisions from SVN",
  44                        { 'revision|r=s' => \$_revision, %fc_opts } ],
  45        init => [ \&init, "Initialize and fetch (import)", { } ],
  46        commit => [ \&commit, "Commit git revisions to SVN",
  47                        {       'stdin|' => \$_stdin,
  48                                'edit|e' => \$_edit,
  49                                'rmdir' => \$_rmdir,
  50                                'find-copies-harder' => \$_find_copies_harder,
  51                                'l=i' => \$_l,
  52                                %fc_opts,
  53                        } ],
  54        'show-ignore' => [ \&show_ignore, "Show svn:ignore listings", { } ],
  55        rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)",
  56                        { 'no-ignore-externals' => \$_no_ignore_ext,
  57                          'upgrade' => \$_upgrade } ],
  58);
  59my $cmd;
  60for (my $i = 0; $i < @ARGV; $i++) {
  61        if (defined $cmd{$ARGV[$i]}) {
  62                $cmd = $ARGV[$i];
  63                splice @ARGV, $i, 1;
  64                last;
  65        }
  66};
  67
  68my %opts = %{$cmd{$cmd}->[2]} if (defined $cmd);
  69
  70# convert GetOpt::Long specs for use by git-repo-config
  71foreach my $o (keys %opts) {
  72        my $v = $opts{$o};
  73        my ($key) = ($o =~ /^([a-z\-]+)/);
  74        $key =~ s/-//g;
  75        my $arg = 'git-repo-config';
  76        $arg .= ' --int' if ($o =~ /=i$/);
  77        $arg .= ' --bool' if ($o !~ /=[sfi]$/);
  78        $arg .= " svn.$key"; # $key only matches [a-z\-], always shell-safe
  79        if (ref $v eq 'ARRAY') {
  80                chomp(@$v = `$arg`);
  81        } else {
  82                chomp($$v = `$arg`);
  83                $$v = 0 if $$v eq 'false';
  84        }
  85}
  86
  87GetOptions(%opts, 'help|H|h' => \$_help,
  88                'version|V' => \$_version,
  89                'id|i=s' => \$GIT_SVN) or exit 1;
  90
  91$GIT_SVN ||= $ENV{GIT_SVN_ID} || 'git-svn';
  92$GIT_SVN_INDEX = "$GIT_DIR/$GIT_SVN/index";
  93$SVN_URL = undef;
  94$REV_DIR = "$GIT_DIR/$GIT_SVN/revs";
  95$SVN_WC = "$GIT_DIR/$GIT_SVN/tree";
  96
  97usage(0) if $_help;
  98version() if $_version;
  99usage(1) unless defined $cmd;
 100load_authors() if $_authors;
 101svn_compat_check();
 102$cmd{$cmd}->[0]->(@ARGV);
 103exit 0;
 104
 105####################### primary functions ######################
 106sub usage {
 107        my $exit = shift || 0;
 108        my $fd = $exit ? \*STDERR : \*STDOUT;
 109        print $fd <<"";
 110git-svn - bidirectional operations between a single Subversion tree and git
 111Usage: $0 <command> [options] [arguments]\n
 112
 113        print $fd "Available commands:\n" unless $cmd;
 114
 115        foreach (sort keys %cmd) {
 116                next if $cmd && $cmd ne $_;
 117                print $fd '  ',pack('A13',$_),$cmd{$_}->[1],"\n";
 118                foreach (keys %{$cmd{$_}->[2]}) {
 119                        # prints out arguments as they should be passed:
 120                        my $x = s#=s$## ? '<arg>' : s#=i$## ? '<num>' : '';
 121                        print $fd ' ' x 17, join(', ', map { length $_ > 1 ?
 122                                                        "--$_" : "-$_" }
 123                                                split /\|/,$_)," $x\n";
 124                }
 125        }
 126        print $fd <<"";
 127\nGIT_SVN_ID may be set in the environment or via the --id/-i switch to an
 128arbitrary identifier if you're tracking multiple SVN branches/repositories in
 129one git repository and want to keep them separate.  See git-svn(1) for more
 130information.
 131
 132        exit $exit;
 133}
 134
 135sub version {
 136        print "git-svn version $VERSION\n";
 137        exit 0;
 138}
 139
 140sub rebuild {
 141        $SVN_URL = shift or undef;
 142        my $newest_rev = 0;
 143        if ($_upgrade) {
 144                sys('git-update-ref',"refs/remotes/$GIT_SVN","$GIT_SVN-HEAD");
 145        } else {
 146                check_upgrade_needed();
 147        }
 148
 149        my $pid = open(my $rev_list,'-|');
 150        defined $pid or croak $!;
 151        if ($pid == 0) {
 152                exec("git-rev-list","refs/remotes/$GIT_SVN") or croak $!;
 153        }
 154        my $latest;
 155        while (<$rev_list>) {
 156                chomp;
 157                my $c = $_;
 158                croak "Non-SHA1: $c\n" unless $c =~ /^$sha1$/o;
 159                my @commit = grep(/^git-svn-id: /,`git-cat-file commit $c`);
 160                next if (!@commit); # skip merges
 161                my $id = $commit[$#commit];
 162                my ($url, $rev, $uuid) = ($id =~ /^git-svn-id:\s(\S+?)\@(\d+)
 163                                                \s([a-f\d\-]+)$/x);
 164                if (!$rev || !$uuid || !$url) {
 165                        # some of the original repositories I made had
 166                        # indentifiers like this:
 167                        ($rev, $uuid) = ($id =~/^git-svn-id:\s(\d+)
 168                                                        \@([a-f\d\-]+)/x);
 169                        if (!$rev || !$uuid) {
 170                                croak "Unable to extract revision or UUID from ",
 171                                        "$c, $id\n";
 172                        }
 173                }
 174
 175                # if we merged or otherwise started elsewhere, this is
 176                # how we break out of it
 177                next if (defined $SVN_UUID && ($uuid ne $SVN_UUID));
 178                next if (defined $SVN_URL && defined $url && ($url ne $SVN_URL));
 179
 180                print "r$rev = $c\n";
 181                unless (defined $latest) {
 182                        if (!$SVN_URL && !$url) {
 183                                croak "SVN repository location required: $url\n";
 184                        }
 185                        $SVN_URL ||= $url;
 186                        $SVN_UUID ||= $uuid;
 187                        setup_git_svn();
 188                        $latest = $rev;
 189                }
 190                assert_revision_eq_or_unknown($rev, $c);
 191                sys('git-update-ref',"$GIT_SVN/revs/$rev",$c);
 192                $newest_rev = $rev if ($rev > $newest_rev);
 193        }
 194        close $rev_list or croak $?;
 195        if (!chdir $SVN_WC) {
 196                svn_cmd_checkout($SVN_URL, $latest, $SVN_WC);
 197                chdir $SVN_WC or croak $!;
 198        }
 199
 200        $pid = fork;
 201        defined $pid or croak $!;
 202        if ($pid == 0) {
 203                my @svn_up = qw(svn up);
 204                push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 205                sys(@svn_up,"-r$newest_rev");
 206                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
 207                git_addremove();
 208                exec('git-write-tree');
 209        }
 210        waitpid $pid, 0;
 211
 212        if ($_upgrade) {
 213                print STDERR <<"";
 214Keeping deprecated refs/head/$GIT_SVN-HEAD for now.  Please remove it
 215when you have upgraded your tools and habits to use refs/remotes/$GIT_SVN
 216
 217        }
 218}
 219
 220sub init {
 221        $SVN_URL = shift or croak "SVN repository location required\n";
 222        unless (-d $GIT_DIR) {
 223                sys('git-init-db');
 224        }
 225        setup_git_svn();
 226}
 227
 228sub fetch {
 229        my (@parents) = @_;
 230        check_upgrade_needed();
 231        $SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
 232        my @log_args = -d $SVN_WC ? ($SVN_WC) : ($SVN_URL);
 233        unless ($_revision) {
 234                $_revision = -d $SVN_WC ? 'BASE:HEAD' : '0:HEAD';
 235        }
 236        push @log_args, "-r$_revision";
 237        push @log_args, '--stop-on-copy' unless $_no_stop_copy;
 238
 239        my $svn_log = svn_log_raw(@log_args);
 240
 241        my $base = next_log_entry($svn_log) or croak "No base revision!\n";
 242        my $last_commit = undef;
 243        unless (-d $SVN_WC) {
 244                svn_cmd_checkout($SVN_URL,$base->{revision},$SVN_WC);
 245                chdir $SVN_WC or croak $!;
 246                read_uuid();
 247                $last_commit = git_commit($base, @parents);
 248                assert_svn_wc_clean($base->{revision}, $last_commit);
 249        } else {
 250                chdir $SVN_WC or croak $!;
 251                read_uuid();
 252                $last_commit = file_to_s("$REV_DIR/$base->{revision}");
 253        }
 254        my @svn_up = qw(svn up);
 255        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 256        my $last = $base;
 257        while (my $log_msg = next_log_entry($svn_log)) {
 258                assert_svn_wc_clean($last->{revision}, $last_commit);
 259                if ($last->{revision} >= $log_msg->{revision}) {
 260                        croak "Out of order: last >= current: ",
 261                                "$last->{revision} >= $log_msg->{revision}\n";
 262                }
 263                sys(@svn_up,"-r$log_msg->{revision}");
 264                $last_commit = git_commit($log_msg, $last_commit, @parents);
 265                $last = $log_msg;
 266        }
 267        assert_svn_wc_clean($last->{revision}, $last_commit);
 268        unless (-e "$GIT_DIR/refs/heads/master") {
 269                sys(qw(git-update-ref refs/heads/master),$last_commit);
 270        }
 271        return $last;
 272}
 273
 274sub commit {
 275        my (@commits) = @_;
 276        check_upgrade_needed();
 277        if ($_stdin || !@commits) {
 278                print "Reading from stdin...\n";
 279                @commits = ();
 280                while (<STDIN>) {
 281                        if (/\b($sha1_short)\b/o) {
 282                                unshift @commits, $1;
 283                        }
 284                }
 285        }
 286        my @revs;
 287        foreach my $c (@commits) {
 288                chomp(my @tmp = safe_qx('git-rev-parse',$c));
 289                if (scalar @tmp == 1) {
 290                        push @revs, $tmp[0];
 291                } elsif (scalar @tmp > 1) {
 292                        push @revs, reverse (safe_qx('git-rev-list',@tmp));
 293                } else {
 294                        die "Failed to rev-parse $c\n";
 295                }
 296        }
 297        chomp @revs;
 298
 299        fetch();
 300        chdir $SVN_WC or croak $!;
 301        my $info = svn_info('.');
 302        read_uuid($info);
 303        my $svn_current_rev =  $info->{'Last Changed Rev'};
 304        foreach my $c (@revs) {
 305                my $mods = svn_checkout_tree($svn_current_rev, $c);
 306                if (scalar @$mods == 0) {
 307                        print "Skipping, no changes detected\n";
 308                        next;
 309                }
 310                $svn_current_rev = svn_commit_tree($svn_current_rev, $c);
 311        }
 312        print "Done committing ",scalar @revs," revisions to SVN\n";
 313
 314}
 315
 316sub show_ignore {
 317        require File::Find or die $!;
 318        my $exclude_file = "$GIT_DIR/info/exclude";
 319        open my $fh, '<', $exclude_file or croak $!;
 320        chomp(my @excludes = (<$fh>));
 321        close $fh or croak $!;
 322
 323        $SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
 324        chdir $SVN_WC or croak $!;
 325        my %ign;
 326        File::Find::find({wanted=>sub{if(lstat $_ && -d _ && -d "$_/.svn"){
 327                s#^\./##;
 328                @{$ign{$_}} = safe_qx(qw(svn propget svn:ignore),$_);
 329                }}, no_chdir=>1},'.');
 330
 331        print "\n# /\n";
 332        foreach (@{$ign{'.'}}) { print '/',$_ if /\S/ }
 333        delete $ign{'.'};
 334        foreach my $i (sort keys %ign) {
 335                print "\n# ",$i,"\n";
 336                foreach (@{$ign{$i}}) { print '/',$i,'/',$_ if /\S/ }
 337        }
 338}
 339
 340########################### utility functions #########################
 341
 342sub read_uuid {
 343        return if $SVN_UUID;
 344        my $info = shift || svn_info('.');
 345        $SVN_UUID = $info->{'Repository UUID'} or
 346                                        croak "Repository UUID unreadable\n";
 347        s_to_file($SVN_UUID,"$GIT_DIR/$GIT_SVN/info/uuid");
 348}
 349
 350sub setup_git_svn {
 351        defined $SVN_URL or croak "SVN repository location required\n";
 352        unless (-d $GIT_DIR) {
 353                croak "GIT_DIR=$GIT_DIR does not exist!\n";
 354        }
 355        mkpath(["$GIT_DIR/$GIT_SVN"]);
 356        mkpath(["$GIT_DIR/$GIT_SVN/info"]);
 357        mkpath([$REV_DIR]);
 358        s_to_file($SVN_URL,"$GIT_DIR/$GIT_SVN/info/url");
 359
 360        open my $fd, '>>', "$GIT_DIR/$GIT_SVN/info/exclude" or croak $!;
 361        print $fd '.svn',"\n";
 362        close $fd or croak $!;
 363}
 364
 365sub assert_svn_wc_clean {
 366        my ($svn_rev, $treeish) = @_;
 367        croak "$svn_rev is not an integer!\n" unless ($svn_rev =~ /^\d+$/);
 368        croak "$treeish is not a sha1!\n" unless ($treeish =~ /^$sha1$/o);
 369        my $lcr = svn_info('.')->{'Last Changed Rev'};
 370        if ($svn_rev != $lcr) {
 371                print STDERR "Checking for copy-tree ... ";
 372                # use
 373                my @diff = grep(/^Index: /,(safe_qx(qw(svn diff),
 374                                                "-r$lcr:$svn_rev")));
 375                if (@diff) {
 376                        croak "Nope!  Expected r$svn_rev, got r$lcr\n";
 377                } else {
 378                        print STDERR "OK!\n";
 379                }
 380        }
 381        my @status = grep(!/^Performing status on external/,(`svn status`));
 382        @status = grep(!/^\s*$/,@status);
 383        if (scalar @status) {
 384                print STDERR "Tree ($SVN_WC) is not clean:\n";
 385                print STDERR $_ foreach @status;
 386                croak;
 387        }
 388        assert_tree($treeish);
 389}
 390
 391sub assert_tree {
 392        my ($treeish) = @_;
 393        croak "Not a sha1: $treeish\n" unless $treeish =~ /^$sha1$/o;
 394        chomp(my $type = `git-cat-file -t $treeish`);
 395        my $expected;
 396        while ($type eq 'tag') {
 397                chomp(($treeish, $type) = `git-cat-file tag $treeish`);
 398        }
 399        if ($type eq 'commit') {
 400                $expected = (grep /^tree /,`git-cat-file commit $treeish`)[0];
 401                ($expected) = ($expected =~ /^tree ($sha1)$/);
 402                die "Unable to get tree from $treeish\n" unless $expected;
 403        } elsif ($type eq 'tree') {
 404                $expected = $treeish;
 405        } else {
 406                die "$treeish is a $type, expected tree, tag or commit\n";
 407        }
 408
 409        my $old_index = $ENV{GIT_INDEX_FILE};
 410        my $tmpindex = $GIT_SVN_INDEX.'.assert-tmp';
 411        if (-e $tmpindex) {
 412                unlink $tmpindex or croak $!;
 413        }
 414        $ENV{GIT_INDEX_FILE} = $tmpindex;
 415        git_addremove();
 416        chomp(my $tree = `git-write-tree`);
 417        if ($old_index) {
 418                $ENV{GIT_INDEX_FILE} = $old_index;
 419        } else {
 420                delete $ENV{GIT_INDEX_FILE};
 421        }
 422        if ($tree ne $expected) {
 423                croak "Tree mismatch, Got: $tree, Expected: $expected\n";
 424        }
 425}
 426
 427sub parse_diff_tree {
 428        my $diff_fh = shift;
 429        local $/ = "\0";
 430        my $state = 'meta';
 431        my @mods;
 432        while (<$diff_fh>) {
 433                chomp $_; # this gets rid of the trailing "\0"
 434                if ($state eq 'meta' && /^:(\d{6})\s(\d{6})\s
 435                                        $sha1\s($sha1)\s([MTCRAD])\d*$/xo) {
 436                        push @mods, {   mode_a => $1, mode_b => $2,
 437                                        sha1_b => $3, chg => $4 };
 438                        if ($4 =~ /^(?:C|R)$/) {
 439                                $state = 'file_a';
 440                        } else {
 441                                $state = 'file_b';
 442                        }
 443                } elsif ($state eq 'file_a') {
 444                        my $x = $mods[$#mods] or croak "Empty array\n";
 445                        if ($x->{chg} !~ /^(?:C|R)$/) {
 446                                croak "Error parsing $_, $x->{chg}\n";
 447                        }
 448                        $x->{file_a} = $_;
 449                        $state = 'file_b';
 450                } elsif ($state eq 'file_b') {
 451                        my $x = $mods[$#mods] or croak "Empty array\n";
 452                        if (exists $x->{file_a} && $x->{chg} !~ /^(?:C|R)$/) {
 453                                croak "Error parsing $_, $x->{chg}\n";
 454                        }
 455                        if (!exists $x->{file_a} && $x->{chg} =~ /^(?:C|R)$/) {
 456                                croak "Error parsing $_, $x->{chg}\n";
 457                        }
 458                        $x->{file_b} = $_;
 459                        $state = 'meta';
 460                } else {
 461                        croak "Error parsing $_\n";
 462                }
 463        }
 464        close $diff_fh or croak $!;
 465
 466        return \@mods;
 467}
 468
 469sub svn_check_prop_executable {
 470        my $m = shift;
 471        return if -l $m->{file_b};
 472        if ($m->{mode_b} =~ /755$/) {
 473                chmod((0755 &~ umask),$m->{file_b}) or croak $!;
 474                if ($m->{mode_a} !~ /755$/) {
 475                        sys(qw(svn propset svn:executable 1), $m->{file_b});
 476                }
 477                -x $m->{file_b} or croak "$m->{file_b} is not executable!\n";
 478        } elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
 479                sys(qw(svn propdel svn:executable), $m->{file_b});
 480                chmod((0644 &~ umask),$m->{file_b}) or croak $!;
 481                -x $m->{file_b} and croak "$m->{file_b} is executable!\n";
 482        }
 483}
 484
 485sub svn_ensure_parent_path {
 486        my $dir_b = dirname(shift);
 487        svn_ensure_parent_path($dir_b) if ($dir_b ne File::Spec->curdir);
 488        mkpath([$dir_b]) unless (-d $dir_b);
 489        sys(qw(svn add -N), $dir_b) unless (-d "$dir_b/.svn");
 490}
 491
 492sub precommit_check {
 493        my $mods = shift;
 494        my (%rm_file, %rmdir_check, %added_check);
 495
 496        my %o = ( D => 0, R => 1, C => 2, A => 3, M => 3, T => 3 );
 497        foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
 498                if ($m->{chg} eq 'R') {
 499                        if (-d $m->{file_b}) {
 500                                err_dir_to_file("$m->{file_a} => $m->{file_b}");
 501                        }
 502                        # dir/$file => dir/file/$file
 503                        my $dirname = dirname($m->{file_b});
 504                        while ($dirname ne File::Spec->curdir) {
 505                                if ($dirname ne $m->{file_a}) {
 506                                        $dirname = dirname($dirname);
 507                                        next;
 508                                }
 509                                err_file_to_dir("$m->{file_a} => $m->{file_b}");
 510                        }
 511                        # baz/zzz => baz (baz is a file)
 512                        $dirname = dirname($m->{file_a});
 513                        while ($dirname ne File::Spec->curdir) {
 514                                if ($dirname ne $m->{file_b}) {
 515                                        $dirname = dirname($dirname);
 516                                        next;
 517                                }
 518                                err_dir_to_file("$m->{file_a} => $m->{file_b}");
 519                        }
 520                }
 521                if ($m->{chg} =~ /^(D|R)$/) {
 522                        my $t = $1 eq 'D' ? 'file_b' : 'file_a';
 523                        $rm_file{ $m->{$t} } = 1;
 524                        my $dirname = dirname( $m->{$t} );
 525                        my $basename = basename( $m->{$t} );
 526                        $rmdir_check{$dirname}->{$basename} = 1;
 527                } elsif ($m->{chg} =~ /^(?:A|C)$/) {
 528                        if (-d $m->{file_b}) {
 529                                err_dir_to_file($m->{file_b});
 530                        }
 531                        my $dirname = dirname( $m->{file_b} );
 532                        my $basename = basename( $m->{file_b} );
 533                        $added_check{$dirname}->{$basename} = 1;
 534                        while ($dirname ne File::Spec->curdir) {
 535                                if ($rm_file{$dirname}) {
 536                                        err_file_to_dir($m->{file_b});
 537                                }
 538                                $dirname = dirname $dirname;
 539                        }
 540                }
 541        }
 542        return (\%rmdir_check, \%added_check);
 543
 544        sub err_dir_to_file {
 545                my $file = shift;
 546                print STDERR "Node change from directory to file ",
 547                                "is not supported by Subversion: ",$file,"\n";
 548                exit 1;
 549        }
 550        sub err_file_to_dir {
 551                my $file = shift;
 552                print STDERR "Node change from file to directory ",
 553                                "is not supported by Subversion: ",$file,"\n";
 554                exit 1;
 555        }
 556}
 557
 558sub svn_checkout_tree {
 559        my ($svn_rev, $treeish) = @_;
 560        my $from = file_to_s("$REV_DIR/$svn_rev");
 561        assert_svn_wc_clean($svn_rev,$from);
 562        print "diff-tree $from $treeish\n";
 563        my $pid = open my $diff_fh, '-|';
 564        defined $pid or croak $!;
 565        if ($pid == 0) {
 566                my @diff_tree = qw(git-diff-tree -z -r -C);
 567                push @diff_tree, '--find-copies-harder' if $_find_copies_harder;
 568                push @diff_tree, "-l$_l" if defined $_l;
 569                exec(@diff_tree, $from, $treeish) or croak $!;
 570        }
 571        my $mods = parse_diff_tree($diff_fh);
 572        unless (@$mods) {
 573                # git can do empty commits, but SVN doesn't allow it...
 574                return $mods;
 575        }
 576        my ($rm, $add) = precommit_check($mods);
 577
 578        my %o = ( D => 1, R => 0, C => -1, A => 3, M => 3, T => 3 );
 579        foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
 580                if ($m->{chg} eq 'C') {
 581                        svn_ensure_parent_path( $m->{file_b} );
 582                        sys(qw(svn cp),         $m->{file_a}, $m->{file_b});
 583                        apply_mod_line_blob($m);
 584                        svn_check_prop_executable($m);
 585                } elsif ($m->{chg} eq 'D') {
 586                        sys(qw(svn rm --force), $m->{file_b});
 587                } elsif ($m->{chg} eq 'R') {
 588                        svn_ensure_parent_path( $m->{file_b} );
 589                        sys(qw(svn mv --force), $m->{file_a}, $m->{file_b});
 590                        apply_mod_line_blob($m);
 591                        svn_check_prop_executable($m);
 592                } elsif ($m->{chg} eq 'M') {
 593                        apply_mod_line_blob($m);
 594                        svn_check_prop_executable($m);
 595                } elsif ($m->{chg} eq 'T') {
 596                        sys(qw(svn rm --force),$m->{file_b});
 597                        apply_mod_line_blob($m);
 598                        sys(qw(svn add --force), $m->{file_b});
 599                        svn_check_prop_executable($m);
 600                } elsif ($m->{chg} eq 'A') {
 601                        svn_ensure_parent_path( $m->{file_b} );
 602                        apply_mod_line_blob($m);
 603                        sys(qw(svn add --force), $m->{file_b});
 604                        svn_check_prop_executable($m);
 605                } else {
 606                        croak "Invalid chg: $m->{chg}\n";
 607                }
 608        }
 609
 610        assert_tree($treeish);
 611        if ($_rmdir) { # remove empty directories
 612                handle_rmdir($rm, $add);
 613        }
 614        assert_tree($treeish);
 615        return $mods;
 616}
 617
 618# svn ls doesn't work with respect to the current working tree, but what's
 619# in the repository.  There's not even an option for it... *sigh*
 620# (added files don't show up and removed files remain in the ls listing)
 621sub svn_ls_current {
 622        my ($dir, $rm, $add) = @_;
 623        chomp(my @ls = safe_qx('svn','ls',$dir));
 624        my @ret = ();
 625        foreach (@ls) {
 626                s#/$##; # trailing slashes are evil
 627                push @ret, $_ unless $rm->{$dir}->{$_};
 628        }
 629        if (exists $add->{$dir}) {
 630                push @ret, keys %{$add->{$dir}};
 631        }
 632        return \@ret;
 633}
 634
 635sub handle_rmdir {
 636        my ($rm, $add) = @_;
 637
 638        foreach my $dir (sort {length $b <=> length $a} keys %$rm) {
 639                my $ls = svn_ls_current($dir, $rm, $add);
 640                next if (scalar @$ls);
 641                sys(qw(svn rm --force),$dir);
 642
 643                my $dn = dirname $dir;
 644                $rm->{ $dn }->{ basename $dir } = 1;
 645                $ls = svn_ls_current($dn, $rm, $add);
 646                while (scalar @$ls == 0 && $dn ne File::Spec->curdir) {
 647                        sys(qw(svn rm --force),$dn);
 648                        $dir = basename $dn;
 649                        $dn = dirname $dn;
 650                        $rm->{ $dn }->{ $dir } = 1;
 651                        $ls = svn_ls_current($dn, $rm, $add);
 652                }
 653        }
 654}
 655
 656sub svn_commit_tree {
 657        my ($svn_rev, $commit) = @_;
 658        my $commit_msg = "$GIT_DIR/$GIT_SVN/.svn-commit.tmp.$$";
 659        my %log_msg = ( msg => '' );
 660        open my $msg, '>', $commit_msg or croak $!;
 661
 662        chomp(my $type = `git-cat-file -t $commit`);
 663        if ($type eq 'commit') {
 664                my $pid = open my $msg_fh, '-|';
 665                defined $pid or croak $!;
 666
 667                if ($pid == 0) {
 668                        exec(qw(git-cat-file commit), $commit) or croak $!;
 669                }
 670                my $in_msg = 0;
 671                while (<$msg_fh>) {
 672                        if (!$in_msg) {
 673                                $in_msg = 1 if (/^\s*$/);
 674                        } elsif (/^git-svn-id: /) {
 675                                # skip this, we regenerate the correct one
 676                                # on re-fetch anyways
 677                        } else {
 678                                print $msg $_ or croak $!;
 679                        }
 680                }
 681                close $msg_fh or croak $!;
 682        }
 683        close $msg or croak $!;
 684
 685        if ($_edit || ($type eq 'tree')) {
 686                my $editor = $ENV{VISUAL} || $ENV{EDITOR} || 'vi';
 687                system($editor, $commit_msg);
 688        }
 689
 690        # file_to_s removes all trailing newlines, so just use chomp() here:
 691        open $msg, '<', $commit_msg or croak $!;
 692        { local $/; chomp($log_msg{msg} = <$msg>); }
 693        close $msg or croak $!;
 694
 695        my ($oneline) = ($log_msg{msg} =~ /([^\n\r]+)/);
 696        print "Committing $commit: $oneline\n";
 697
 698        my @ci_output = safe_qx(qw(svn commit -F),$commit_msg);
 699        my ($committed) = grep(/^Committed revision \d+\./,@ci_output);
 700        unlink $commit_msg;
 701        defined $committed or croak
 702                        "Commit output failed to parse committed revision!\n",
 703                        join("\n",@ci_output),"\n";
 704        my ($rev_committed) = ($committed =~ /^Committed revision (\d+)\./);
 705
 706        my @svn_up = qw(svn up);
 707        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
 708        if ($rev_committed == ($svn_rev + 1)) {
 709                push @svn_up, "-r$rev_committed";
 710                sys(@svn_up);
 711                my $info = svn_info('.');
 712                my $date = $info->{'Last Changed Date'} or die "Missing date\n";
 713                if ($info->{'Last Changed Rev'} != $rev_committed) {
 714                        croak "$info->{'Last Changed Rev'} != $rev_committed\n"
 715                }
 716                my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
 717                                        /(\d{4})\-(\d\d)\-(\d\d)\s
 718                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 719                                         or croak "Failed to parse date: $date\n";
 720                $log_msg{date} = "$tz $Y-$m-$d $H:$M:$S";
 721                $log_msg{author} = $info->{'Last Changed Author'};
 722                $log_msg{revision} = $rev_committed;
 723                $log_msg{msg} .= "\n";
 724                my $parent = file_to_s("$REV_DIR/$svn_rev");
 725                git_commit(\%log_msg, $parent, $commit);
 726                return $rev_committed;
 727        }
 728        # resync immediately
 729        push @svn_up, "-r$svn_rev";
 730        sys(@svn_up);
 731        return fetch("$rev_committed=$commit")->{revision};
 732}
 733
 734# read the entire log into a temporary file (which is removed ASAP)
 735# and store the file handle + parser state
 736sub svn_log_raw {
 737        my (@log_args) = @_;
 738        my $log_fh = IO::File->new_tmpfile or croak $!;
 739        my $pid = fork;
 740        defined $pid or croak $!;
 741        if (!$pid) {
 742                open STDOUT, '>&', $log_fh or croak $!;
 743                exec (qw(svn log), @log_args) or croak $!
 744        }
 745        waitpid $pid, 0;
 746        croak if $?;
 747        seek $log_fh, 0, 0 or croak $!;
 748        return { state => 'sep', fh => $log_fh };
 749}
 750
 751sub next_log_entry {
 752        my $log = shift; # retval of svn_log_raw()
 753        my $ret = undef;
 754        my $fh = $log->{fh};
 755
 756        while (<$fh>) {
 757                chomp;
 758                if (/^\-{72}$/) {
 759                        if ($log->{state} eq 'msg') {
 760                                if ($ret->{lines}) {
 761                                        $ret->{msg} .= $_."\n";
 762                                        unless(--$ret->{lines}) {
 763                                                $log->{state} = 'sep';
 764                                        }
 765                                } else {
 766                                        croak "Log parse error at: $_\n",
 767                                                $ret->{revision},
 768                                                "\n";
 769                                }
 770                                next;
 771                        }
 772                        if ($log->{state} ne 'sep') {
 773                                croak "Log parse error at: $_\n",
 774                                        "state: $log->{state}\n",
 775                                        $ret->{revision},
 776                                        "\n";
 777                        }
 778                        $log->{state} = 'rev';
 779
 780                        # if we have an empty log message, put something there:
 781                        if ($ret) {
 782                                $ret->{msg} ||= "\n";
 783                                delete $ret->{lines};
 784                                return $ret;
 785                        }
 786                        next;
 787                }
 788                if ($log->{state} eq 'rev' && s/^r(\d+)\s*\|\s*//) {
 789                        my $rev = $1;
 790                        my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3);
 791                        ($lines) = ($lines =~ /(\d+)/);
 792                        my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
 793                                        /(\d{4})\-(\d\d)\-(\d\d)\s
 794                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 795                                         or croak "Failed to parse date: $date\n";
 796                        $ret = {        revision => $rev,
 797                                        date => "$tz $Y-$m-$d $H:$M:$S",
 798                                        author => $author,
 799                                        lines => $lines,
 800                                        msg => '' };
 801                        if (defined $_authors && ! defined $users{$author}) {
 802                                die "Author: $author not defined in ",
 803                                                "$_authors file\n";
 804                        }
 805                        $log->{state} = 'msg_start';
 806                        next;
 807                }
 808                # skip the first blank line of the message:
 809                if ($log->{state} eq 'msg_start' && /^$/) {
 810                        $log->{state} = 'msg';
 811                } elsif ($log->{state} eq 'msg') {
 812                        if ($ret->{lines}) {
 813                                $ret->{msg} .= $_."\n";
 814                                unless (--$ret->{lines}) {
 815                                        $log->{state} = 'sep';
 816                                }
 817                        } else {
 818                                croak "Log parse error at: $_\n",
 819                                        $ret->{revision},"\n";
 820                        }
 821                }
 822        }
 823        return $ret;
 824}
 825
 826sub svn_info {
 827        my $url = shift || $SVN_URL;
 828
 829        my $pid = open my $info_fh, '-|';
 830        defined $pid or croak $!;
 831
 832        if ($pid == 0) {
 833                exec(qw(svn info),$url) or croak $!;
 834        }
 835
 836        my $ret = {};
 837        # only single-lines seem to exist in svn info output
 838        while (<$info_fh>) {
 839                chomp $_;
 840                if (m#^([^:]+)\s*:\s*(\S.*)$#) {
 841                        $ret->{$1} = $2;
 842                        push @{$ret->{-order}}, $1;
 843                }
 844        }
 845        close $info_fh or croak $!;
 846        return $ret;
 847}
 848
 849sub sys { system(@_) == 0 or croak $? }
 850
 851sub git_addremove {
 852        system( "git-diff-files --name-only -z ".
 853                                " | git-update-index --remove -z --stdin && ".
 854                "git-ls-files -z --others ".
 855                        "'--exclude-from=$GIT_DIR/$GIT_SVN/info/exclude'".
 856                                " | git-update-index --add -z --stdin"
 857                ) == 0 or croak $?
 858}
 859
 860sub s_to_file {
 861        my ($str, $file, $mode) = @_;
 862        open my $fd,'>',$file or croak $!;
 863        print $fd $str,"\n" or croak $!;
 864        close $fd or croak $!;
 865        chmod ($mode &~ umask, $file) if (defined $mode);
 866}
 867
 868sub file_to_s {
 869        my $file = shift;
 870        open my $fd,'<',$file or croak "$!: file: $file\n";
 871        local $/;
 872        my $ret = <$fd>;
 873        close $fd or croak $!;
 874        $ret =~ s/\s*$//s;
 875        return $ret;
 876}
 877
 878sub assert_revision_unknown {
 879        my $revno = shift;
 880        if (-f "$REV_DIR/$revno") {
 881                croak "$REV_DIR/$revno already exists! ",
 882                                "Why are we refetching it?";
 883        }
 884}
 885
 886sub trees_eq {
 887        my ($x, $y) = @_;
 888        my @x = safe_qx('git-cat-file','commit',$x);
 889        my @y = safe_qx('git-cat-file','commit',$y);
 890        if (($y[0] ne $x[0]) || $x[0] !~ /^tree $sha1\n$/
 891                                || $y[0] !~ /^tree $sha1\n$/) {
 892                print STDERR "Trees not equal: $y[0] != $x[0]\n";
 893                return 0
 894        }
 895        return 1;
 896}
 897
 898sub assert_revision_eq_or_unknown {
 899        my ($revno, $commit) = @_;
 900        if (-f "$REV_DIR/$revno") {
 901                my $current = file_to_s("$REV_DIR/$revno");
 902                if (($commit ne $current) && !trees_eq($commit, $current)) {
 903                        croak "$REV_DIR/$revno already exists!\n",
 904                                "current: $current\nexpected: $commit\n";
 905                }
 906                return;
 907        }
 908}
 909
 910sub git_commit {
 911        my ($log_msg, @parents) = @_;
 912        assert_revision_unknown($log_msg->{revision});
 913        my $out_fh = IO::File->new_tmpfile or croak $!;
 914
 915        map_tree_joins() if (@_branch_from && !%tree_map);
 916
 917        # commit parents can be conditionally bound to a particular
 918        # svn revision via: "svn_revno=commit_sha1", filter them out here:
 919        my @exec_parents;
 920        foreach my $p (@parents) {
 921                next unless defined $p;
 922                if ($p =~ /^(\d+)=($sha1_short)$/o) {
 923                        if ($1 == $log_msg->{revision}) {
 924                                push @exec_parents, $2;
 925                        }
 926                } else {
 927                        push @exec_parents, $p if $p =~ /$sha1_short/o;
 928                }
 929        }
 930
 931        my $pid = fork;
 932        defined $pid or croak $!;
 933        if ($pid == 0) {
 934                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
 935                git_addremove();
 936                chomp(my $tree = `git-write-tree`);
 937                croak if $?;
 938                if (exists $tree_map{$tree}) {
 939                        my %seen_parent = map { $_ => 1 } @exec_parents;
 940                        foreach (@{$tree_map{$tree}}) {
 941                                # MAXPARENT is defined to 16 in commit-tree.c:
 942                                if ($seen_parent{$_} || @exec_parents > 16) {
 943                                        next;
 944                                }
 945                                push @exec_parents, $_;
 946                                $seen_parent{$_} = 1;
 947                        }
 948                }
 949                my $msg_fh = IO::File->new_tmpfile or croak $!;
 950                print $msg_fh $log_msg->{msg}, "\ngit-svn-id: ",
 951                                        "$SVN_URL\@$log_msg->{revision}",
 952                                        " $SVN_UUID\n" or croak $!;
 953                $msg_fh->flush == 0 or croak $!;
 954                seek $msg_fh, 0, 0 or croak $!;
 955
 956                set_commit_env($log_msg);
 957
 958                my @exec = ('git-commit-tree',$tree);
 959                push @exec, '-p', $_  foreach @exec_parents;
 960                open STDIN, '<&', $msg_fh or croak $!;
 961                open STDOUT, '>&', $out_fh or croak $!;
 962                exec @exec or croak $!;
 963        }
 964        waitpid($pid,0);
 965        croak if $?;
 966
 967        $out_fh->flush == 0 or croak $!;
 968        seek $out_fh, 0, 0 or croak $!;
 969        chomp(my $commit = do { local $/; <$out_fh> });
 970        if ($commit !~ /^$sha1$/o) {
 971                croak "Failed to commit, invalid sha1: $commit\n";
 972        }
 973        my @update_ref = ('git-update-ref',"refs/remotes/$GIT_SVN",$commit);
 974        if (my $primary_parent = shift @exec_parents) {
 975                $pid = fork;
 976                defined $pid or croak $!;
 977                if (!$pid) {
 978                        close STDERR;
 979                        close STDOUT;
 980                        exec 'git-rev-parse','--verify',
 981                                                "refs/remotes/$GIT_SVN^0";
 982                }
 983                waitpid $pid, 0;
 984                push @update_ref, $primary_parent unless $?;
 985        }
 986        sys(@update_ref);
 987        sys('git-update-ref',"$GIT_SVN/revs/$log_msg->{revision}",$commit);
 988        print "r$log_msg->{revision} = $commit\n";
 989        return $commit;
 990}
 991
 992sub set_commit_env {
 993        my ($log_msg) = @_;
 994        my $author = $log_msg->{author};
 995        my ($name,$email) = defined $users{$author} ?  @{$users{$author}}
 996                                : ($author,"$author\@$SVN_UUID");
 997        $ENV{GIT_AUTHOR_NAME} = $ENV{GIT_COMMITTER_NAME} = $name;
 998        $ENV{GIT_AUTHOR_EMAIL} = $ENV{GIT_COMMITTER_EMAIL} = $email;
 999        $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_msg->{date};
1000}
1001
1002sub apply_mod_line_blob {
1003        my $m = shift;
1004        if ($m->{mode_b} =~ /^120/) {
1005                blob_to_symlink($m->{sha1_b}, $m->{file_b});
1006        } else {
1007                blob_to_file($m->{sha1_b}, $m->{file_b});
1008        }
1009}
1010
1011sub blob_to_symlink {
1012        my ($blob, $link) = @_;
1013        defined $link or croak "\$link not defined!\n";
1014        croak "Not a sha1: $blob\n" unless $blob =~ /^$sha1$/o;
1015        if (-l $link || -f _) {
1016                unlink $link or croak $!;
1017        }
1018
1019        my $dest = `git-cat-file blob $blob`; # no newline, so no chomp
1020        symlink $dest, $link or croak $!;
1021}
1022
1023sub blob_to_file {
1024        my ($blob, $file) = @_;
1025        defined $file or croak "\$file not defined!\n";
1026        croak "Not a sha1: $blob\n" unless $blob =~ /^$sha1$/o;
1027        if (-l $file || -f _) {
1028                unlink $file or croak $!;
1029        }
1030
1031        open my $blob_fh, '>', $file or croak "$!: $file\n";
1032        my $pid = fork;
1033        defined $pid or croak $!;
1034
1035        if ($pid == 0) {
1036                open STDOUT, '>&', $blob_fh or croak $!;
1037                exec('git-cat-file','blob',$blob);
1038        }
1039        waitpid $pid, 0;
1040        croak $? if $?;
1041
1042        close $blob_fh or croak $!;
1043}
1044
1045sub safe_qx {
1046        my $pid = open my $child, '-|';
1047        defined $pid or croak $!;
1048        if ($pid == 0) {
1049                exec(@_) or croak $?;
1050        }
1051        my @ret = (<$child>);
1052        close $child or croak $?;
1053        die $? if $?; # just in case close didn't error out
1054        return wantarray ? @ret : join('',@ret);
1055}
1056
1057sub svn_compat_check {
1058        my @co_help = safe_qx(qw(svn co -h));
1059        unless (grep /ignore-externals/,@co_help) {
1060                print STDERR "W: Installed svn version does not support ",
1061                                "--ignore-externals\n";
1062                $_no_ignore_ext = 1;
1063        }
1064        if (grep /usage: checkout URL\[\@REV\]/,@co_help) {
1065                $_svn_co_url_revs = 1;
1066        }
1067
1068        # I really, really hope nobody hits this...
1069        unless (grep /stop-on-copy/, (safe_qx(qw(svn log -h)))) {
1070                print STDERR <<'';
1071W: The installed svn version does not support the --stop-on-copy flag in
1072   the log command.
1073   Lets hope the directory you're tracking is not a branch or tag
1074   and was never moved within the repository...
1075
1076                $_no_stop_copy = 1;
1077        }
1078}
1079
1080# *sigh*, new versions of svn won't honor -r<rev> without URL@<rev>,
1081# (and they won't honor URL@<rev> without -r<rev>, too!)
1082sub svn_cmd_checkout {
1083        my ($url, $rev, $dir) = @_;
1084        my @cmd = ('svn','co', "-r$rev");
1085        push @cmd, '--ignore-externals' unless $_no_ignore_ext;
1086        $url .= "\@$rev" if $_svn_co_url_revs;
1087        sys(@cmd, $url, $dir);
1088}
1089
1090sub check_upgrade_needed {
1091        my $old = eval {
1092                my $pid = open my $child, '-|';
1093                defined $pid or croak $!;
1094                if ($pid == 0) {
1095                        close STDERR;
1096                        exec('git-rev-parse',"$GIT_SVN-HEAD") or croak $?;
1097                }
1098                my @ret = (<$child>);
1099                close $child or croak $?;
1100                die $? if $?; # just in case close didn't error out
1101                return wantarray ? @ret : join('',@ret);
1102        };
1103        return unless $old;
1104        my $head = eval { safe_qx('git-rev-parse',"refs/remotes/$GIT_SVN") };
1105        if ($@ || !$head) {
1106                print STDERR "Please run: $0 rebuild --upgrade\n";
1107                exit 1;
1108        }
1109}
1110
1111# fills %tree_map with a reverse mapping of trees to commits.  Useful
1112# for finding parents to commit on.
1113sub map_tree_joins {
1114        foreach my $br (@_branch_from) {
1115                my $pid = open my $pipe, '-|';
1116                defined $pid or croak $!;
1117                if ($pid == 0) {
1118                        exec(qw(git-rev-list --pretty=raw), $br) or croak $?;
1119                }
1120                while (<$pipe>) {
1121                        if (/^commit ($sha1)$/o) {
1122                                my $commit = $1;
1123                                my ($tree) = (<$pipe> =~ /^tree ($sha1)$/o);
1124                                unless (defined $tree) {
1125                                        die "Failed to parse commit $commit\n";
1126                                }
1127                                push @{$tree_map{$tree}}, $commit;
1128                        }
1129                }
1130                close $pipe or croak $?;
1131        }
1132}
1133
1134# '<svn username> = real-name <email address>' mapping based on git-svnimport:
1135sub load_authors {
1136        open my $authors, '<', $_authors or die "Can't open $_authors $!\n";
1137        while (<$authors>) {
1138                chomp;
1139                next unless /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/;
1140                my ($user, $name, $email) = ($1, $2, $3);
1141                $users{$user} = [$name, $email];
1142        }
1143        close $authors or croak $!;
1144}
1145
1146__END__
1147
1148Data structures:
1149
1150$svn_log hashref (as returned by svn_log_raw)
1151{
1152        fh => file handle of the log file,
1153        state => state of the log file parser (sep/msg/rev/msg_start...)
1154}
1155
1156$log_msg hashref as returned by next_log_entry($svn_log)
1157{
1158        msg => 'whitespace-formatted log entry
1159',                                              # trailing newline is preserved
1160        revision => '8',                        # integer
1161        date => '2004-02-24T17:01:44.108345Z',  # commit date
1162        author => 'committer name'
1163};
1164
1165
1166@mods = array of diff-index line hashes, each element represents one line
1167        of diff-index output
1168
1169diff-index line ($m hash)
1170{
1171        mode_a => first column of diff-index output, no leading ':',
1172        mode_b => second column of diff-index output,
1173        sha1_b => sha1sum of the final blob,
1174        chg => change type [MCRADT],
1175        file_a => original file name of a file (iff chg is 'C' or 'R')
1176        file_b => new/current file name of a file (any chg)
1177}
1178;