git-svn: add the commit-diff command
[gitweb.git] / contrib / git-svn / git-svn.perl
index 08c30103f5c247559e15ebd4bb20d7177889ed88..4bdc9766cdd04cb8a62f437c71fa04b4738312d7 100755 (executable)
@@ -34,6 +34,8 @@
 use IPC::Open3;
 use Memoize;
 memoize('revisions_eq');
+memoize('cmt_metadata');
+memoize('get_commit_time');
 
 my ($SVN_PATH, $SVN, $SVN_LOG, $_use_lib);
 $_use_lib = 1 unless $ENV{GIT_SVN_NO_LIB};
@@ -44,6 +46,7 @@
 my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
        $_find_copies_harder, $_l, $_cp_similarity, $_cp_remote,
        $_repack, $_repack_nr, $_repack_flags,
+       $_message, $_file,
        $_template, $_shared, $_no_default_regex, $_no_graft_copy,
        $_limit, $_verbose, $_incremental, $_oneline, $_l_fmt, $_show_commit,
        $_version, $_upgrade, $_authors, $_branch_all_refs, @_opt_m);
                'tags|t=s' => \$_tags,
                'branches|b=s' => \$_branches );
 my %init_opts = ( 'template=s' => \$_template, 'shared' => \$_shared );
+my %cmt_opts = ( 'edit|e' => \$_edit,
+               'rmdir' => \$_rmdir,
+               'find-copies-harder' => \$_find_copies_harder,
+               'l=i' => \$_l,
+               'copy-similarity|C=i'=> \$_cp_similarity
+);
 
 # yes, 'native' sets "\n".  Patches to fix this for non-*nix systems welcome:
 my %EOL = ( CR => "\015", LF => "\012", CRLF => "\015\012", native => "\012" );
                          " (requires URL argument)",
                          \%init_opts ],
        commit => [ \&commit, "Commit git revisions to SVN",
-                       {       'stdin|' => \$_stdin,
-                               'edit|e' => \$_edit,
-                               'rmdir' => \$_rmdir,
-                               'find-copies-harder' => \$_find_copies_harder,
-                               'l=i' => \$_l,
-                               'copy-similarity|C=i'=> \$_cp_similarity,
-                               %fc_opts,
-                       } ],
+                       {       'stdin|' => \$_stdin, %cmt_opts, %fc_opts, } ],
        'show-ignore' => [ \&show_ignore, "Show svn:ignore listings",
                        { 'revision|r=i' => \$_revision } ],
        rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)",
@@ -91,6 +93,8 @@
        'graft-branches' => [ \&graft_branches,
                        'Detect merges/branches from already imported history',
                        { 'merge-rx|m' => \@_opt_m,
+                         'branch|b=s' => \@_branch_from,
+                         'branch-all-refs|B' => \$_branch_all_refs,
                          'no-default-regex' => \$_no_default_regex,
                          'no-graft-copy' => \$_no_graft_copy } ],
        'multi-init' => [ \&multi_init,
                          'show-commit' => \$_show_commit,
                          'authors-file|A=s' => \$_authors,
                        } ],
+       'commit-diff' => [ \&commit_diff, 'Commit a diff between two trees',
+                       { 'message|m=s' => \$_message,
+                         'file|F=s' => \$_file,
+                       %cmt_opts } ],
 );
 
 my $cmd;
 init_vars();
 load_authors() if $_authors;
 load_all_refs() if $_branch_all_refs;
-svn_compat_check();
+svn_compat_check() unless $_use_lib;
 migration_check() unless $cmd =~ /^(?:init|rebuild|multi-init)$/;
 $cmd{$cmd}->[0]->(@ARGV);
 exit 0;
@@ -379,7 +387,8 @@ sub fetch_lib {
                        # performance sucks with it enabled, so it's much
                        # faster to fetch revision ranges instead of relying
                        # on the limiter.
-                       $SVN_LOG->get_log( '/'.$SVN_PATH, $min, $max, 0, 1, 1,
+                       libsvn_get_log($SVN_LOG, '/'.$SVN_PATH,
+                                       $min, $max, 0, 1, 1,
                                sub {
                                        my $log_msg;
                                        if ($last_commit) {
@@ -479,11 +488,7 @@ sub commit_lib {
        my @lock = $SVN::Core::VERSION ge '1.2.0' ? (undef, 0) : ();
        my $commit_msg = "$GIT_SVN_DIR/.svn-commit.tmp.$$";
 
-       if (defined $LC_ALL) {
-               $ENV{LC_ALL} = $LC_ALL;
-       } else {
-               delete $ENV{LC_ALL};
-       }
+       set_svn_commit_env();
        foreach my $c (@revs) {
                my $log_msg = get_commit_message($c, $commit_msg);
 
@@ -589,13 +594,14 @@ sub graft_branches {
        my $l_map = read_url_paths();
        my @re = map { qr/$_/is } @_opt_m if @_opt_m;
        unless ($_no_default_regex) {
-               push @re, (     qr/\b(?:merge|merging|merged)\s+(\S.+)/is,
-                               qr/\b(?:from|of)\s+(\S.+)/is );
+               push @re, (qr/\b(?:merge|merging|merged)\s+with\s+([\w\.\-]+)/i,
+                       qr/\b(?:merge|merging|merged)\s+([\w\.\-]+)/i,
+                       qr/\b(?:from|of)\s+([\w\.\-]+)/i );
        }
        foreach my $u (keys %$l_map) {
                if (@re) {
                        foreach my $p (keys %{$l_map->{$u}}) {
-                               graft_merge_msg($grafts,$l_map,$u,$p);
+                               graft_merge_msg($grafts,$l_map,$u,$p,@re);
                        }
                }
                unless ($_no_graft_copy) {
@@ -606,6 +612,7 @@ sub graft_branches {
                        }
                }
        }
+       graft_tree_joins($grafts);
 
        write_grafts($grafts, $comments, $gr_file);
        unlink "$gr_file~$gr_sha1" if $gr_sha1;
@@ -716,6 +723,55 @@ sub show_log {
        print '-' x72,"\n" unless $_incremental || $_oneline;
 }
 
+# Print the commit-diff synopsis on STDERR and terminate with status 1.
+sub commit_diff_usage {
+       my $synopsis = "Usage: $0 commit-diff <tree-ish> <tree-ish> [<URL>]\n";
+       print STDERR $synopsis;
+       exit(1);
+}
+
+# Commit the difference between two tree-ish objects to an SVN repository
+# through the SVN libraries (refuses to run when $_use_lib is false).
+#
+# Arguments (taken from @_, i.e. the command line):
+#   1. first tree-ish  ($ta)
+#   2. second tree-ish ($tb); also the object handed to
+#      get_commit_message() when no message was supplied
+#   3. optional SVN URL; defaults to the contents of
+#      "$GIT_SVN_DIR/info/url"
+#
+# The log message is taken from --message/-m, or read from the file named
+# by --file/-F; giving both is an error.  Usage errors exit with status 1.
+sub commit_diff {
+       if (!$_use_lib) {
+               print STDERR "commit-diff must be used with SVN libraries\n";
+               exit 1;
+       }
+       my $ta = shift or commit_diff_usage();
+       my $tb = shift or commit_diff_usage();
+       if (!eval { $SVN_URL = shift || file_to_s("$GIT_SVN_DIR/info/url") }) {
+               print STDERR "Needed URL or usable git-svn id command-line\n";
+               commit_diff_usage();
+       }
+       if (defined $_message && defined $_file) {
+               print STDERR "Both --message/-m and --file/-F specified ",
+                               "for the commit message.\n",
+                               "I have no idea what you mean\n";
+               exit 1;
+       }
+       if (defined $_file) {
+               # read the message from the file named by --file/-F;
+               # $_message is necessarily undefined on this branch
+               $_message = file_to_s($_file);
+       } else {
+               $_message ||= get_commit_message($tb,
+                                       "$GIT_DIR/.svn-commit.tmp.$$")->{msg};
+       }
+       my $repo;
+       ($repo, $SVN_PATH) = repo_path_split($SVN_URL);
+       $SVN_LOG ||= libsvn_connect($repo);
+       $SVN ||= libsvn_connect($repo);
+       # NOTE(review): 'ge' compares as strings, so a '1.10.0' library
+       # would sort before '1.2.0' here -- confirm before relying on it.
+       my @lock = $SVN::Core::VERSION ge '1.2.0' ? (undef, 0) : ();
+       my $ed = SVN::Git::Editor->new({        r => $SVN->get_latest_revnum,
+                                               ra => $SVN, c => $tb,
+                                               svn_path => $SVN_PATH
+                                       },
+                               $SVN->get_commit_editor($_message,
+                                       sub {print "Committed $_[0]\n"},@lock)
+                               );
+       my $mods = libsvn_checkout_tree($ta, $tb, $ed);
+       if (@$mods == 0) {
+               # nothing to send: cancel the edit instead of committing
+               print "No changes\n$ta == $tb\n";
+               $ed->abort_edit;
+       } else {
+               $ed->close_edit;
+       }
+}
+
 ########################### utility functions #########################
 
 sub cmt_showable {
@@ -878,6 +934,77 @@ sub common_prefix {
        return '';
 }
 
+# grafts set here are 'stronger' in that they're based on actual tree
+# matches, and won't be deleted from merge-base checking in write_grafts()
+#
+# Walks every ref visited by git_svn_each() and, for each commit whose tree
+# is listed in %tree_map, records a graft (value 2) between that commit and
+# every other commit in the same %tree_map entry that git-merge-base cannot
+# relate to it.  Direction is decided by SVN revision number when both
+# commits carry the same repository UUID, otherwise by commit time.
+#
+# $grafts - hashref of { child => { parent => strength } }, modified in place
+sub graft_tree_joins {
+       my $grafts = shift;
+       map_tree_joins() if (@_branch_from && !%tree_map);
+       return unless %tree_map;
+
+       git_svn_each(sub {
+               my $i = shift;
+               defined(my $pid = open my $fh, '-|') or croak $!;
+               if (!$pid) {
+                       exec qw/git-rev-list --pretty=raw/,
+                                       "refs/remotes/$i" or croak $!;
+               }
+               while (<$fh>) {
+                       next unless /^commit ($sha1)$/o;
+                       my $c = $1;
+                       my ($t) = (<$fh> =~ /^tree ($sha1)$/o);
+                       next unless defined $t && $tree_map{$t};
+
+                       # scan forward to the committer line; stop at EOF
+                       # rather than spinning on an exhausted handle
+                       my ($s, $tz);
+                       while (defined(my $l = readline $fh)) {
+                               next unless $l =~
+                                   /^committer (?:.+) (\d+) ([\-\+]?\d+)$/;
+                               ($s, $tz) = ($1, $2);
+                               last;
+                       }
+                       # truncated record: leave the outer loop and let
+                       # close() below report any child failure
+                       last unless defined $s;
+
+                       if ($tz =~ s/^\+//) {
+                               $s += tz_to_s_offset($tz);
+                       } elsif ($tz =~ s/^\-//) {
+                               $s -= tz_to_s_offset($tz);
+                       }
+
+                       my ($url_a, $r_a, $uuid_a) = cmt_metadata($c);
+
+                       foreach my $p (@{$tree_map{$t}}) {
+                               next if $p eq $c;
+                               my $mb = eval {
+                                       safe_qx('git-merge-base', $c, $p)
+                               };
+                               # only graft commits with no common ancestor
+                               next unless ($@ || $?);
+                               if (defined $r_a) {
+                                       # see if SVN says it's a relative
+                                       my ($url_b, $r_b, $uuid_b) =
+                                                       cmt_metadata($p);
+                                       next if (defined $url_b &&
+                                                       defined $url_a &&
+                                                       ($url_a eq $url_b) &&
+                                                       ($uuid_a eq $uuid_b));
+                                       if (defined $uuid_b &&
+                                                       $uuid_a eq $uuid_b) {
+                                               if ($r_b < $r_a) {
+                                                       $grafts->{$c}->{$p} = 2;
+                                                       next;
+                                               } elsif ($r_b > $r_a) {
+                                                       $grafts->{$p}->{$c} = 2;
+                                                       next;
+                                               }
+                                       }
+                               }
+                               # fall back to comparing commit times
+                               my $ct = get_commit_time($p);
+                               if ($ct < $s) {
+                                       $grafts->{$c}->{$p} = 2;
+                               } elsif ($ct > $s) {
+                                       $grafts->{$p}->{$c} = 2;
+                               }
+                               # what should we do when $ct == $s ?
+                       }
+               }
+               close $fh or croak $?;
+       });
+}
+
 # this isn't funky-filename safe, but good enough for now...
 sub graft_file_copy_cmd {
        my ($grafts, $l_map, $u) = @_;
@@ -924,7 +1051,7 @@ sub graft_file_copy_lib {
        $SVN::Error::handler = \&libsvn_skip_unknown_revs;
        while (1) {
                my $pool = SVN::Pool->new;
-               $SVN_LOG->get_log( "/$path", $min, $max, 0, 1, 1,
+               libsvn_get_log($SVN_LOG, "/$path", $min, $max, 0, 1, 1,
                        sub {
                                libsvn_graft_file_copies($grafts, $tree_paths,
                                                        $path, @_);
@@ -956,7 +1083,7 @@ sub process_merge_msg_matches {
                my $re = qr/\Q$w\E/i;
                foreach (keys %{$l_map->{$u}}) {
                        if (/$re/) {
-                               push @strong, $_;
+                               push @strong, $l_map->{$u}->{$_};
                                last;
                        }
                }
@@ -965,7 +1092,7 @@ sub process_merge_msg_matches {
                $re = qr/\Q$w\E/i;
                foreach (keys %{$l_map->{$u}}) {
                        if (/$re/) {
-                               push @strong, $_;
+                               push @strong, $l_map->{$u}->{$_};
                                last;
                        }
                }
@@ -978,7 +1105,7 @@ sub process_merge_msg_matches {
                return unless defined $rev;
        }
        foreach my $m (@strong) {
-               my ($r0, $s0) = find_rev_before($rev, $m);
+               my ($r0, $s0) = find_rev_before($rev, $m, 1);
                $grafts->{$c->{c}}->{$s0} = 1 if defined $s0;
        }
 }
@@ -1392,7 +1519,6 @@ sub get_commit_message {
        my %log_msg = ( msg => '' );
        open my $msg, '>', $commit_msg or croak $!;
 
-       print "commit: $commit\n";
        chomp(my $type = `git-cat-file -t $commit`);
        if ($type eq 'commit') {
                my $pid = open my $msg_fh, '-|';
@@ -1429,6 +1555,14 @@ sub get_commit_message {
        return \%log_msg;
 }
 
+# Give the svn commit step the saved $LC_ALL value: export it when one
+# is defined, otherwise remove LC_ALL from the environment altogether.
+sub set_svn_commit_env {
+       return delete $ENV{LC_ALL} unless defined $LC_ALL;
+       $ENV{LC_ALL} = $LC_ALL;
+}
+
 sub svn_commit_tree {
        my ($last, $commit) = @_;
        my $commit_msg = "$GIT_SVN_DIR/.svn-commit.tmp.$$";
@@ -1436,11 +1570,7 @@ sub svn_commit_tree {
        my ($oneline) = ($log_msg->{msg} =~ /([^\n\r]+)/);
        print "Committing $commit: $oneline\n";
 
-       if (defined $LC_ALL) {
-               $ENV{LC_ALL} = $LC_ALL;
-       } else {
-               delete $ENV{LC_ALL};
-       }
+       set_svn_commit_env();
        my @ci_output = safe_qx(qw(svn commit -F),$commit_msg);
        $ENV{LC_ALL} = 'C';
        unlink $commit_msg;
@@ -1790,7 +1920,26 @@ sub git_commit {
                restore_index($index);
        }
        if (exists $tree_map{$tree}) {
-               push @tmp_parents, @{$tree_map{$tree}};
+               foreach my $p (@{$tree_map{$tree}}) {
+                       my $skip;
+                       foreach (@tmp_parents) {
+                               # see if a common parent is found
+                               my $mb = eval {
+                                       safe_qx('git-merge-base', $_, $p)
+                               };
+                               next if ($@ || $?);
+                               $skip = 1;
+                               last;
+                       }
+                       next if $skip;
+                       my ($url_p, $r_p, $uuid_p) = cmt_metadata($p);
+                       next if (($SVN_UUID eq $uuid_p) &&
+                                               ($log_msg->{revision} > $r_p));
+                       next if (defined $url_p && defined $SVN_URL &&
+                                               ($SVN_UUID eq $uuid_p) &&
+                                               ($url_p eq $SVN_URL));
+                       push @tmp_parents, $p;
+               }
        }
        foreach (@tmp_parents) {
                next if $seen_parent{$_};
@@ -2118,6 +2267,7 @@ sub init_vars {
        $GIT_SVN_INDEX = "$GIT_SVN_DIR/index";
        $SVN_URL = undef;
        $SVN_WC = "$GIT_SVN_DIR/tree";
+       %tree_map = ();
 }
 
 # convert GetOpt::Long specs for use by git-repo-config
@@ -2185,6 +2335,7 @@ sub write_grafts {
                        print $fh $_ foreach @{$comments->{$c}};
                }
                my $p = $grafts->{$c};
+               my %x; # real parents
                delete $p->{$c}; # commits are not self-reproducing...
                my $pid = open my $ch, '-|';
                defined $pid or croak $!;
@@ -2192,13 +2343,41 @@ sub write_grafts {
                        exec(qw/git-cat-file commit/, $c) or croak $!;
                }
                while (<$ch>) {
-                       if (/^parent ([a-f\d]{40})/) {
-                               $p->{$1} = 1;
+                       if (/^parent ($sha1)/) {
+                               $x{$1} = $p->{$1} = 1;
                        } else {
-                               last unless /^\S/i;
+                               last unless /^\S/;
                        }
                }
                close $ch; # breaking the pipe
+
+               # if real parents are the only ones in the grafts, drop it
+               next if join(' ',sort keys %$p) eq join(' ',sort keys %x);
+
+               my (@ip, @jp, $mb);
+               my %del = %x;
+               @ip = @jp = keys %$p;
+               foreach my $i (@ip) {
+                       next if $del{$i} || $p->{$i} == 2;
+                       foreach my $j (@jp) {
+                               next if $i eq $j || $del{$j} || $p->{$j} == 2;
+                               $mb = eval { safe_qx('git-merge-base',$i,$j) };
+                               next unless $mb;
+                               chomp $mb;
+                               next if $x{$mb};
+                               if ($mb eq $j) {
+                                       delete $p->{$i};
+                                       $del{$i} = 1;
+                               } elsif ($mb eq $i) {
+                                       delete $p->{$j};
+                                       $del{$j} = 1;
+                               }
+                       }
+               }
+
+               # if real parents are the only ones in the grafts, drop it
+               next if join(' ',sort keys %$p) eq join(' ',sort keys %x);
+
                print $fh $c, ' ', join(' ', sort keys %$p),"\n";
        }
        if ($comments->{'END'}) {
@@ -2218,7 +2397,7 @@ sub read_url_paths {
 }
 
 sub extract_metadata {
-       my $id = shift;
+       my $id = shift or return (undef, undef, undef);
        my ($url, $rev, $uuid) = ($id =~ /^git-svn-id:\s(\S+?)\@(\d+)
                                                        \s([a-f\d\-]+)$/x);
        if (!$rev || !$uuid || !$url) {
@@ -2229,6 +2408,31 @@ sub extract_metadata {
        return ($url, $rev, $uuid);
 }
 
+# Return whatever extract_metadata() yields for the last "git-svn-id: "
+# line found in the raw commit object named by the first argument.
+sub cmt_metadata {
+       my $commit = shift;
+       my @id_lines = grep { /^git-svn-id: / }
+                       safe_qx(qw/git-cat-file commit/, $commit);
+       # pass nothing at all (not undef) when no id line was found
+       return extract_metadata(@id_lines ? $id_lines[-1] : ());
+}
+
+# Return the committer timestamp of commit $cmt, adjusted by the offset
+# that tz_to_s_offset() computes from the recorded timezone (added for
+# '+' zones, subtracted for '-' zones).  Dies when git-rev-list prints
+# no committer line for $cmt.
+sub get_commit_time {
+       my $cmt = shift;
+       my $pid = open my $fh, '-|';
+       croak $! unless defined $pid;
+       unless ($pid) {
+               exec qw/git-rev-list --pretty=raw -n1/, $cmt or croak $!;
+       }
+       while (<$fh>) {
+               next unless /^committer\s(?:.+) (\d+) ([\-\+]?\d+)$/;
+               my ($s, $tz) = ($1, $2);
+               if ($tz =~ /^([\-\+])(\d+)$/) {
+                       my ($sign, $hhmm) = ($1, $2);
+                       my $offset = tz_to_s_offset($hhmm);
+                       $s = ($sign eq '+') ? $s + $offset : $s - $offset;
+               }
+               close $fh;
+               return $s;
+       }
+       die "Can't get commit time for commit: $cmt\n";
+}
+
 sub tz_to_s_offset {
        my ($tz) = @_;
        $tz =~ s/(\d\d)$//;
@@ -2358,8 +2562,8 @@ sub libsvn_load {
        return unless $_use_lib;
        $_use_lib = eval {
                require SVN::Core;
-               if ($SVN::Core::VERSION lt '1.2.1') {
-                       die "Need SVN::Core 1.2.1 or better ",
+               if ($SVN::Core::VERSION lt '1.1.0') {
+                       die "Need SVN::Core 1.1.0 or better ",
                                        "(got $SVN::Core::VERSION) ",
                                        "Falling back to command-line svn\n";
                }
@@ -2392,9 +2596,15 @@ sub libsvn_get_file {
        my $pool = SVN::Pool->new;
        defined($pid = open3($in, $out, '>&STDERR',
                                qw/git-hash-object -w --stdin/)) or croak $!;
-       my ($r, $props) = $SVN->get_file($f, $rev, $in, $pool);
+       # redirect STDOUT for SVN 1.1.x compatibility
+       open my $stdout, '>&', \*STDOUT or croak $!;
+       open STDOUT, '>&', $in or croak $!;
+       $| = 1; # not sure if this is necessary, better safe than sorry...
+       my ($r, $props) = $SVN->get_file($f, $rev, \*STDOUT, $pool);
        $in->flush == 0 or croak $!;
+       open STDOUT, '>&', $stdout or croak $!;
        close $in or croak $!;
+       close $stdout or croak $!;
        $pool->clear;
        chomp($hash = do { local $/; <$out> });
        close $out or croak $!;
@@ -2491,8 +2701,7 @@ sub svn_grab_base_rev {
        chomp(my $c = do { local $/; <$fh> });
        close $fh;
        if (defined $c && length $c) {
-               my ($url, $rev, $uuid) = extract_metadata((grep(/^git-svn-id: /,
-                       safe_qx(qw/git-cat-file commit/, $c)))[-1]);
+               my ($url, $rev, $uuid) = cmt_metadata($c);
                return ($rev, $c);
        }
        return (undef, undef);
@@ -2566,7 +2775,8 @@ sub revisions_eq {
        if ($_use_lib) {
                # should be OK to use Pool here (r1 - r0) should be small
                my $pool = SVN::Pool->new;
-               $SVN->get_log("/$path", $r0, $r1, 0, 1, 1, sub {$nr++},$pool);
+               libsvn_get_log($SVN, "/$path", $r0, $r1,
+                               0, 1, 1, sub {$nr++}, $pool);
                $pool->clear;
        } else {
                my ($url, undef) = repo_path_split($SVN_URL);
@@ -2606,6 +2816,14 @@ sub libsvn_find_parent_branch {
        return undef;
 }
 
+# Call $ra->get_log(@args), dropping the fourth argument (index 3, the
+# one every caller in this file passes as 0) when the loaded SVN::Core
+# is version 1.2.0 or older.
+#
+#   $ra   - object providing a get_log method
+#   @args - arguments for get_log, written in the newer calling form
+#
+# Returns whatever $ra->get_log returns.
+sub libsvn_get_log {
+       my ($ra, @args) = @_;
+       # Compare versions numerically, one component at a time: a plain
+       # string 'le' would rank '1.10.0' below '1.2.0'.
+       my @have = map { /^(\d+)/ ? $1 : 0 }
+                       split /\./, ($SVN::Core::VERSION || '');
+       my @last_old = (1, 2, 0);
+       my $cmp = 0;
+       foreach my $i (0 .. $#last_old) {
+               $cmp = ($have[$i] || 0) <=> $last_old[$i] and last;
+       }
+       if ($cmp <= 0) {
+               splice(@args, 3, 1);
+       }
+       $ra->get_log(@args);
+}
+
 sub libsvn_new_tree {
        if (my $log_entry = libsvn_find_parent_branch(@_)) {
                return $log_entry;
@@ -2639,6 +2857,10 @@ sub find_graft_path_parents {
                my $i = $tree_paths->{$x};
                my ($r, $parent) = find_rev_before($r0, $i, 1);
                if (defined $r && defined $parent && revisions_eq($x,$r,$r0)) {
+                       my ($url_b, undef, $uuid_b) = cmt_metadata($c);
+                       my ($url_a, undef, $uuid_a) = cmt_metadata($parent);
+                       next if ($url_a && $url_b && $url_a eq $url_b &&
+                                                       $uuid_b eq $uuid_a);
                        $grafts->{$c}->{$parent} = 1;
                }
        }