git-fetch-pack: Do not use git-rev-list
[gitweb.git] / git-svnimport.perl
index b880297e0c3fdb5778f66311f6eef89c6dd16ed7..20a85724cb1660a3411fa34717cd5f1e98ebd731 100755 (executable)
@@ -5,8 +5,7 @@
 #
 # The basic idea is to pull and analyze SVN changes.
 #
-# Checking out the files is done by a single long-running CVS connection
-# / server process.
+# Checking out the files is done by a single long-running SVN connection.
 #
 # The head revision is on branch "origin" by default.
 # You can change that with the '-o' option.
 use SVN::Core;
 use SVN::Ra;
 
-die "Need CVN:COre 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1";
+die "Need CVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1";
 
 $SIG{'PIPE'}="IGNORE";
 $ENV{'TZ'}="UTC";
 
-our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s,$opt_l);
+our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s,$opt_l,$opt_d,$opt_D);
 
 sub usage() {
        print STDERR <<END;
 Usage: ${\basename $0}     # fetch/update GIT from CVS
        [-o branch-for-HEAD] [-h] [-v] [-l max_num_changes]
        [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
-       [-i] [-u] [-s start_chg] [-m] [-M regex] [SVN_URL]
+       [-d|-D] [-i] [-u] [-s start_chg] [-m] [-M regex] [SVN_URL]
 END
        exit(1);
 }
 
-getopts("b:C:hil:mM:o:s:t:T:uv") or usage();
+getopts("b:C:dDhil:mM:o:s:t:T:uv") or usage();
 usage if $opt_h;
 
 my $tag_name = $opt_t || "tags";
 my $trunk_name = $opt_T || "trunk";
 my $branch_name = $opt_b || "branches";
 
-@ARGV <= 1 or usage();
+@ARGV == 1 or @ARGV == 2 or usage();
 
 $opt_o ||= "origin";
+$opt_s ||= 1;
 $opt_l = 100 unless defined $opt_l;
 my $git_tree = $opt_C;
 $git_tree ||= ".";
 
-my $cvs_tree;
-if ($#ARGV == 0) {
-       $cvs_tree = $ARGV[0];
-} elsif (-f 'CVS/Repository') {
-       open my $f, '<', 'CVS/Repository' or 
-           die 'Failed to open CVS/Repository';
-       $cvs_tree = <$f>;
-       chomp $cvs_tree;
-       close $f;
-} else {
-       usage();
-}
+my $svn_url = $ARGV[0];
+my $svn_dir = $ARGV[1];
 
 our @mergerx = ();
 if ($opt_m) {
@@ -117,30 +107,44 @@ sub conn {
 
 sub file {
        my($self,$path,$rev) = @_;
-       my $res;
 
-       my ($fh, $name) = tempfile('gitsvn.XXXXXX', 
+       my ($fh, $name) = tempfile('gitsvn.XXXXXX',
                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 
        print "... $rev $path ...\n" if $opt_v;
        eval { $self->{'svn'}->get_file($path,$rev,$fh); };
-       if ($@ and $@ !~ /Attempted to get checksum/) {
-           # retry
-           $self->conn();
-               eval { $self->{'svn'}->get_file($path,$rev,$fh); };
-       };
-       return () if $@ and $@ !~ /Attempted to get checksum/;
-       die $@ if $@;
+       if($@) {
+               return undef if $@ =~ /Attempted to get checksum/;
+               die $@;
+       }
        close ($fh);
 
-       return ($name, $res);
+       return $name;
 }
 
-
 package main;
+use URI;
 
-my $svn = SVNconn->new($cvs_tree);
+my $svn = $svn_url;
+$svn .= "/$svn_dir" if defined $svn_dir;
+$svn = SVNconn->new($svn);
 
+my $lwp_ua;
+if($opt_d or $opt_D) {
+       $svn_url = URI->new($svn_url)->canonical;
+       if($opt_D) {
+               $svn_dir =~ s#/*$#/#;
+       } else {
+               $svn_dir = "";
+       }
+       if ($svn_url->scheme eq "http") {
+               use LWP::UserAgent;
+               $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []);
+       } else {
+               print STDERR "Warning: not HTTP; turning off direct file access\n";
+               $opt_d=0;
+       }
+}
 
 sub pdate($) {
        my($d) = @_;
@@ -159,9 +163,9 @@ ()
 
 sub get_headref($$) {
     my $name    = shift;
-    my $git_dir = shift; 
+    my $git_dir = shift;
     my $sha;
-    
+
     if (open(C,"$git_dir/refs/heads/$name")) {
        chomp($sha = <C>);
        close(C);
@@ -193,7 +197,7 @@ ($$)
 my $maxnum = 0;
 my $last_rev = "";
 my $last_branch;
-my $current_rev = $opt_s ? ($opt_s-1) : 0;
+my $current_rev = $opt_s-1;
 unless(-d $git_dir) {
        system("git-init-db");
        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
@@ -231,7 +235,7 @@ ($$)
 
        $forward_master =
            $opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
-           system('cmp', '-s', "$git_dir/refs/heads/master", 
+           system('cmp', '-s', "$git_dir/refs/heads/master",
                                "$git_dir/refs/heads/$opt_o") == 0;
 
        # populate index
@@ -254,25 +258,6 @@ ($$)
 
 open BRANCHES,">>", "$git_dir/svn2git";
 
-
-## cvsps output:
-#---------------------
-#PatchSet 314
-#Date: 1999/09/18 13:03:59
-#Author: wkoch
-#Branch: STABLE-BRANCH-1-0
-#Ancestor branch: HEAD
-#Tag: (none)
-#Log:
-#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
-#Members:
-#      README:1.57->1.57.2.1
-#      VERSION:1.96->1.96.2.1
-#
-#---------------------
-
-my $state = 0;
-
 sub get_file($$$) {
        my($rev,$branch,$path) = @_;
 
@@ -280,22 +265,45 @@ ($$$)
        my $svnpath;
        $path = "" if $path eq "/"; # this should not happen, but ...
        if($branch eq "/") {
-               $svnpath = "/$trunk_name/$path";
+               $svnpath = "$trunk_name/$path";
        } elsif($branch =~ m#^/#) {
-               $svnpath = "/$tag_name$branch/$path";
+               $svnpath = "$tag_name$branch/$path";
        } else {
-               $svnpath = "/$branch_name/$branch/$path";
+               $svnpath = "$branch_name/$branch/$path";
        }
 
        # now get it
-       my ($name, $res) = eval { $svn->file($svnpath,$rev); };
-       return () unless defined $name;
+       my $name;
+       if($opt_d) {
+               my($req,$res);
+
+               # Example of the path built below: /svn/!svn/bc/2/django/trunk/django-docs/build.py
+               my $url=$svn_url->clone();
+               $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath");
+               print "... $path...\n" if $opt_v;
+               $req = HTTP::Request->new(GET => $url);
+               $res = $lwp_ua->request($req);
+               if ($res->is_success) {
+                       my $fh;
+                       ($fh, $name) = tempfile('gitsvn.XXXXXX',
+                       DIR => File::Spec->tmpdir(), UNLINK => 1);
+                       print $fh $res->content;
+                       close($fh) or die "Could not write $name: $!\n";
+               } else {
+                       return undef if $res->code == 301; # directory?
+                       die $res->status_line." at $url\n";
+               }
+       } else {
+               $name = $svn->file("/$svnpath",$rev);
+               return undef unless defined $name;
+       }
 
        open my $F, '-|', "git-hash-object", "-w", $name
                or die "Cannot create object: $!\n";
        my $sha = <$F>;
        chomp $sha;
        close $F;
+       unlink $name;
        my $mode = "0644"; # SV does not seem to store any file modes
        return [$mode, $sha, $path];
 }
@@ -318,6 +326,36 @@ ($$)
        return ($branch,$path);
 }
 
+sub copy_subdir($$$$$$) {
+       # Somebody copied a whole subdirectory.
+       # We need to find the index entries from the old version which the
+       # SVN log entry points to, and add them to the new place.
+
+       my($newrev,$newbranch,$path,$oldpath,$rev,$new) = @_;
+       my($branch,$srcpath) = split_path($rev,$oldpath);
+
+       my $gitrev = $branches{$branch}{$rev};
+       unless($gitrev) {
+               print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n";
+               return;
+       }
+       print "$newrev:$newbranch:$path: copying from $branch:$srcpath @ $rev\n" if $opt_v;
+       $srcpath =~ s#/*$#/#;
+       open my $f,"-|","git-ls-tree","-r","-z",$gitrev,$srcpath;
+       local $/ = "\0";
+       while(<$f>) {
+               chomp;
+               my($m,$p) = split(/\t/,$_,2);
+               my($mode,$type,$sha1) = split(/ /,$m);
+               next if $type ne "blob";
+               $p = substr($p,length($srcpath)-1);
+               print "... found $path$p ...\n" if $opt_v;
+               push(@$new,[$mode,$sha1,$path.$p]);
+       }
+       close($f) or
+               print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n";
+}
+
 sub commit {
        my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
        my($author_name,$author_email,$dest);
@@ -412,10 +450,20 @@ sub commit {
        if($tag and not %$changed_paths) {
                $cid = $rev;
        } else {
-               while(my($path,$action) = each %$changed_paths) {
+               my @paths = sort keys %$changed_paths;
+               foreach my $path(@paths) {
+                       my $action = $changed_paths->{$path};
+
                        if ($action->[0] eq "A") {
                                my $f = get_file($revision,$branch,$path);
-                               push(@new,$f) if $f;
+                               if($f) {
+                                       push(@new,$f) if $f;
+                               } elsif($action->[1]) {
+                                       copy_subdir($revision,$branch,$path,$action->[1],$action->[2],\@new);
+                               } else {
+                                       my $opath = $action->[3];
+                                       print STDERR "$revision: $branch: could not fetch '$opath'\n";
+                               }
                        } elsif ($action->[0] eq "D") {
                                push(@old,$path);
                        } elsif ($action->[0] eq "M") {
@@ -426,16 +474,19 @@ sub commit {
                                push(@old,$path); # remove any old stuff
 
                                # ... and add any new stuff
-                               my($b,$p) = split_path($revision,$action->[1]);
-                               open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $p;
-                               local $/ = '\0';
+                               my($b,$srcpath) = split_path($revision,$action->[1]);
+                               $srcpath =~ s#/*$#/#;
+                               open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $srcpath;
+                               local $/ = "\0";
                                while(<$F>) {
                                        chomp;
                                        my($m,$p) = split(/\t/,$_,2);
                                        my($mode,$type,$sha1) = split(/ /,$m);
                                        next if $type ne "blob";
-                                       push(@new,[$mode,$sha1,$p]);
+                                       $p = substr($p,length($srcpath)-1);
+                                       push(@new,[$mode,$sha1,$path.$p]);
                                }
+                               close($F);
                        } else {
                                die "$revision: unknown action '".$action->[0]."' for $path\n";
                        }
@@ -444,7 +495,7 @@ sub commit {
                if(@old) {
                        open my $F, "-|", "git-ls-files", "-z", @old or die $!;
                        @old = ();
-                       local $/ = '\0';
+                       local $/ = "\0";
                        while(<$F>) {
                                chomp;
                                push(@old,$_);
@@ -516,7 +567,7 @@ sub commit {
                                                push @par, '-p', $mparent;
                                                print OUT "Merge parent branch: $mparent\n" if $opt_v;
                                        }
-                               } 
+                               }
                        }
 
                        exec("env",
@@ -553,7 +604,7 @@ sub commit {
                print "... no known parent\n" if $opt_v;
        } elsif(not $tag) {
                print "Writing to refs/heads/$dest\n" if $opt_v;
-               open(C,">$git_dir/refs/heads/$dest") and 
+               open(C,">$git_dir/refs/heads/$dest") and
                print C ("$cid\n") and
                close(C)
                        or die "Cannot write branch $dest for update: $!\n";
@@ -563,7 +614,7 @@ sub commit {
                my($in, $out) = ('','');
                $last_rev = "-" if %$changed_paths;
                # the tag was 'complex', i.e. did not refer to a "real" revision
-               
+
                $dest =~ tr/_/\./ if $opt_u;
 
                my $pid = open2($in, $out, 'git-mktag');
@@ -601,7 +652,7 @@ sub _commit_all {
        ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
        my %p;
        while(my($path,$action) = each %$changed_paths) {
-               $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev ];
+               $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev, $path ];
        }
        $changed_paths = \%p;
 }
@@ -622,11 +673,12 @@ sub commit_all {
        }
 }
 
-while(++$current_rev < $svn->{'maxrev'}) {
+while(++$current_rev <= $svn->{'maxrev'}) {
        $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,"");
        commit_all();
        if($opt_l and not --$opt_l) {
-               print STDERR "Exiting due to a memory leak. Repeat, please.\n";
+               print STDERR "Stopping, because there is a memory leak (in the SVN library).\n";
+               print STDERR "Please repeat this command; it will continue safely\n";
                last;
        }
 }