1#!/usr/bin/perl
2
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use 5.008;
17use strict;
18use warnings;
19use Getopt::Long;
20use File::Spec;
21use File::Temp qw(tempfile tmpnam);
22use File::Path qw(mkpath);
23use File::Basename qw(basename dirname);
24use Time::Local;
25use IO::Socket;
26use IO::Pipe;
27use POSIX qw(strftime dup2 ENOENT);
28use IPC::Open2;
29
30$SIG{'PIPE'}="IGNORE";
31$ENV{'TZ'}="UTC";
32
33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34my (%conv_author_name, %conv_author_email);
35
36sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45END
46 exit(1);
47}
48
49sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
53
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
61 }
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
70 }
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
74 }
75 }
76 # NEEDSWORK: Maybe warn on unrecognized lines?
77 }
78 close ($f);
79}
80
81sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
85
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
88 }
89 close ($f);
90}
91
92# convert getopts specs for use by git config
93my %longmap = (
94 'A:' => 'authors-file',
95 'M:' => 'merge-regex',
96 'P:' => undef,
97 'R' => 'track-revisions',
98 'S:' => 'ignore-paths',
99);
100
101sub read_repo_config {
102 # Split the string between characters, unless there is a ':'
103 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 my @opts = split(/ *(?!:)/, shift);
105 foreach my $o (@opts) {
106 my $key = $o;
107 $key =~ s/://g;
108 my $arg = 'git config';
109 $arg .= ' --bool' if ($o !~ /:$/);
110 my $ckey = $key;
111
112 if (exists $longmap{$o}) {
113 # An uppercase option like -R cannot be
114 # expressed in the configuration, as the
115 # variable names are downcased.
116 $ckey = $longmap{$o};
117 next if (! defined $ckey);
118 $ckey =~ s/-//g;
119 }
120 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 no strict 'refs';
123 my $opt_name = "opt_" . $key;
124 if (!$$opt_name) {
125 $$opt_name = $tmp;
126 }
127 }
128 }
129}
130
131my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132read_repo_config($opts);
133Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
134
135# turn the Getopt::Std specification in a Getopt::Long one,
136# with support for multiple -M options
137GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
138 or usage();
139usage if $opt_h;
140
141if (@ARGV == 0) {
142 chomp(my $module = `git config --get cvsimport.module`);
143 push(@ARGV, $module) if $? == 0;
144}
145@ARGV <= 1 or usage("You can't specify more than one CVS module");
146
147if ($opt_d) {
148 $ENV{"CVSROOT"} = $opt_d;
149} elsif (-f 'CVS/Root') {
150 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
151 $opt_d = <$f>;
152 chomp $opt_d;
153 close $f;
154 $ENV{"CVSROOT"} = $opt_d;
155} elsif ($ENV{"CVSROOT"}) {
156 $opt_d = $ENV{"CVSROOT"};
157} else {
158 usage("CVSROOT needs to be set");
159}
160$opt_s ||= "-";
161$opt_a ||= 0;
162
163my $git_tree = $opt_C;
164$git_tree ||= ".";
165
166my $remote;
167if (defined $opt_r) {
168 $remote = 'refs/remotes/' . $opt_r;
169 $opt_o ||= "master";
170} else {
171 $opt_o ||= "origin";
172 $remote = 'refs/heads';
173}
174
175my $cvs_tree;
176if ($#ARGV == 0) {
177 $cvs_tree = $ARGV[0];
178} elsif (-f 'CVS/Repository') {
179 open my $f, '<', 'CVS/Repository' or
180 die 'Failed to open CVS/Repository';
181 $cvs_tree = <$f>;
182 chomp $cvs_tree;
183 close $f;
184} else {
185 usage("CVS module has to be specified");
186}
187
188our @mergerx = ();
189if ($opt_m) {
190 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
191}
192if (@opt_M) {
193 push (@mergerx, map { qr/$_/ } @opt_M);
194}
195
196# Remember UTC of our starting time
197# we'll want to avoid importing commits
198# that are too recent
199our $starttime = time();
200
201select(STDERR); $|=1; select(STDOUT);
202
203
204package CVSconn;
205# Basic CVS dialog.
206# We're only interested in connecting and downloading, so ...
207
208use File::Spec;
209use File::Temp qw(tempfile);
210use POSIX qw(strftime dup2);
211
212sub new {
213 my ($what,$repo,$subdir) = @_;
214 $what=ref($what) if ref($what);
215
216 my $self = {};
217 $self->{'buffer'} = "";
218 bless($self,$what);
219
220 $repo =~ s#/+$##;
221 $self->{'fullrep'} = $repo;
222 $self->conn();
223
224 $self->{'subdir'} = $subdir;
225 $self->{'lines'} = undef;
226
227 return $self;
228}
229
230sub find_password_entry {
231 my ($cvspass, @cvsroot) = @_;
232 my ($file, $delim) = @$cvspass;
233 my $pass;
234 local ($_);
235
236 if (open(my $fh, $file)) {
237 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
238 CVSPASSFILE:
239 while (<$fh>) {
240 chomp;
241 s/^\/\d+\s+//;
242 my ($w, $p) = split($delim,$_,2);
243 for my $cvsroot (@cvsroot) {
244 if ($w eq $cvsroot) {
245 $pass = $p;
246 last CVSPASSFILE;
247 }
248 }
249 }
250 close($fh);
251 }
252 return $pass;
253}
254
255sub conn {
256 my $self = shift;
257 my $repo = $self->{'fullrep'};
258 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
259 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
260
261 my ($proxyhost,$proxyport);
262 if ($param && ($param =~ m/proxy=([^;]+)/)) {
263 $proxyhost = $1;
264 # Default proxyport, if not specified, is 8080.
265 $proxyport = 8080;
266 if ($ENV{"CVS_PROXY_PORT"}) {
267 $proxyport = $ENV{"CVS_PROXY_PORT"};
268 }
269 if ($param =~ m/proxyport=([^;]+)/) {
270 $proxyport = $1;
271 }
272 }
273 $repo ||= '/';
274
275 # if username is not explicit in CVSROOT, then use current user, as cvs would
276 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
277 my $rr2 = "-";
278 unless ($port) {
279 $rr2 = ":pserver:$user\@$serv:$repo";
280 $port=2401;
281 }
282 my $rr = ":pserver:$user\@$serv:$port$repo";
283
284 if ($pass) {
285 $pass = $self->_scramble($pass);
286 } else {
287 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
288 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
289 my @loc = ();
290 foreach my $cvspass (@cvspass) {
291 my $p = find_password_entry($cvspass, $rr, $rr2);
292 if ($p) {
293 push @loc, $cvspass->[0];
294 $pass = $p;
295 }
296 }
297
298 if (1 < @loc) {
299 die("Multiple cvs password files have ".
300 "entries for CVSROOT $opt_d: @loc");
301 } elsif (!$pass) {
302 $pass = "A";
303 }
304 }
305
306 my ($s, $rep);
307 if ($proxyhost) {
308
309 # Use a HTTP Proxy. Only works for HTTP proxies that
310 # don't require user authentication
311 #
312 # See: http://www.ietf.org/rfc/rfc2817.txt
313
314 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
315 die "Socket to $proxyhost: $!\n" unless defined $s;
316 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
317 or die "Write to $proxyhost: $!\n";
318 $s->flush();
319
320 $rep = <$s>;
321
322 # The answer should look like 'HTTP/1.x 2yy ....'
323 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
324 die "Proxy connect: $rep\n";
325 }
326 # Skip up to the empty line of the proxy server output
327 # including the response headers.
328 while ($rep = <$s>) {
329 last if (!defined $rep ||
330 $rep eq "\n" ||
331 $rep eq "\r\n");
332 }
333 } else {
334 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
335 die "Socket to $serv: $!\n" unless defined $s;
336 }
337
338 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
339 or die "Write to $serv: $!\n";
340 $s->flush();
341
342 $rep = <$s>;
343
344 if ($rep ne "I LOVE YOU\n") {
345 $rep="<unknown>" unless $rep;
346 die "AuthReply: $rep\n";
347 }
348 $self->{'socketo'} = $s;
349 $self->{'socketi'} = $s;
350 } else { # local or ext: Fork off our own cvs server.
351 my $pr = IO::Pipe->new();
352 my $pw = IO::Pipe->new();
353 my $pid = fork();
354 die "Fork: $!\n" unless defined $pid;
355 my $cvs = 'cvs';
356 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
357 my $rsh = 'rsh';
358 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
359
360 my @cvs = ($cvs, 'server');
361 my ($local, $user, $host);
362 $local = $repo =~ s/:local://;
363 if (!$local) {
364 $repo =~ s/:ext://;
365 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
366 ($user, $host) = ($1, $2);
367 }
368 if (!$local) {
369 if ($user) {
370 unshift @cvs, $rsh, '-l', $user, $host;
371 } else {
372 unshift @cvs, $rsh, $host;
373 }
374 }
375
376 unless ($pid) {
377 $pr->writer();
378 $pw->reader();
379 dup2($pw->fileno(),0);
380 dup2($pr->fileno(),1);
381 $pr->close();
382 $pw->close();
383 exec(@cvs);
384 }
385 $pw->writer();
386 $pr->reader();
387 $self->{'socketo'} = $pw;
388 $self->{'socketi'} = $pr;
389 }
390 $self->{'socketo'}->write("Root $repo\n");
391
392 # Trial and error says that this probably is the minimum set
393 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
394
395 $self->{'socketo'}->write("valid-requests\n");
396 $self->{'socketo'}->flush();
397
398 my $rep=$self->readline();
399 die "Failed to read from server" unless defined $rep;
400 chomp($rep);
401 if ($rep !~ s/^Valid-requests\s*//) {
402 $rep="<unknown>" unless $rep;
403 die "Expected Valid-requests from server, but got: $rep\n";
404 }
405 chomp(my $res=$self->readline());
406 die "validReply: $res\n" if $res ne "ok";
407
408 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
409 $self->{'repo'} = $repo;
410}
411
412sub readline {
413 my ($self) = @_;
414 return $self->{'socketi'}->getline();
415}
416
417sub _file {
418 # Request a file with a given revision.
419 # Trial and error says this is a good way to do it. :-/
420 my ($self,$fn,$rev) = @_;
421 $self->{'socketo'}->write("Argument -N\n") or return undef;
422 $self->{'socketo'}->write("Argument -P\n") or return undef;
423 # -kk: Linus' version doesn't use it - defaults to off
424 if ($opt_k) {
425 $self->{'socketo'}->write("Argument -kk\n") or return undef;
426 }
427 $self->{'socketo'}->write("Argument -r\n") or return undef;
428 $self->{'socketo'}->write("Argument $rev\n") or return undef;
429 $self->{'socketo'}->write("Argument --\n") or return undef;
430 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
431 $self->{'socketo'}->write("Directory .\n") or return undef;
432 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
433 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
434 $self->{'socketo'}->write("co\n") or return undef;
435 $self->{'socketo'}->flush() or return undef;
436 $self->{'lines'} = 0;
437 return 1;
438}
439sub _line {
440 # Read a line from the server.
441 # ... except that 'line' may be an entire file. ;-)
442 my ($self, $fh) = @_;
443 die "Not in lines" unless defined $self->{'lines'};
444
445 my $line;
446 my $res=0;
447 while (defined($line = $self->readline())) {
448 # M U gnupg-cvs-rep/AUTHORS
449 # Updated gnupg-cvs-rep/
450 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
451 # /AUTHORS/1.1///T1.1
452 # u=rw,g=rw,o=rw
453 # 0
454 # ok
455
456 if ($line =~ s/^(?:Created|Updated) //) {
457 $line = $self->readline(); # path
458 $line = $self->readline(); # Entries line
459 my $mode = $self->readline(); chomp $mode;
460 $self->{'mode'} = $mode;
461 defined (my $cnt = $self->readline())
462 or die "EOF from server after 'Changed'\n";
463 chomp $cnt;
464 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
465 $line="";
466 $res = $self->_fetchfile($fh, $cnt);
467 } elsif ($line =~ s/^ //) {
468 print $fh $line;
469 $res += length($line);
470 } elsif ($line =~ /^M\b/) {
471 # output, do nothing
472 } elsif ($line =~ /^Mbinary\b/) {
473 my $cnt;
474 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
475 chomp $cnt;
476 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
477 $line="";
478 $res += $self->_fetchfile($fh, $cnt);
479 } else {
480 chomp $line;
481 if ($line eq "ok") {
482 # print STDERR "S: ok (".length($res).")\n";
483 return $res;
484 } elsif ($line =~ s/^E //) {
485 # print STDERR "S: $line\n";
486 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
487 $line = $self->readline(); # filename
488 $line = $self->readline(); # OK
489 chomp $line;
490 die "Unknown: $line" if $line ne "ok";
491 return -1;
492 } else {
493 die "Unknown: $line\n";
494 }
495 }
496 }
497 return undef;
498}
499sub file {
500 my ($self,$fn,$rev) = @_;
501 my $res;
502
503 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
504 DIR => File::Spec->tmpdir(), UNLINK => 1);
505
506 $self->_file($fn,$rev) and $res = $self->_line($fh);
507
508 if (!defined $res) {
509 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
510 truncate $fh, 0;
511 $self->conn();
512 $self->_file($fn,$rev) or die "No file command send";
513 $res = $self->_line($fh);
514 die "Retry failed" unless defined $res;
515 }
516 close ($fh);
517
518 return ($name, $res);
519}
520sub _fetchfile {
521 my ($self, $fh, $cnt) = @_;
522 my $res = 0;
523 my $bufsize = 1024 * 1024;
524 while ($cnt) {
525 if ($bufsize > $cnt) {
526 $bufsize = $cnt;
527 }
528 my $buf;
529 my $num = $self->{'socketi'}->read($buf,$bufsize);
530 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
531 print $fh $buf;
532 $res += $num;
533 $cnt -= $num;
534 }
535 return $res;
536}
537
538sub _scramble {
539 my ($self, $pass) = @_;
540 my $scrambled = "A";
541
542 return $scrambled unless $pass;
543
544 my $pass_len = length($pass);
545 my @pass_arr = split("", $pass);
546 my $i;
547
548 # from cvs/src/scramble.c
549 my @shifts = (
550 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
551 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
552 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
553 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
554 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
555 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
556 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
557 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
558 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
559 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
560 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
561 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
562 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
563 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
564 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
565 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
566 );
567
568 for ($i = 0; $i < $pass_len; $i++) {
569 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
570 }
571
572 return $scrambled;
573}
574
575package main;
576
577my $cvs = CVSconn->new($opt_d, $cvs_tree);
578
579
580sub pdate($) {
581 my ($d) = @_;
582 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
583 or die "Unparseable date: $d\n";
584 my $y=$1; $y-=1900 if $y>1900;
585 return timegm($6||0,$5,$4,$3,$2-1,$y);
586}
587
588sub pmode($) {
589 my ($mode) = @_;
590 my $m = 0;
591 my $mm = 0;
592 my $um = 0;
593 for my $x(split(//,$mode)) {
594 if ($x eq ",") {
595 $m |= $mm&$um;
596 $mm = 0;
597 $um = 0;
598 } elsif ($x eq "u") { $um |= 0700;
599 } elsif ($x eq "g") { $um |= 0070;
600 } elsif ($x eq "o") { $um |= 0007;
601 } elsif ($x eq "r") { $mm |= 0444;
602 } elsif ($x eq "w") { $mm |= 0222;
603 } elsif ($x eq "x") { $mm |= 0111;
604 } elsif ($x eq "=") { # do nothing
605 } else { die "Unknown mode: $mode\n";
606 }
607 }
608 $m |= $mm&$um;
609 return $m;
610}
611
612sub getwd() {
613 my $pwd = `pwd`;
614 chomp $pwd;
615 return $pwd;
616}
617
618sub is_sha1 {
619 my $s = shift;
620 return $s =~ /^[a-f0-9]{40}$/;
621}
622
623sub get_headref ($) {
624 my $name = shift;
625 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
626 return undef unless $? == 0;
627 chomp $r;
628 return $r;
629}
630
631my $user_filename_prepend = '';
632sub munge_user_filename {
633 my $name = shift;
634 return File::Spec->file_name_is_absolute($name) ?
635 $name :
636 $user_filename_prepend . $name;
637}
638
639-d $git_tree
640 or mkdir($git_tree,0777)
641 or die "Could not create $git_tree: $!";
642if ($git_tree ne '.') {
643 $user_filename_prepend = getwd() . '/';
644 chdir($git_tree);
645}
646
647my $last_branch = "";
648my $orig_branch = "";
649my %branch_date;
650my $tip_at_start = undef;
651
652my $git_dir = $ENV{"GIT_DIR"} || ".git";
653$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
654$ENV{"GIT_DIR"} = $git_dir;
655my $orig_git_index;
656$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
657
658my %index; # holds filenames of one index per branch
659
660unless (-d $git_dir) {
661 system(qw(git init));
662 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
663 system(qw(git read-tree --empty));
664 die "Cannot init an empty tree: $?\n" if $?;
665
666 $last_branch = $opt_o;
667 $orig_branch = "";
668} else {
669 open(F, "-|", qw(git symbolic-ref HEAD)) or
670 die "Cannot run git symbolic-ref: $!\n";
671 chomp ($last_branch = <F>);
672 $last_branch = basename($last_branch);
673 close(F);
674 unless ($last_branch) {
675 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
676 $last_branch = "master";
677 }
678 $orig_branch = $last_branch;
679 $tip_at_start = `git rev-parse --verify HEAD`;
680
681 # Get the last import timestamps
682 my $fmt = '($ref, $author) = (%(refname), %(author));';
683 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
684 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
685 while (defined(my $entry = <H>)) {
686 my ($ref, $author);
687 eval($entry) || die "cannot eval refs list: $@";
688 my ($head) = ($ref =~ m|^$remote/(.*)|);
689 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
690 $branch_date{$head} = $1;
691 }
692 close(H);
693 if (!exists $branch_date{$opt_o}) {
694 die "Branch '$opt_o' does not exist.\n".
695 "Either use the correct '-o branch' option,\n".
696 "or import to a new repository.\n";
697 }
698}
699
700-d $git_dir
701 or die "Could not create git subdir ($git_dir).\n";
702
703# now we read (and possibly save) author-info as well
704-f "$git_dir/cvs-authors" and
705 read_author_info("$git_dir/cvs-authors");
706if ($opt_A) {
707 read_author_info(munge_user_filename($opt_A));
708 write_author_info("$git_dir/cvs-authors");
709}
710
711# open .git/cvs-revisions, if requested
712open my $revision_map, '>>', "$git_dir/cvs-revisions"
713 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
714 if defined $opt_R;
715
716
717#
718# run cvsps into a file unless we are getting
719# it passed as a file via $opt_P
720#
721my $cvspsfile;
722unless ($opt_P) {
723 print "Running cvsps...\n" if $opt_v;
724 my $pid = open(CVSPS,"-|");
725 my $cvspsfh;
726 die "Cannot fork: $!\n" unless defined $pid;
727 unless ($pid) {
728 my @opt;
729 @opt = split(/,/,$opt_p) if defined $opt_p;
730 unshift @opt, '-z', $opt_z if defined $opt_z;
731 unshift @opt, '-q' unless defined $opt_v;
732 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
733 push @opt, '--cvs-direct';
734 }
735 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
736 die "Could not start cvsps: $!\n";
737 }
738 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
739 DIR => File::Spec->tmpdir());
740 while (<CVSPS>) {
741 print $cvspsfh $_;
742 }
743 close CVSPS;
744 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
745 close $cvspsfh;
746} else {
747 $cvspsfile = munge_user_filename($opt_P);
748}
749
750open(CVS, "<$cvspsfile") or die $!;
751
752## cvsps output:
753#---------------------
754#PatchSet 314
755#Date: 1999/09/18 13:03:59
756#Author: wkoch
757#Branch: STABLE-BRANCH-1-0
758#Ancestor branch: HEAD
759#Tag: (none)
760#Log:
761# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
762#Members:
763# README:1.57->1.57.2.1
764# VERSION:1.96->1.96.2.1
765#
766#---------------------
767
768my $state = 0;
769
770sub update_index (\@\@) {
771 my $old = shift;
772 my $new = shift;
773 open(my $fh, '|-', qw(git update-index -z --index-info))
774 or die "unable to open git update-index: $!";
775 print $fh
776 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
777 @$old),
778 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
779 @$new)
780 or die "unable to write to git update-index: $!";
781 close $fh
782 or die "unable to write to git update-index: $!";
783 $? and die "git update-index reported error: $?";
784}
785
786sub write_tree () {
787 open(my $fh, '-|', qw(git write-tree))
788 or die "unable to open git write-tree: $!";
789 chomp(my $tree = <$fh>);
790 is_sha1($tree)
791 or die "Cannot get tree id ($tree): $!";
792 close($fh)
793 or die "Error running git write-tree: $?\n";
794 print "Tree ID $tree\n" if $opt_v;
795 return $tree;
796}
797
798my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
799my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
800
801# commits that cvsps cannot place anywhere...
802$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
803
804sub commit {
805 if ($branch eq $opt_o && !$index{branch} &&
806 !get_headref("$remote/$branch")) {
807 # looks like an initial commit
808 # use the index primed by git init
809 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
810 $index{$branch} = "$git_dir/index";
811 } else {
812 # use an index per branch to speed up
813 # imports of projects with many branches
814 unless ($index{$branch}) {
815 $index{$branch} = tmpnam();
816 $ENV{GIT_INDEX_FILE} = $index{$branch};
817 if ($ancestor) {
818 system("git", "read-tree", "$remote/$ancestor");
819 } else {
820 system("git", "read-tree", "$remote/$branch");
821 }
822 die "read-tree failed: $?\n" if $?;
823 }
824 }
825 $ENV{GIT_INDEX_FILE} = $index{$branch};
826
827 update_index(@old, @new);
828 @old = @new = ();
829 my $tree = write_tree();
830 my $parent = get_headref("$remote/$last_branch");
831 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
832
833 my @commit_args;
834 push @commit_args, ("-p", $parent) if $parent;
835
836 # loose detection of merges
837 # based on the commit msg
838 foreach my $rx (@mergerx) {
839 next unless $logmsg =~ $rx && $1;
840 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
841 if (my $sha1 = get_headref("$remote/$mparent")) {
842 push @commit_args, '-p', "$remote/$mparent";
843 print "Merge parent branch: $mparent\n" if $opt_v;
844 }
845 }
846
847 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
848 $ENV{GIT_AUTHOR_NAME} = $author_name;
849 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
850 $ENV{GIT_AUTHOR_DATE} = $commit_date;
851 $ENV{GIT_COMMITTER_NAME} = $author_name;
852 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
853 $ENV{GIT_COMMITTER_DATE} = $commit_date;
854 my $pid = open2(my $commit_read, my $commit_write,
855 'git', 'commit-tree', $tree, @commit_args);
856
857 # compatibility with git2cvs
858 substr($logmsg,32767) = "" if length($logmsg) > 32767;
859 $logmsg =~ s/[\s\n]+\z//;
860
861 if (@skipped) {
862 $logmsg .= "\n\n\nSKIPPED:\n\t";
863 $logmsg .= join("\n\t", @skipped) . "\n";
864 @skipped = ();
865 }
866
867 print($commit_write "$logmsg\n") && close($commit_write)
868 or die "Error writing to git commit-tree: $!\n";
869
870 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
871 chomp(my $cid = <$commit_read>);
872 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
873 print "Commit ID $cid\n" if $opt_v;
874 close($commit_read);
875
876 waitpid($pid,0);
877 die "Error running git commit-tree: $?\n" if $?;
878
879 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
880 or die "Cannot write branch $branch for update: $!\n";
881
882 if ($revision_map) {
883 print $revision_map "@$_ $cid\n" for @commit_revisions;
884 }
885 @commit_revisions = ();
886
887 if ($tag) {
888 my ($xtag) = $tag;
889 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
890 $xtag =~ tr/_/\./ if ( $opt_u );
891 $xtag =~ s/[\/]/$opt_s/g;
892 $xtag =~ s/\[//g;
893
894 system('git' , 'tag', '-f', $xtag, $cid) == 0
895 or die "Cannot create tag $xtag: $!\n";
896
897 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
898 }
899};
900
901my $commitcount = 1;
902while (<CVS>) {
903 chomp;
904 if ($state == 0 and /^-+$/) {
905 $state = 1;
906 } elsif ($state == 0) {
907 $state = 1;
908 redo;
909 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
910 $patchset = 0+$_;
911 $state=2;
912 } elsif ($state == 2 and s/^Date:\s+//) {
913 $date = pdate($_);
914 unless ($date) {
915 print STDERR "Could not parse date: $_\n";
916 $state=0;
917 next;
918 }
919 $state=3;
920 } elsif ($state == 3 and s/^Author:\s+//) {
921 s/\s+$//;
922 if (/^(.*?)\s+<(.*)>/) {
923 ($author_name, $author_email) = ($1, $2);
924 } elsif ($conv_author_name{$_}) {
925 $author_name = $conv_author_name{$_};
926 $author_email = $conv_author_email{$_};
927 } else {
928 $author_name = $author_email = $_;
929 }
930 $state = 4;
931 } elsif ($state == 4 and s/^Branch:\s+//) {
932 s/\s+$//;
933 tr/_/\./ if ( $opt_u );
934 s/[\/]/$opt_s/g;
935 $branch = $_;
936 $state = 5;
937 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
938 s/\s+$//;
939 $ancestor = $_;
940 $ancestor = $opt_o if $ancestor eq "HEAD";
941 $state = 6;
942 } elsif ($state == 5) {
943 $ancestor = undef;
944 $state = 6;
945 redo;
946 } elsif ($state == 6 and s/^Tag:\s+//) {
947 s/\s+$//;
948 if ($_ eq "(none)") {
949 $tag = undef;
950 } else {
951 $tag = $_;
952 }
953 $state = 7;
954 } elsif ($state == 7 and /^Log:/) {
955 $logmsg = "";
956 $state = 8;
957 } elsif ($state == 8 and /^Members:/) {
958 $branch = $opt_o if $branch eq "HEAD";
959 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
960 # skip
961 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
962 $state = 11;
963 next;
964 }
965 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
966 # skip if the commit is too recent
967 # given that the cvsps default fuzz is 300s, we give ourselves another
968 # 300s just in case -- this also prevents skipping commits
969 # due to server clock drift
970 print "skip patchset $patchset: $date too recent\n" if $opt_v;
971 $state = 11;
972 next;
973 }
974 if (exists $ignorebranch{$branch}) {
975 print STDERR "Skipping $branch\n";
976 $state = 11;
977 next;
978 }
979 if ($ancestor) {
980 if ($ancestor eq $branch) {
981 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
982 $ancestor = $opt_o;
983 }
984 if (defined get_headref("$remote/$branch")) {
985 print STDERR "Branch $branch already exists!\n";
986 $state=11;
987 next;
988 }
989 my $id = get_headref("$remote/$ancestor");
990 if (!$id) {
991 print STDERR "Branch $ancestor does not exist!\n";
992 $ignorebranch{$branch} = 1;
993 $state=11;
994 next;
995 }
996
997 system(qw(git update-ref -m cvsimport),
998 "$remote/$branch", $id);
999 if($? != 0) {
1000 print STDERR "Could not create branch $branch\n";
1001 $ignorebranch{$branch} = 1;
1002 $state=11;
1003 next;
1004 }
1005 }
1006 $last_branch = $branch if $branch ne $last_branch;
1007 $state = 9;
1008 } elsif ($state == 8) {
1009 $logmsg .= "$_\n";
1010 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1011# VERSION:1.96->1.96.2.1
1012 my $init = ($2 eq "INITIAL");
1013 my $fn = $1;
1014 my $rev = $3;
1015 $fn =~ s#^/+##;
1016 if ($opt_S && $fn =~ m/$opt_S/) {
1017 print "SKIPPING $fn v $rev\n";
1018 push(@skipped, $fn);
1019 next;
1020 }
1021 push @commit_revisions, [$fn, $rev];
1022 print "Fetching $fn v $rev\n" if $opt_v;
1023 my ($tmpname, $size) = $cvs->file($fn,$rev);
1024 if ($size == -1) {
1025 push(@old,$fn);
1026 print "Drop $fn\n" if $opt_v;
1027 } else {
1028 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1029 my $pid = open(my $F, '-|');
1030 die $! unless defined $pid;
1031 if (!$pid) {
1032 exec("git", "hash-object", "-w", $tmpname)
1033 or die "Cannot create object: $!\n";
1034 }
1035 my $sha = <$F>;
1036 chomp $sha;
1037 close $F;
1038 my $mode = pmode($cvs->{'mode'});
1039 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1040 }
1041 unlink($tmpname);
1042 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1043 my $fn = $1;
1044 my $rev = $2;
1045 $fn =~ s#^/+##;
1046 push @commit_revisions, [$fn, $rev];
1047 push(@old,$fn);
1048 print "Delete $fn\n" if $opt_v;
1049 } elsif ($state == 9 and /^\s*$/) {
1050 $state = 10;
1051 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1052 $commitcount++;
1053 if ($opt_L && $commitcount > $opt_L) {
1054 last;
1055 }
1056 commit();
1057 if (($commitcount & 1023) == 0) {
1058 system(qw(git repack -a -d));
1059 }
1060 $state = 1;
1061 } elsif ($state == 11 and /^-+$/) {
1062 $state = 1;
1063 } elsif (/^-+$/) { # end of unknown-line processing
1064 $state = 1;
1065 } elsif ($state != 11) { # ignore stuff when skipping
1066 print STDERR "* UNKNOWN LINE * $_\n";
1067 }
1068}
1069commit() if $branch and $state != 11;
1070
1071unless ($opt_P) {
1072 unlink($cvspsfile);
1073}
1074
1075# The heuristic of repacking every 1024 commits can leave a
1076# lot of unpacked data. If there is more than 1MB worth of
1077# not-packed objects, repack once more.
1078my $line = `git count-objects`;
1079if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1080 my ($n_objects, $kb) = ($1, $2);
1081 1024 < $kb
1082 and system(qw(git repack -a -d));
1083}
1084
1085foreach my $git_index (values %index) {
1086 if ($git_index ne "$git_dir/index") {
1087 unlink($git_index);
1088 }
1089}
1090
1091if (defined $orig_git_index) {
1092 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1093} else {
1094 delete $ENV{GIT_INDEX_FILE};
1095}
1096
1097# Now switch back to the branch we were in before all of this happened
1098if ($orig_branch) {
1099 print "DONE.\n" if $opt_v;
1100 if ($opt_i) {
1101 exit 0;
1102 }
1103 my $tip_at_end = `git rev-parse --verify HEAD`;
1104 if ($tip_at_start ne $tip_at_end) {
1105 for ($tip_at_start, $tip_at_end) { chomp; }
1106 print "Fetched into the current branch.\n" if $opt_v;
1107 system(qw(git read-tree -u -m),
1108 $tip_at_start, $tip_at_end);
1109 die "Fast-forward update failed: $?\n" if $?;
1110 }
1111 else {
1112 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1113 die "Could not merge $opt_o into the current branch.\n" if $?;
1114 }
1115} else {
1116 $orig_branch = "master";
1117 print "DONE; creating $orig_branch branch\n" if $opt_v;
1118 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1119 unless defined get_headref('refs/heads/master');
1120 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1121 if ($opt_r && $opt_o ne 'HEAD');
1122 system('git', 'update-ref', 'HEAD', "$orig_branch");
1123 unless ($opt_i) {
1124 system(qw(git checkout -f));
1125 die "checkout failed: $?\n" if $?;
1126 }
1127}