1#!/usr/bin/perl
2
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use 5.008;
17use strict;
18use warnings;
19use Getopt::Long;
20use File::Spec;
21use File::Temp qw(tempfile tmpnam);
22use File::Path qw(mkpath);
23use File::Basename qw(basename dirname);
24use Time::Local;
25use IO::Socket;
26use IO::Pipe;
27use POSIX qw(strftime dup2 ENOENT);
28use IPC::Open2;
29
30$SIG{'PIPE'}="IGNORE";
31$ENV{'TZ'}="UTC";
32
33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34my (%conv_author_name, %conv_author_email);
35
36sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45END
46 exit(1);
47}
48
49sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
53
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
61 }
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
70 }
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
74 }
75 }
76 # NEEDSWORK: Maybe warn on unrecognized lines?
77 }
78 close ($f);
79}
80
81sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
85
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
88 }
89 close ($f);
90}
91
92# convert getopts specs for use by git config
93my %longmap = (
94 'A:' => 'authors-file',
95 'M:' => 'merge-regex',
96 'P:' => undef,
97 'R' => 'track-revisions',
98 'S:' => 'ignore-paths',
99);
100
101sub read_repo_config {
102 # Split the string between characters, unless there is a ':'
103 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 my @opts = split(/ *(?!:)/, shift);
105 foreach my $o (@opts) {
106 my $key = $o;
107 $key =~ s/://g;
108 my $arg = 'git config';
109 $arg .= ' --bool' if ($o !~ /:$/);
110 my $ckey = $key;
111
112 if (exists $longmap{$o}) {
113 # An uppercase option like -R cannot be
114 # expressed in the configuration, as the
115 # variable names are downcased.
116 $ckey = $longmap{$o};
117 next if (! defined $ckey);
118 $ckey =~ s/-//g;
119 }
120 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 no strict 'refs';
123 my $opt_name = "opt_" . $key;
124 if (!$$opt_name) {
125 $$opt_name = $tmp;
126 }
127 }
128 }
129}
130
131my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132read_repo_config($opts);
133Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
134
135# turn the Getopt::Std specification in a Getopt::Long one,
136# with support for multiple -M options
137GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
138 or usage();
139usage if $opt_h;
140
141if (@ARGV == 0) {
142 chomp(my $module = `git config --get cvsimport.module`);
143 push(@ARGV, $module) if $? == 0;
144}
145@ARGV <= 1 or usage("You can't specify more than one CVS module");
146
147if ($opt_d) {
148 $ENV{"CVSROOT"} = $opt_d;
149} elsif (-f 'CVS/Root') {
150 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
151 $opt_d = <$f>;
152 chomp $opt_d;
153 close $f;
154 $ENV{"CVSROOT"} = $opt_d;
155} elsif ($ENV{"CVSROOT"}) {
156 $opt_d = $ENV{"CVSROOT"};
157} else {
158 usage("CVSROOT needs to be set");
159}
160$opt_s ||= "-";
161$opt_a ||= 0;
162
163my $git_tree = $opt_C;
164$git_tree ||= ".";
165
166my $remote;
167if (defined $opt_r) {
168 $remote = 'refs/remotes/' . $opt_r;
169 $opt_o ||= "master";
170} else {
171 $opt_o ||= "origin";
172 $remote = 'refs/heads';
173}
174
175my $cvs_tree;
176if ($#ARGV == 0) {
177 $cvs_tree = $ARGV[0];
178} elsif (-f 'CVS/Repository') {
179 open my $f, '<', 'CVS/Repository' or
180 die 'Failed to open CVS/Repository';
181 $cvs_tree = <$f>;
182 chomp $cvs_tree;
183 close $f;
184} else {
185 usage("CVS module has to be specified");
186}
187
188our @mergerx = ();
189if ($opt_m) {
190 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
191}
192if (@opt_M) {
193 push (@mergerx, map { qr/$_/ } @opt_M);
194}
195
196# Remember UTC of our starting time
197# we'll want to avoid importing commits
198# that are too recent
199our $starttime = time();
200
201select(STDERR); $|=1; select(STDOUT);
202
203
204package CVSconn;
205# Basic CVS dialog.
206# We're only interested in connecting and downloading, so ...
207
208use File::Spec;
209use File::Temp qw(tempfile);
210use POSIX qw(strftime dup2);
211
212sub new {
213 my ($what,$repo,$subdir) = @_;
214 $what=ref($what) if ref($what);
215
216 my $self = {};
217 $self->{'buffer'} = "";
218 bless($self,$what);
219
220 $repo =~ s#/+$##;
221 $self->{'fullrep'} = $repo;
222 $self->conn();
223
224 $self->{'subdir'} = $subdir;
225 $self->{'lines'} = undef;
226
227 return $self;
228}
229
230sub conn {
231 my $self = shift;
232 my $repo = $self->{'fullrep'};
233 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
234 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
235
236 my ($proxyhost,$proxyport);
237 if ($param && ($param =~ m/proxy=([^;]+)/)) {
238 $proxyhost = $1;
239 # Default proxyport, if not specified, is 8080.
240 $proxyport = 8080;
241 if ($ENV{"CVS_PROXY_PORT"}) {
242 $proxyport = $ENV{"CVS_PROXY_PORT"};
243 }
244 if ($param =~ m/proxyport=([^;]+)/) {
245 $proxyport = $1;
246 }
247 }
248 $repo ||= '/';
249
250 # if username is not explicit in CVSROOT, then use current user, as cvs would
251 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
252 my $rr2 = "-";
253 unless ($port) {
254 $rr2 = ":pserver:$user\@$serv:$repo";
255 $port=2401;
256 }
257 my $rr = ":pserver:$user\@$serv:$port$repo";
258
259 if ($pass) {
260 $pass = $self->_scramble($pass);
261 } else {
262 open(H,$ENV{'HOME'}."/.cvspass") and do {
263 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
264 while (<H>) {
265 chomp;
266 s/^\/\d+\s+//;
267 my ($w,$p) = split(/\s/,$_,2);
268 if ($w eq $rr or $w eq $rr2) {
269 $pass = $p;
270 last;
271 }
272 }
273 };
274 $pass = "A" unless $pass;
275 }
276
277 my ($s, $rep);
278 if ($proxyhost) {
279
280 # Use a HTTP Proxy. Only works for HTTP proxies that
281 # don't require user authentication
282 #
283 # See: http://www.ietf.org/rfc/rfc2817.txt
284
285 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
286 die "Socket to $proxyhost: $!\n" unless defined $s;
287 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
288 or die "Write to $proxyhost: $!\n";
289 $s->flush();
290
291 $rep = <$s>;
292
293 # The answer should look like 'HTTP/1.x 2yy ....'
294 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
295 die "Proxy connect: $rep\n";
296 }
297 # Skip up to the empty line of the proxy server output
298 # including the response headers.
299 while ($rep = <$s>) {
300 last if (!defined $rep ||
301 $rep eq "\n" ||
302 $rep eq "\r\n");
303 }
304 } else {
305 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
306 die "Socket to $serv: $!\n" unless defined $s;
307 }
308
309 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
310 or die "Write to $serv: $!\n";
311 $s->flush();
312
313 $rep = <$s>;
314
315 if ($rep ne "I LOVE YOU\n") {
316 $rep="<unknown>" unless $rep;
317 die "AuthReply: $rep\n";
318 }
319 $self->{'socketo'} = $s;
320 $self->{'socketi'} = $s;
321 } else { # local or ext: Fork off our own cvs server.
322 my $pr = IO::Pipe->new();
323 my $pw = IO::Pipe->new();
324 my $pid = fork();
325 die "Fork: $!\n" unless defined $pid;
326 my $cvs = 'cvs';
327 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
328 my $rsh = 'rsh';
329 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
330
331 my @cvs = ($cvs, 'server');
332 my ($local, $user, $host);
333 $local = $repo =~ s/:local://;
334 if (!$local) {
335 $repo =~ s/:ext://;
336 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
337 ($user, $host) = ($1, $2);
338 }
339 if (!$local) {
340 if ($user) {
341 unshift @cvs, $rsh, '-l', $user, $host;
342 } else {
343 unshift @cvs, $rsh, $host;
344 }
345 }
346
347 unless ($pid) {
348 $pr->writer();
349 $pw->reader();
350 dup2($pw->fileno(),0);
351 dup2($pr->fileno(),1);
352 $pr->close();
353 $pw->close();
354 exec(@cvs);
355 }
356 $pw->writer();
357 $pr->reader();
358 $self->{'socketo'} = $pw;
359 $self->{'socketi'} = $pr;
360 }
361 $self->{'socketo'}->write("Root $repo\n");
362
363 # Trial and error says that this probably is the minimum set
364 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
365
366 $self->{'socketo'}->write("valid-requests\n");
367 $self->{'socketo'}->flush();
368
369 my $rep=$self->readline();
370 die "Failed to read from server" unless defined $rep;
371 chomp($rep);
372 if ($rep !~ s/^Valid-requests\s*//) {
373 $rep="<unknown>" unless $rep;
374 die "Expected Valid-requests from server, but got: $rep\n";
375 }
376 chomp(my $res=$self->readline());
377 die "validReply: $res\n" if $res ne "ok";
378
379 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
380 $self->{'repo'} = $repo;
381}
382
383sub readline {
384 my ($self) = @_;
385 return $self->{'socketi'}->getline();
386}
387
388sub _file {
389 # Request a file with a given revision.
390 # Trial and error says this is a good way to do it. :-/
391 my ($self,$fn,$rev) = @_;
392 $self->{'socketo'}->write("Argument -N\n") or return undef;
393 $self->{'socketo'}->write("Argument -P\n") or return undef;
394 # -kk: Linus' version doesn't use it - defaults to off
395 if ($opt_k) {
396 $self->{'socketo'}->write("Argument -kk\n") or return undef;
397 }
398 $self->{'socketo'}->write("Argument -r\n") or return undef;
399 $self->{'socketo'}->write("Argument $rev\n") or return undef;
400 $self->{'socketo'}->write("Argument --\n") or return undef;
401 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
402 $self->{'socketo'}->write("Directory .\n") or return undef;
403 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
404 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
405 $self->{'socketo'}->write("co\n") or return undef;
406 $self->{'socketo'}->flush() or return undef;
407 $self->{'lines'} = 0;
408 return 1;
409}
410sub _line {
411 # Read a line from the server.
412 # ... except that 'line' may be an entire file. ;-)
413 my ($self, $fh) = @_;
414 die "Not in lines" unless defined $self->{'lines'};
415
416 my $line;
417 my $res=0;
418 while (defined($line = $self->readline())) {
419 # M U gnupg-cvs-rep/AUTHORS
420 # Updated gnupg-cvs-rep/
421 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
422 # /AUTHORS/1.1///T1.1
423 # u=rw,g=rw,o=rw
424 # 0
425 # ok
426
427 if ($line =~ s/^(?:Created|Updated) //) {
428 $line = $self->readline(); # path
429 $line = $self->readline(); # Entries line
430 my $mode = $self->readline(); chomp $mode;
431 $self->{'mode'} = $mode;
432 defined (my $cnt = $self->readline())
433 or die "EOF from server after 'Changed'\n";
434 chomp $cnt;
435 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
436 $line="";
437 $res = $self->_fetchfile($fh, $cnt);
438 } elsif ($line =~ s/^ //) {
439 print $fh $line;
440 $res += length($line);
441 } elsif ($line =~ /^M\b/) {
442 # output, do nothing
443 } elsif ($line =~ /^Mbinary\b/) {
444 my $cnt;
445 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
446 chomp $cnt;
447 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
448 $line="";
449 $res += $self->_fetchfile($fh, $cnt);
450 } else {
451 chomp $line;
452 if ($line eq "ok") {
453 # print STDERR "S: ok (".length($res).")\n";
454 return $res;
455 } elsif ($line =~ s/^E //) {
456 # print STDERR "S: $line\n";
457 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
458 $line = $self->readline(); # filename
459 $line = $self->readline(); # OK
460 chomp $line;
461 die "Unknown: $line" if $line ne "ok";
462 return -1;
463 } else {
464 die "Unknown: $line\n";
465 }
466 }
467 }
468 return undef;
469}
470sub file {
471 my ($self,$fn,$rev) = @_;
472 my $res;
473
474 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
475 DIR => File::Spec->tmpdir(), UNLINK => 1);
476
477 $self->_file($fn,$rev) and $res = $self->_line($fh);
478
479 if (!defined $res) {
480 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
481 truncate $fh, 0;
482 $self->conn();
483 $self->_file($fn,$rev) or die "No file command send";
484 $res = $self->_line($fh);
485 die "Retry failed" unless defined $res;
486 }
487 close ($fh);
488
489 return ($name, $res);
490}
491sub _fetchfile {
492 my ($self, $fh, $cnt) = @_;
493 my $res = 0;
494 my $bufsize = 1024 * 1024;
495 while ($cnt) {
496 if ($bufsize > $cnt) {
497 $bufsize = $cnt;
498 }
499 my $buf;
500 my $num = $self->{'socketi'}->read($buf,$bufsize);
501 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
502 print $fh $buf;
503 $res += $num;
504 $cnt -= $num;
505 }
506 return $res;
507}
508
509sub _scramble {
510 my ($self, $pass) = @_;
511 my $scrambled = "A";
512
513 return $scrambled unless $pass;
514
515 my $pass_len = length($pass);
516 my @pass_arr = split("", $pass);
517 my $i;
518
519 # from cvs/src/scramble.c
520 my @shifts = (
521 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
522 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
523 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
524 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
525 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
526 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
527 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
528 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
529 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
530 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
531 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
532 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
533 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
534 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
535 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
536 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
537 );
538
539 for ($i = 0; $i < $pass_len; $i++) {
540 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
541 }
542
543 return $scrambled;
544}
545
546package main;
547
548my $cvs = CVSconn->new($opt_d, $cvs_tree);
549
550
551sub pdate($) {
552 my ($d) = @_;
553 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
554 or die "Unparseable date: $d\n";
555 my $y=$1; $y-=1900 if $y>1900;
556 return timegm($6||0,$5,$4,$3,$2-1,$y);
557}
558
559sub pmode($) {
560 my ($mode) = @_;
561 my $m = 0;
562 my $mm = 0;
563 my $um = 0;
564 for my $x(split(//,$mode)) {
565 if ($x eq ",") {
566 $m |= $mm&$um;
567 $mm = 0;
568 $um = 0;
569 } elsif ($x eq "u") { $um |= 0700;
570 } elsif ($x eq "g") { $um |= 0070;
571 } elsif ($x eq "o") { $um |= 0007;
572 } elsif ($x eq "r") { $mm |= 0444;
573 } elsif ($x eq "w") { $mm |= 0222;
574 } elsif ($x eq "x") { $mm |= 0111;
575 } elsif ($x eq "=") { # do nothing
576 } else { die "Unknown mode: $mode\n";
577 }
578 }
579 $m |= $mm&$um;
580 return $m;
581}
582
583sub getwd() {
584 my $pwd = `pwd`;
585 chomp $pwd;
586 return $pwd;
587}
588
589sub is_sha1 {
590 my $s = shift;
591 return $s =~ /^[a-f0-9]{40}$/;
592}
593
594sub get_headref ($) {
595 my $name = shift;
596 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
597 return undef unless $? == 0;
598 chomp $r;
599 return $r;
600}
601
602my $user_filename_prepend = '';
603sub munge_user_filename {
604 my $name = shift;
605 return File::Spec->file_name_is_absolute($name) ?
606 $name :
607 $user_filename_prepend . $name;
608}
609
610-d $git_tree
611 or mkdir($git_tree,0777)
612 or die "Could not create $git_tree: $!";
613if ($git_tree ne '.') {
614 $user_filename_prepend = getwd() . '/';
615 chdir($git_tree);
616}
617
618my $last_branch = "";
619my $orig_branch = "";
620my %branch_date;
621my $tip_at_start = undef;
622
623my $git_dir = $ENV{"GIT_DIR"} || ".git";
624$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
625$ENV{"GIT_DIR"} = $git_dir;
626my $orig_git_index;
627$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
628
629my %index; # holds filenames of one index per branch
630
631unless (-d $git_dir) {
632 system(qw(git init));
633 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
634 system(qw(git read-tree --empty));
635 die "Cannot init an empty tree: $?\n" if $?;
636
637 $last_branch = $opt_o;
638 $orig_branch = "";
639} else {
640 open(F, "-|", qw(git symbolic-ref HEAD)) or
641 die "Cannot run git symbolic-ref: $!\n";
642 chomp ($last_branch = <F>);
643 $last_branch = basename($last_branch);
644 close(F);
645 unless ($last_branch) {
646 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
647 $last_branch = "master";
648 }
649 $orig_branch = $last_branch;
650 $tip_at_start = `git rev-parse --verify HEAD`;
651
652 # Get the last import timestamps
653 my $fmt = '($ref, $author) = (%(refname), %(author));';
654 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
655 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
656 while (defined(my $entry = <H>)) {
657 my ($ref, $author);
658 eval($entry) || die "cannot eval refs list: $@";
659 my ($head) = ($ref =~ m|^$remote/(.*)|);
660 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
661 $branch_date{$head} = $1;
662 }
663 close(H);
664 if (!exists $branch_date{$opt_o}) {
665 die "Branch '$opt_o' does not exist.\n".
666 "Either use the correct '-o branch' option,\n".
667 "or import to a new repository.\n";
668 }
669}
670
671-d $git_dir
672 or die "Could not create git subdir ($git_dir).\n";
673
674# now we read (and possibly save) author-info as well
675-f "$git_dir/cvs-authors" and
676 read_author_info("$git_dir/cvs-authors");
677if ($opt_A) {
678 read_author_info(munge_user_filename($opt_A));
679 write_author_info("$git_dir/cvs-authors");
680}
681
682# open .git/cvs-revisions, if requested
683open my $revision_map, '>>', "$git_dir/cvs-revisions"
684 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
685 if defined $opt_R;
686
687
688#
689# run cvsps into a file unless we are getting
690# it passed as a file via $opt_P
691#
692my $cvspsfile;
693unless ($opt_P) {
694 print "Running cvsps...\n" if $opt_v;
695 my $pid = open(CVSPS,"-|");
696 my $cvspsfh;
697 die "Cannot fork: $!\n" unless defined $pid;
698 unless ($pid) {
699 my @opt;
700 @opt = split(/,/,$opt_p) if defined $opt_p;
701 unshift @opt, '-z', $opt_z if defined $opt_z;
702 unshift @opt, '-q' unless defined $opt_v;
703 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
704 push @opt, '--cvs-direct';
705 }
706 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
707 die "Could not start cvsps: $!\n";
708 }
709 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
710 DIR => File::Spec->tmpdir());
711 while (<CVSPS>) {
712 print $cvspsfh $_;
713 }
714 close CVSPS;
715 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
716 close $cvspsfh;
717} else {
718 $cvspsfile = munge_user_filename($opt_P);
719}
720
721open(CVS, "<$cvspsfile") or die $!;
722
723## cvsps output:
724#---------------------
725#PatchSet 314
726#Date: 1999/09/18 13:03:59
727#Author: wkoch
728#Branch: STABLE-BRANCH-1-0
729#Ancestor branch: HEAD
730#Tag: (none)
731#Log:
732# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
733#Members:
734# README:1.57->1.57.2.1
735# VERSION:1.96->1.96.2.1
736#
737#---------------------
738
739my $state = 0;
740
741sub update_index (\@\@) {
742 my $old = shift;
743 my $new = shift;
744 open(my $fh, '|-', qw(git update-index -z --index-info))
745 or die "unable to open git update-index: $!";
746 print $fh
747 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
748 @$old),
749 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
750 @$new)
751 or die "unable to write to git update-index: $!";
752 close $fh
753 or die "unable to write to git update-index: $!";
754 $? and die "git update-index reported error: $?";
755}
756
757sub write_tree () {
758 open(my $fh, '-|', qw(git write-tree))
759 or die "unable to open git write-tree: $!";
760 chomp(my $tree = <$fh>);
761 is_sha1($tree)
762 or die "Cannot get tree id ($tree): $!";
763 close($fh)
764 or die "Error running git write-tree: $?\n";
765 print "Tree ID $tree\n" if $opt_v;
766 return $tree;
767}
768
769my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
770my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
771
772# commits that cvsps cannot place anywhere...
773$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
774
775sub commit {
776 if ($branch eq $opt_o && !$index{branch} &&
777 !get_headref("$remote/$branch")) {
778 # looks like an initial commit
779 # use the index primed by git init
780 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
781 $index{$branch} = "$git_dir/index";
782 } else {
783 # use an index per branch to speed up
784 # imports of projects with many branches
785 unless ($index{$branch}) {
786 $index{$branch} = tmpnam();
787 $ENV{GIT_INDEX_FILE} = $index{$branch};
788 if ($ancestor) {
789 system("git", "read-tree", "$remote/$ancestor");
790 } else {
791 system("git", "read-tree", "$remote/$branch");
792 }
793 die "read-tree failed: $?\n" if $?;
794 }
795 }
796 $ENV{GIT_INDEX_FILE} = $index{$branch};
797
798 update_index(@old, @new);
799 @old = @new = ();
800 my $tree = write_tree();
801 my $parent = get_headref("$remote/$last_branch");
802 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
803
804 my @commit_args;
805 push @commit_args, ("-p", $parent) if $parent;
806
807 # loose detection of merges
808 # based on the commit msg
809 foreach my $rx (@mergerx) {
810 next unless $logmsg =~ $rx && $1;
811 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
812 if (my $sha1 = get_headref("$remote/$mparent")) {
813 push @commit_args, '-p', "$remote/$mparent";
814 print "Merge parent branch: $mparent\n" if $opt_v;
815 }
816 }
817
818 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
819 $ENV{GIT_AUTHOR_NAME} = $author_name;
820 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
821 $ENV{GIT_AUTHOR_DATE} = $commit_date;
822 $ENV{GIT_COMMITTER_NAME} = $author_name;
823 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
824 $ENV{GIT_COMMITTER_DATE} = $commit_date;
825 my $pid = open2(my $commit_read, my $commit_write,
826 'git', 'commit-tree', $tree, @commit_args);
827
828 # compatibility with git2cvs
829 substr($logmsg,32767) = "" if length($logmsg) > 32767;
830 $logmsg =~ s/[\s\n]+\z//;
831
832 if (@skipped) {
833 $logmsg .= "\n\n\nSKIPPED:\n\t";
834 $logmsg .= join("\n\t", @skipped) . "\n";
835 @skipped = ();
836 }
837
838 print($commit_write "$logmsg\n") && close($commit_write)
839 or die "Error writing to git commit-tree: $!\n";
840
841 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
842 chomp(my $cid = <$commit_read>);
843 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
844 print "Commit ID $cid\n" if $opt_v;
845 close($commit_read);
846
847 waitpid($pid,0);
848 die "Error running git commit-tree: $?\n" if $?;
849
850 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
851 or die "Cannot write branch $branch for update: $!\n";
852
853 if ($revision_map) {
854 print $revision_map "@$_ $cid\n" for @commit_revisions;
855 }
856 @commit_revisions = ();
857
858 if ($tag) {
859 my ($xtag) = $tag;
860 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
861 $xtag =~ tr/_/\./ if ( $opt_u );
862 $xtag =~ s/[\/]/$opt_s/g;
863 $xtag =~ s/\[//g;
864
865 system('git' , 'tag', '-f', $xtag, $cid) == 0
866 or die "Cannot create tag $xtag: $!\n";
867
868 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
869 }
870};
871
872my $commitcount = 1;
873while (<CVS>) {
874 chomp;
875 if ($state == 0 and /^-+$/) {
876 $state = 1;
877 } elsif ($state == 0) {
878 $state = 1;
879 redo;
880 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
881 $patchset = 0+$_;
882 $state=2;
883 } elsif ($state == 2 and s/^Date:\s+//) {
884 $date = pdate($_);
885 unless ($date) {
886 print STDERR "Could not parse date: $_\n";
887 $state=0;
888 next;
889 }
890 $state=3;
891 } elsif ($state == 3 and s/^Author:\s+//) {
892 s/\s+$//;
893 if (/^(.*?)\s+<(.*)>/) {
894 ($author_name, $author_email) = ($1, $2);
895 } elsif ($conv_author_name{$_}) {
896 $author_name = $conv_author_name{$_};
897 $author_email = $conv_author_email{$_};
898 } else {
899 $author_name = $author_email = $_;
900 }
901 $state = 4;
902 } elsif ($state == 4 and s/^Branch:\s+//) {
903 s/\s+$//;
904 tr/_/\./ if ( $opt_u );
905 s/[\/]/$opt_s/g;
906 $branch = $_;
907 $state = 5;
908 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
909 s/\s+$//;
910 $ancestor = $_;
911 $ancestor = $opt_o if $ancestor eq "HEAD";
912 $state = 6;
913 } elsif ($state == 5) {
914 $ancestor = undef;
915 $state = 6;
916 redo;
917 } elsif ($state == 6 and s/^Tag:\s+//) {
918 s/\s+$//;
919 if ($_ eq "(none)") {
920 $tag = undef;
921 } else {
922 $tag = $_;
923 }
924 $state = 7;
925 } elsif ($state == 7 and /^Log:/) {
926 $logmsg = "";
927 $state = 8;
928 } elsif ($state == 8 and /^Members:/) {
929 $branch = $opt_o if $branch eq "HEAD";
930 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
931 # skip
932 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
933 $state = 11;
934 next;
935 }
936 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
937 # skip if the commit is too recent
938 # given that the cvsps default fuzz is 300s, we give ourselves another
939 # 300s just in case -- this also prevents skipping commits
940 # due to server clock drift
941 print "skip patchset $patchset: $date too recent\n" if $opt_v;
942 $state = 11;
943 next;
944 }
945 if (exists $ignorebranch{$branch}) {
946 print STDERR "Skipping $branch\n";
947 $state = 11;
948 next;
949 }
950 if ($ancestor) {
951 if ($ancestor eq $branch) {
952 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
953 $ancestor = $opt_o;
954 }
955 if (defined get_headref("$remote/$branch")) {
956 print STDERR "Branch $branch already exists!\n";
957 $state=11;
958 next;
959 }
960 my $id = get_headref("$remote/$ancestor");
961 if (!$id) {
962 print STDERR "Branch $ancestor does not exist!\n";
963 $ignorebranch{$branch} = 1;
964 $state=11;
965 next;
966 }
967
968 system(qw(git update-ref -m cvsimport),
969 "$remote/$branch", $id);
970 if($? != 0) {
971 print STDERR "Could not create branch $branch\n";
972 $ignorebranch{$branch} = 1;
973 $state=11;
974 next;
975 }
976 }
977 $last_branch = $branch if $branch ne $last_branch;
978 $state = 9;
979 } elsif ($state == 8) {
980 $logmsg .= "$_\n";
981 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
982# VERSION:1.96->1.96.2.1
983 my $init = ($2 eq "INITIAL");
984 my $fn = $1;
985 my $rev = $3;
986 $fn =~ s#^/+##;
987 if ($opt_S && $fn =~ m/$opt_S/) {
988 print "SKIPPING $fn v $rev\n";
989 push(@skipped, $fn);
990 next;
991 }
992 push @commit_revisions, [$fn, $rev];
993 print "Fetching $fn v $rev\n" if $opt_v;
994 my ($tmpname, $size) = $cvs->file($fn,$rev);
995 if ($size == -1) {
996 push(@old,$fn);
997 print "Drop $fn\n" if $opt_v;
998 } else {
999 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1000 my $pid = open(my $F, '-|');
1001 die $! unless defined $pid;
1002 if (!$pid) {
1003 exec("git", "hash-object", "-w", $tmpname)
1004 or die "Cannot create object: $!\n";
1005 }
1006 my $sha = <$F>;
1007 chomp $sha;
1008 close $F;
1009 my $mode = pmode($cvs->{'mode'});
1010 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1011 }
1012 unlink($tmpname);
1013 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1014 my $fn = $1;
1015 my $rev = $2;
1016 $fn =~ s#^/+##;
1017 push @commit_revisions, [$fn, $rev];
1018 push(@old,$fn);
1019 print "Delete $fn\n" if $opt_v;
1020 } elsif ($state == 9 and /^\s*$/) {
1021 $state = 10;
1022 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1023 $commitcount++;
1024 if ($opt_L && $commitcount > $opt_L) {
1025 last;
1026 }
1027 commit();
1028 if (($commitcount & 1023) == 0) {
1029 system(qw(git repack -a -d));
1030 }
1031 $state = 1;
1032 } elsif ($state == 11 and /^-+$/) {
1033 $state = 1;
1034 } elsif (/^-+$/) { # end of unknown-line processing
1035 $state = 1;
1036 } elsif ($state != 11) { # ignore stuff when skipping
1037 print STDERR "* UNKNOWN LINE * $_\n";
1038 }
1039}
1040commit() if $branch and $state != 11;
1041
1042unless ($opt_P) {
1043 unlink($cvspsfile);
1044}
1045
1046# The heuristic of repacking every 1024 commits can leave a
1047# lot of unpacked data. If there is more than 1MB worth of
1048# not-packed objects, repack once more.
1049my $line = `git count-objects`;
1050if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1051 my ($n_objects, $kb) = ($1, $2);
1052 1024 < $kb
1053 and system(qw(git repack -a -d));
1054}
1055
1056foreach my $git_index (values %index) {
1057 if ($git_index ne "$git_dir/index") {
1058 unlink($git_index);
1059 }
1060}
1061
1062if (defined $orig_git_index) {
1063 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1064} else {
1065 delete $ENV{GIT_INDEX_FILE};
1066}
1067
1068# Now switch back to the branch we were in before all of this happened
1069if ($orig_branch) {
1070 print "DONE.\n" if $opt_v;
1071 if ($opt_i) {
1072 exit 0;
1073 }
1074 my $tip_at_end = `git rev-parse --verify HEAD`;
1075 if ($tip_at_start ne $tip_at_end) {
1076 for ($tip_at_start, $tip_at_end) { chomp; }
1077 print "Fetched into the current branch.\n" if $opt_v;
1078 system(qw(git read-tree -u -m),
1079 $tip_at_start, $tip_at_end);
1080 die "Fast-forward update failed: $?\n" if $?;
1081 }
1082 else {
1083 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1084 die "Could not merge $opt_o into the current branch.\n" if $?;
1085 }
1086} else {
1087 $orig_branch = "master";
1088 print "DONE; creating $orig_branch branch\n" if $opt_v;
1089 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1090 unless defined get_headref('refs/heads/master');
1091 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1092 if ($opt_r && $opt_o ne 'HEAD');
1093 system('git', 'update-ref', 'HEAD', "$orig_branch");
1094 unless ($opt_i) {
1095 system(qw(git checkout -f));
1096 die "checkout failed: $?\n" if $?;
1097 }
1098}