1#!/usr/bin/perl -w
2
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use strict;
17use warnings;
18use Getopt::Long;
19use File::Spec;
20use File::Temp qw(tempfile tmpnam);
21use File::Path qw(mkpath);
22use File::Basename qw(basename dirname);
23use Time::Local;
24use IO::Socket;
25use IO::Pipe;
26use POSIX qw(strftime dup2 ENOENT);
27use IPC::Open2;
28
29$SIG{'PIPE'}="IGNORE";
30$ENV{'TZ'}="UTC";
31
32our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r);
33my (%conv_author_name, %conv_author_email);
34
35sub usage(;$) {
36 my $msg = shift;
37 print(STDERR "Error: $msg\n") if $msg;
38 print STDERR <<END;
39Usage: git cvsimport # fetch/update GIT from CVS
40 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
41 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
42 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
43 [-r remote] [CVS_module]
44END
45 exit(1);
46}
47
48sub read_author_info($) {
49 my ($file) = @_;
50 my $user;
51 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
52
53 while (<$f>) {
54 # Expected format is this:
55 # exon=Andreas Ericsson <ae@op5.se>
56 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
57 $user = $1;
58 $conv_author_name{$user} = $2;
59 $conv_author_email{$user} = $3;
60 }
61 # However, we also read from CVSROOT/users format
62 # to ease migration.
63 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
64 my $mapped;
65 ($user, $mapped) = ($1, $3);
66 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
67 $conv_author_name{$user} = $1;
68 $conv_author_email{$user} = $2;
69 }
70 elsif ($mapped =~ /^<?(.*)>?$/) {
71 $conv_author_name{$user} = $user;
72 $conv_author_email{$user} = $1;
73 }
74 }
75 # NEEDSWORK: Maybe warn on unrecognized lines?
76 }
77 close ($f);
78}
79
80sub write_author_info($) {
81 my ($file) = @_;
82 open my $f, '>', $file or
83 die("Failed to open $file for writing: $!");
84
85 foreach (keys %conv_author_name) {
86 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
87 }
88 close ($f);
89}
90
91# convert getopts specs for use by git config
92sub read_repo_config {
93 # Split the string between characters, unless there is a ':'
94 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
95 my @opts = split(/ *(?!:)/, shift);
96 foreach my $o (@opts) {
97 my $key = $o;
98 $key =~ s/://g;
99 my $arg = 'git config';
100 $arg .= ' --bool' if ($o !~ /:$/);
101
102 chomp(my $tmp = `$arg --get cvsimport.$key`);
103 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
104 no strict 'refs';
105 my $opt_name = "opt_" . $key;
106 if (!$$opt_name) {
107 $$opt_name = $tmp;
108 }
109 }
110 }
111}
112
113my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:";
114read_repo_config($opts);
115Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
116
117# turn the Getopt::Std specification in a Getopt::Long one,
118# with support for multiple -M options
119GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
120 or usage();
121usage if $opt_h;
122
123if (@ARGV == 0) {
124 chomp(my $module = `git config --get cvsimport.module`);
125 push(@ARGV, $module) if $? == 0;
126}
127@ARGV <= 1 or usage("You can't specify more than one CVS module");
128
129if ($opt_d) {
130 $ENV{"CVSROOT"} = $opt_d;
131} elsif (-f 'CVS/Root') {
132 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
133 $opt_d = <$f>;
134 chomp $opt_d;
135 close $f;
136 $ENV{"CVSROOT"} = $opt_d;
137} elsif ($ENV{"CVSROOT"}) {
138 $opt_d = $ENV{"CVSROOT"};
139} else {
140 usage("CVSROOT needs to be set");
141}
142$opt_s ||= "-";
143$opt_a ||= 0;
144
145my $git_tree = $opt_C;
146$git_tree ||= ".";
147
148my $remote;
149if (defined $opt_r) {
150 $remote = 'refs/remotes/' . $opt_r;
151 $opt_o ||= "master";
152} else {
153 $opt_o ||= "origin";
154 $remote = 'refs/heads';
155}
156
157my $cvs_tree;
158if ($#ARGV == 0) {
159 $cvs_tree = $ARGV[0];
160} elsif (-f 'CVS/Repository') {
161 open my $f, '<', 'CVS/Repository' or
162 die 'Failed to open CVS/Repository';
163 $cvs_tree = <$f>;
164 chomp $cvs_tree;
165 close $f;
166} else {
167 usage("CVS module has to be specified");
168}
169
170our @mergerx = ();
171if ($opt_m) {
172 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
173}
174if (@opt_M) {
175 push (@mergerx, map { qr/$_/ } @opt_M);
176}
177
178# Remember UTC of our starting time
179# we'll want to avoid importing commits
180# that are too recent
181our $starttime = time();
182
183select(STDERR); $|=1; select(STDOUT);
184
185
186package CVSconn;
187# Basic CVS dialog.
188# We're only interested in connecting and downloading, so ...
189
190use File::Spec;
191use File::Temp qw(tempfile);
192use POSIX qw(strftime dup2);
193
194sub new {
195 my ($what,$repo,$subdir) = @_;
196 $what=ref($what) if ref($what);
197
198 my $self = {};
199 $self->{'buffer'} = "";
200 bless($self,$what);
201
202 $repo =~ s#/+$##;
203 $self->{'fullrep'} = $repo;
204 $self->conn();
205
206 $self->{'subdir'} = $subdir;
207 $self->{'lines'} = undef;
208
209 return $self;
210}
211
212sub conn {
213 my $self = shift;
214 my $repo = $self->{'fullrep'};
215 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
216 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
217
218 my ($proxyhost,$proxyport);
219 if ($param && ($param =~ m/proxy=([^;]+)/)) {
220 $proxyhost = $1;
221 # Default proxyport, if not specified, is 8080.
222 $proxyport = 8080;
223 if ($ENV{"CVS_PROXY_PORT"}) {
224 $proxyport = $ENV{"CVS_PROXY_PORT"};
225 }
226 if ($param =~ m/proxyport=([^;]+)/) {
227 $proxyport = $1;
228 }
229 }
230 $repo ||= '/';
231
232 # if username is not explicit in CVSROOT, then use current user, as cvs would
233 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
234 my $rr2 = "-";
235 unless ($port) {
236 $rr2 = ":pserver:$user\@$serv:$repo";
237 $port=2401;
238 }
239 my $rr = ":pserver:$user\@$serv:$port$repo";
240
241 if ($pass) {
242 $pass = $self->_scramble($pass);
243 } else {
244 open(H,$ENV{'HOME'}."/.cvspass") and do {
245 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
246 while (<H>) {
247 chomp;
248 s/^\/\d+\s+//;
249 my ($w,$p) = split(/\s/,$_,2);
250 if ($w eq $rr or $w eq $rr2) {
251 $pass = $p;
252 last;
253 }
254 }
255 };
256 }
257
258 my ($s, $rep);
259 if ($proxyhost) {
260
261 # Use a HTTP Proxy. Only works for HTTP proxies that
262 # don't require user authentication
263 #
264 # See: http://www.ietf.org/rfc/rfc2817.txt
265
266 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
267 die "Socket to $proxyhost: $!\n" unless defined $s;
268 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
269 or die "Write to $proxyhost: $!\n";
270 $s->flush();
271
272 $rep = <$s>;
273
274 # The answer should look like 'HTTP/1.x 2yy ....'
275 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
276 die "Proxy connect: $rep\n";
277 }
278 # Skip up to the empty line of the proxy server output
279 # including the response headers.
280 while ($rep = <$s>) {
281 last if (!defined $rep ||
282 $rep eq "\n" ||
283 $rep eq "\r\n");
284 }
285 } else {
286 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
287 die "Socket to $serv: $!\n" unless defined $s;
288 }
289
290 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
291 or die "Write to $serv: $!\n";
292 $s->flush();
293
294 $rep = <$s>;
295
296 if ($rep ne "I LOVE YOU\n") {
297 $rep="<unknown>" unless $rep;
298 die "AuthReply: $rep\n";
299 }
300 $self->{'socketo'} = $s;
301 $self->{'socketi'} = $s;
302 } else { # local or ext: Fork off our own cvs server.
303 my $pr = IO::Pipe->new();
304 my $pw = IO::Pipe->new();
305 my $pid = fork();
306 die "Fork: $!\n" unless defined $pid;
307 my $cvs = 'cvs';
308 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
309 my $rsh = 'rsh';
310 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
311
312 my @cvs = ($cvs, 'server');
313 my ($local, $user, $host);
314 $local = $repo =~ s/:local://;
315 if (!$local) {
316 $repo =~ s/:ext://;
317 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
318 ($user, $host) = ($1, $2);
319 }
320 if (!$local) {
321 if ($user) {
322 unshift @cvs, $rsh, '-l', $user, $host;
323 } else {
324 unshift @cvs, $rsh, $host;
325 }
326 }
327
328 unless ($pid) {
329 $pr->writer();
330 $pw->reader();
331 dup2($pw->fileno(),0);
332 dup2($pr->fileno(),1);
333 $pr->close();
334 $pw->close();
335 exec(@cvs);
336 }
337 $pw->writer();
338 $pr->reader();
339 $self->{'socketo'} = $pw;
340 $self->{'socketi'} = $pr;
341 }
342 $self->{'socketo'}->write("Root $repo\n");
343
344 # Trial and error says that this probably is the minimum set
345 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
346
347 $self->{'socketo'}->write("valid-requests\n");
348 $self->{'socketo'}->flush();
349
350 chomp(my $rep=$self->readline());
351 if ($rep !~ s/^Valid-requests\s*//) {
352 $rep="<unknown>" unless $rep;
353 die "Expected Valid-requests from server, but got: $rep\n";
354 }
355 chomp(my $res=$self->readline());
356 die "validReply: $res\n" if $res ne "ok";
357
358 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
359 $self->{'repo'} = $repo;
360}
361
362sub readline {
363 my ($self) = @_;
364 return $self->{'socketi'}->getline();
365}
366
367sub _file {
368 # Request a file with a given revision.
369 # Trial and error says this is a good way to do it. :-/
370 my ($self,$fn,$rev) = @_;
371 $self->{'socketo'}->write("Argument -N\n") or return undef;
372 $self->{'socketo'}->write("Argument -P\n") or return undef;
373 # -kk: Linus' version doesn't use it - defaults to off
374 if ($opt_k) {
375 $self->{'socketo'}->write("Argument -kk\n") or return undef;
376 }
377 $self->{'socketo'}->write("Argument -r\n") or return undef;
378 $self->{'socketo'}->write("Argument $rev\n") or return undef;
379 $self->{'socketo'}->write("Argument --\n") or return undef;
380 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
381 $self->{'socketo'}->write("Directory .\n") or return undef;
382 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
383 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
384 $self->{'socketo'}->write("co\n") or return undef;
385 $self->{'socketo'}->flush() or return undef;
386 $self->{'lines'} = 0;
387 return 1;
388}
389sub _line {
390 # Read a line from the server.
391 # ... except that 'line' may be an entire file. ;-)
392 my ($self, $fh) = @_;
393 die "Not in lines" unless defined $self->{'lines'};
394
395 my $line;
396 my $res=0;
397 while (defined($line = $self->readline())) {
398 # M U gnupg-cvs-rep/AUTHORS
399 # Updated gnupg-cvs-rep/
400 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
401 # /AUTHORS/1.1///T1.1
402 # u=rw,g=rw,o=rw
403 # 0
404 # ok
405
406 if ($line =~ s/^(?:Created|Updated) //) {
407 $line = $self->readline(); # path
408 $line = $self->readline(); # Entries line
409 my $mode = $self->readline(); chomp $mode;
410 $self->{'mode'} = $mode;
411 defined (my $cnt = $self->readline())
412 or die "EOF from server after 'Changed'\n";
413 chomp $cnt;
414 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
415 $line="";
416 $res = $self->_fetchfile($fh, $cnt);
417 } elsif ($line =~ s/^ //) {
418 print $fh $line;
419 $res += length($line);
420 } elsif ($line =~ /^M\b/) {
421 # output, do nothing
422 } elsif ($line =~ /^Mbinary\b/) {
423 my $cnt;
424 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
425 chomp $cnt;
426 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
427 $line="";
428 $res += $self->_fetchfile($fh, $cnt);
429 } else {
430 chomp $line;
431 if ($line eq "ok") {
432 # print STDERR "S: ok (".length($res).")\n";
433 return $res;
434 } elsif ($line =~ s/^E //) {
435 # print STDERR "S: $line\n";
436 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
437 $line = $self->readline(); # filename
438 $line = $self->readline(); # OK
439 chomp $line;
440 die "Unknown: $line" if $line ne "ok";
441 return -1;
442 } else {
443 die "Unknown: $line\n";
444 }
445 }
446 }
447 return undef;
448}
449sub file {
450 my ($self,$fn,$rev) = @_;
451 my $res;
452
453 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
454 DIR => File::Spec->tmpdir(), UNLINK => 1);
455
456 $self->_file($fn,$rev) and $res = $self->_line($fh);
457
458 if (!defined $res) {
459 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
460 truncate $fh, 0;
461 $self->conn();
462 $self->_file($fn,$rev) or die "No file command send";
463 $res = $self->_line($fh);
464 die "Retry failed" unless defined $res;
465 }
466 close ($fh);
467
468 return ($name, $res);
469}
470sub _fetchfile {
471 my ($self, $fh, $cnt) = @_;
472 my $res = 0;
473 my $bufsize = 1024 * 1024;
474 while ($cnt) {
475 if ($bufsize > $cnt) {
476 $bufsize = $cnt;
477 }
478 my $buf;
479 my $num = $self->{'socketi'}->read($buf,$bufsize);
480 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
481 print $fh $buf;
482 $res += $num;
483 $cnt -= $num;
484 }
485 return $res;
486}
487
488sub _scramble {
489 my ($self, $pass) = @_;
490 my $scrambled = "A";
491
492 return $scrambled unless $pass;
493
494 my $pass_len = length($pass);
495 my @pass_arr = split("", $pass);
496 my $i;
497
498 # from cvs/src/scramble.c
499 my @shifts = (
500 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
501 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
502 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
503 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
504 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
505 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
506 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
507 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
508 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
509 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
510 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
511 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
512 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
513 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
514 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
515 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
516 );
517
518 for ($i = 0; $i < $pass_len; $i++) {
519 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
520 }
521
522 return $scrambled;
523}
524
525package main;
526
527my $cvs = CVSconn->new($opt_d, $cvs_tree);
528
529
530sub pdate($) {
531 my ($d) = @_;
532 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
533 or die "Unparseable date: $d\n";
534 my $y=$1; $y-=1900 if $y>1900;
535 return timegm($6||0,$5,$4,$3,$2-1,$y);
536}
537
538sub pmode($) {
539 my ($mode) = @_;
540 my $m = 0;
541 my $mm = 0;
542 my $um = 0;
543 for my $x(split(//,$mode)) {
544 if ($x eq ",") {
545 $m |= $mm&$um;
546 $mm = 0;
547 $um = 0;
548 } elsif ($x eq "u") { $um |= 0700;
549 } elsif ($x eq "g") { $um |= 0070;
550 } elsif ($x eq "o") { $um |= 0007;
551 } elsif ($x eq "r") { $mm |= 0444;
552 } elsif ($x eq "w") { $mm |= 0222;
553 } elsif ($x eq "x") { $mm |= 0111;
554 } elsif ($x eq "=") { # do nothing
555 } else { die "Unknown mode: $mode\n";
556 }
557 }
558 $m |= $mm&$um;
559 return $m;
560}
561
562sub getwd() {
563 my $pwd = `pwd`;
564 chomp $pwd;
565 return $pwd;
566}
567
568sub is_sha1 {
569 my $s = shift;
570 return $s =~ /^[a-f0-9]{40}$/;
571}
572
573sub get_headref ($) {
574 my $name = shift;
575 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
576 return undef unless $? == 0;
577 chomp $r;
578 return $r;
579}
580
581-d $git_tree
582 or mkdir($git_tree,0777)
583 or die "Could not create $git_tree: $!";
584chdir($git_tree);
585
586my $last_branch = "";
587my $orig_branch = "";
588my %branch_date;
589my $tip_at_start = undef;
590
591my $git_dir = $ENV{"GIT_DIR"} || ".git";
592$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
593$ENV{"GIT_DIR"} = $git_dir;
594my $orig_git_index;
595$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
596
597my %index; # holds filenames of one index per branch
598
599unless (-d $git_dir) {
600 system("git-init");
601 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
602 system("git-read-tree");
603 die "Cannot init an empty tree: $?\n" if $?;
604
605 $last_branch = $opt_o;
606 $orig_branch = "";
607} else {
608 open(F, "git-symbolic-ref HEAD |") or
609 die "Cannot run git-symbolic-ref: $!\n";
610 chomp ($last_branch = <F>);
611 $last_branch = basename($last_branch);
612 close(F);
613 unless ($last_branch) {
614 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
615 $last_branch = "master";
616 }
617 $orig_branch = $last_branch;
618 $tip_at_start = `git-rev-parse --verify HEAD`;
619
620 # Get the last import timestamps
621 my $fmt = '($ref, $author) = (%(refname), %(author));';
622 open(H, "git-for-each-ref --perl --format='$fmt' $remote |") or
623 die "Cannot run git-for-each-ref: $!\n";
624 while (defined(my $entry = <H>)) {
625 my ($ref, $author);
626 eval($entry) || die "cannot eval refs list: $@";
627 my ($head) = ($ref =~ m|^$remote/(.*)|);
628 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
629 $branch_date{$head} = $1;
630 }
631 close(H);
632 if (!exists $branch_date{$opt_o}) {
633 die "Branch '$opt_o' does not exist.\n".
634 "Either use the correct '-o branch' option,\n".
635 "or import to a new repository.\n";
636 }
637}
638
639-d $git_dir
640 or die "Could not create git subdir ($git_dir).\n";
641
642# now we read (and possibly save) author-info as well
643-f "$git_dir/cvs-authors" and
644 read_author_info("$git_dir/cvs-authors");
645if ($opt_A) {
646 read_author_info($opt_A);
647 write_author_info("$git_dir/cvs-authors");
648}
649
650
651#
652# run cvsps into a file unless we are getting
653# it passed as a file via $opt_P
654#
655my $cvspsfile;
656unless ($opt_P) {
657 print "Running cvsps...\n" if $opt_v;
658 my $pid = open(CVSPS,"-|");
659 my $cvspsfh;
660 die "Cannot fork: $!\n" unless defined $pid;
661 unless ($pid) {
662 my @opt;
663 @opt = split(/,/,$opt_p) if defined $opt_p;
664 unshift @opt, '-z', $opt_z if defined $opt_z;
665 unshift @opt, '-q' unless defined $opt_v;
666 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
667 push @opt, '--cvs-direct';
668 }
669 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
670 die "Could not start cvsps: $!\n";
671 }
672 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
673 DIR => File::Spec->tmpdir());
674 while (<CVSPS>) {
675 print $cvspsfh $_;
676 }
677 close CVSPS;
678 $? == 0 or die "git-cvsimport: fatal: cvsps reported error\n";
679 close $cvspsfh;
680} else {
681 $cvspsfile = $opt_P;
682}
683
684open(CVS, "<$cvspsfile") or die $!;
685
686## cvsps output:
687#---------------------
688#PatchSet 314
689#Date: 1999/09/18 13:03:59
690#Author: wkoch
691#Branch: STABLE-BRANCH-1-0
692#Ancestor branch: HEAD
693#Tag: (none)
694#Log:
695# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
696#Members:
697# README:1.57->1.57.2.1
698# VERSION:1.96->1.96.2.1
699#
700#---------------------
701
702my $state = 0;
703
704sub update_index (\@\@) {
705 my $old = shift;
706 my $new = shift;
707 open(my $fh, '|-', qw(git-update-index -z --index-info))
708 or die "unable to open git-update-index: $!";
709 print $fh
710 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
711 @$old),
712 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
713 @$new)
714 or die "unable to write to git-update-index: $!";
715 close $fh
716 or die "unable to write to git-update-index: $!";
717 $? and die "git-update-index reported error: $?";
718}
719
720sub write_tree () {
721 open(my $fh, '-|', qw(git-write-tree))
722 or die "unable to open git-write-tree: $!";
723 chomp(my $tree = <$fh>);
724 is_sha1($tree)
725 or die "Cannot get tree id ($tree): $!";
726 close($fh)
727 or die "Error running git-write-tree: $?\n";
728 print "Tree ID $tree\n" if $opt_v;
729 return $tree;
730}
731
732my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
733my (@old,@new,@skipped,%ignorebranch);
734
735# commits that cvsps cannot place anywhere...
736$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
737
738sub commit {
739 if ($branch eq $opt_o && !$index{branch} &&
740 !get_headref("$remote/$branch")) {
741 # looks like an initial commit
742 # use the index primed by git-init
743 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
744 $index{$branch} = "$git_dir/index";
745 } else {
746 # use an index per branch to speed up
747 # imports of projects with many branches
748 unless ($index{$branch}) {
749 $index{$branch} = tmpnam();
750 $ENV{GIT_INDEX_FILE} = $index{$branch};
751 if ($ancestor) {
752 system("git-read-tree", "$remote/$ancestor");
753 } else {
754 system("git-read-tree", "$remote/$branch");
755 }
756 die "read-tree failed: $?\n" if $?;
757 }
758 }
759 $ENV{GIT_INDEX_FILE} = $index{$branch};
760
761 update_index(@old, @new);
762 @old = @new = ();
763 my $tree = write_tree();
764 my $parent = get_headref("$remote/$last_branch");
765 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
766
767 my @commit_args;
768 push @commit_args, ("-p", $parent) if $parent;
769
770 # loose detection of merges
771 # based on the commit msg
772 foreach my $rx (@mergerx) {
773 next unless $logmsg =~ $rx && $1;
774 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
775 if (my $sha1 = get_headref("$remote/$mparent")) {
776 push @commit_args, '-p', "$remote/$mparent";
777 print "Merge parent branch: $mparent\n" if $opt_v;
778 }
779 }
780
781 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
782 $ENV{GIT_AUTHOR_NAME} = $author_name;
783 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
784 $ENV{GIT_AUTHOR_DATE} = $commit_date;
785 $ENV{GIT_COMMITTER_NAME} = $author_name;
786 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
787 $ENV{GIT_COMMITTER_DATE} = $commit_date;
788 my $pid = open2(my $commit_read, my $commit_write,
789 'git-commit-tree', $tree, @commit_args);
790
791 # compatibility with git2cvs
792 substr($logmsg,32767) = "" if length($logmsg) > 32767;
793 $logmsg =~ s/[\s\n]+\z//;
794
795 if (@skipped) {
796 $logmsg .= "\n\n\nSKIPPED:\n\t";
797 $logmsg .= join("\n\t", @skipped) . "\n";
798 @skipped = ();
799 }
800
801 print($commit_write "$logmsg\n") && close($commit_write)
802 or die "Error writing to git-commit-tree: $!\n";
803
804 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
805 chomp(my $cid = <$commit_read>);
806 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
807 print "Commit ID $cid\n" if $opt_v;
808 close($commit_read);
809
810 waitpid($pid,0);
811 die "Error running git-commit-tree: $?\n" if $?;
812
813 system('git-update-ref', "$remote/$branch", $cid) == 0
814 or die "Cannot write branch $branch for update: $!\n";
815
816 if ($tag) {
817 my ($xtag) = $tag;
818 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
819 $xtag =~ tr/_/\./ if ( $opt_u );
820 $xtag =~ s/[\/]/$opt_s/g;
821 $xtag =~ s/\[//g;
822
823 system('git-tag', '-f', $xtag, $cid) == 0
824 or die "Cannot create tag $xtag: $!\n";
825
826 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
827 }
828};
829
830my $commitcount = 1;
831while (<CVS>) {
832 chomp;
833 if ($state == 0 and /^-+$/) {
834 $state = 1;
835 } elsif ($state == 0) {
836 $state = 1;
837 redo;
838 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
839 $patchset = 0+$_;
840 $state=2;
841 } elsif ($state == 2 and s/^Date:\s+//) {
842 $date = pdate($_);
843 unless ($date) {
844 print STDERR "Could not parse date: $_\n";
845 $state=0;
846 next;
847 }
848 $state=3;
849 } elsif ($state == 3 and s/^Author:\s+//) {
850 s/\s+$//;
851 if (/^(.*?)\s+<(.*)>/) {
852 ($author_name, $author_email) = ($1, $2);
853 } elsif ($conv_author_name{$_}) {
854 $author_name = $conv_author_name{$_};
855 $author_email = $conv_author_email{$_};
856 } else {
857 $author_name = $author_email = $_;
858 }
859 $state = 4;
860 } elsif ($state == 4 and s/^Branch:\s+//) {
861 s/\s+$//;
862 tr/_/\./ if ( $opt_u );
863 s/[\/]/$opt_s/g;
864 $branch = $_;
865 $state = 5;
866 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
867 s/\s+$//;
868 $ancestor = $_;
869 $ancestor = $opt_o if $ancestor eq "HEAD";
870 $state = 6;
871 } elsif ($state == 5) {
872 $ancestor = undef;
873 $state = 6;
874 redo;
875 } elsif ($state == 6 and s/^Tag:\s+//) {
876 s/\s+$//;
877 if ($_ eq "(none)") {
878 $tag = undef;
879 } else {
880 $tag = $_;
881 }
882 $state = 7;
883 } elsif ($state == 7 and /^Log:/) {
884 $logmsg = "";
885 $state = 8;
886 } elsif ($state == 8 and /^Members:/) {
887 $branch = $opt_o if $branch eq "HEAD";
888 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
889 # skip
890 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
891 $state = 11;
892 next;
893 }
894 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
895 # skip if the commit is too recent
896 # given that the cvsps default fuzz is 300s, we give ourselves another
897 # 300s just in case -- this also prevents skipping commits
898 # due to server clock drift
899 print "skip patchset $patchset: $date too recent\n" if $opt_v;
900 $state = 11;
901 next;
902 }
903 if (exists $ignorebranch{$branch}) {
904 print STDERR "Skipping $branch\n";
905 $state = 11;
906 next;
907 }
908 if ($ancestor) {
909 if ($ancestor eq $branch) {
910 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
911 $ancestor = $opt_o;
912 }
913 if (defined get_headref("$remote/$branch")) {
914 print STDERR "Branch $branch already exists!\n";
915 $state=11;
916 next;
917 }
918 my $id = get_headref("$remote/$ancestor");
919 if (!$id) {
920 print STDERR "Branch $ancestor does not exist!\n";
921 $ignorebranch{$branch} = 1;
922 $state=11;
923 next;
924 }
925
926 system(qw(git update-ref -m cvsimport),
927 "$remote/$branch", $id);
928 if($? != 0) {
929 print STDERR "Could not create branch $branch\n";
930 $ignorebranch{$branch} = 1;
931 $state=11;
932 next;
933 }
934 }
935 $last_branch = $branch if $branch ne $last_branch;
936 $state = 9;
937 } elsif ($state == 8) {
938 $logmsg .= "$_\n";
939 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
940# VERSION:1.96->1.96.2.1
941 my $init = ($2 eq "INITIAL");
942 my $fn = $1;
943 my $rev = $3;
944 $fn =~ s#^/+##;
945 if ($opt_S && $fn =~ m/$opt_S/) {
946 print "SKIPPING $fn v $rev\n";
947 push(@skipped, $fn);
948 next;
949 }
950 print "Fetching $fn v $rev\n" if $opt_v;
951 my ($tmpname, $size) = $cvs->file($fn,$rev);
952 if ($size == -1) {
953 push(@old,$fn);
954 print "Drop $fn\n" if $opt_v;
955 } else {
956 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
957 my $pid = open(my $F, '-|');
958 die $! unless defined $pid;
959 if (!$pid) {
960 exec("git-hash-object", "-w", $tmpname)
961 or die "Cannot create object: $!\n";
962 }
963 my $sha = <$F>;
964 chomp $sha;
965 close $F;
966 my $mode = pmode($cvs->{'mode'});
967 push(@new,[$mode, $sha, $fn]); # may be resurrected!
968 }
969 unlink($tmpname);
970 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
971 my $fn = $1;
972 $fn =~ s#^/+##;
973 push(@old,$fn);
974 print "Delete $fn\n" if $opt_v;
975 } elsif ($state == 9 and /^\s*$/) {
976 $state = 10;
977 } elsif (($state == 9 or $state == 10) and /^-+$/) {
978 $commitcount++;
979 if ($opt_L && $commitcount > $opt_L) {
980 last;
981 }
982 commit();
983 if (($commitcount & 1023) == 0) {
984 system("git repack -a -d");
985 }
986 $state = 1;
987 } elsif ($state == 11 and /^-+$/) {
988 $state = 1;
989 } elsif (/^-+$/) { # end of unknown-line processing
990 $state = 1;
991 } elsif ($state != 11) { # ignore stuff when skipping
992 print STDERR "* UNKNOWN LINE * $_\n";
993 }
994}
995commit() if $branch and $state != 11;
996
997unless ($opt_P) {
998 unlink($cvspsfile);
999}
1000
1001# The heuristic of repacking every 1024 commits can leave a
1002# lot of unpacked data. If there is more than 1MB worth of
1003# not-packed objects, repack once more.
1004my $line = `git-count-objects`;
1005if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1006 my ($n_objects, $kb) = ($1, $2);
1007 1024 < $kb
1008 and system("git repack -a -d");
1009}
1010
1011foreach my $git_index (values %index) {
1012 if ($git_index ne "$git_dir/index") {
1013 unlink($git_index);
1014 }
1015}
1016
1017if (defined $orig_git_index) {
1018 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1019} else {
1020 delete $ENV{GIT_INDEX_FILE};
1021}
1022
1023# Now switch back to the branch we were in before all of this happened
1024if ($orig_branch) {
1025 print "DONE.\n" if $opt_v;
1026 if ($opt_i) {
1027 exit 0;
1028 }
1029 my $tip_at_end = `git-rev-parse --verify HEAD`;
1030 if ($tip_at_start ne $tip_at_end) {
1031 for ($tip_at_start, $tip_at_end) { chomp; }
1032 print "Fetched into the current branch.\n" if $opt_v;
1033 system(qw(git-read-tree -u -m),
1034 $tip_at_start, $tip_at_end);
1035 die "Fast-forward update failed: $?\n" if $?;
1036 }
1037 else {
1038 system(qw(git-merge cvsimport HEAD), "$remote/$opt_o");
1039 die "Could not merge $opt_o into the current branch.\n" if $?;
1040 }
1041} else {
1042 $orig_branch = "master";
1043 print "DONE; creating $orig_branch branch\n" if $opt_v;
1044 system("git-update-ref", "refs/heads/master", "$remote/$opt_o")
1045 unless defined get_headref('refs/heads/master');
1046 system("git-symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1047 if ($opt_r && $opt_o ne 'HEAD');
1048 system('git-update-ref', 'HEAD', "$orig_branch");
1049 unless ($opt_i) {
1050 system('git checkout -f');
1051 die "checkout failed: $?\n" if $?;
1052 }
1053}