ceb119d89104648c37bf0bcd770acadf796cd9d1
1#!/usr/bin/perl
2
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use 5.008;
17use strict;
18use warnings;
19use Getopt::Long;
20use File::Spec;
21use File::Temp qw(tempfile tmpnam);
22use File::Path qw(mkpath);
23use File::Basename qw(basename dirname);
24use Time::Local;
25use IO::Socket;
26use IO::Pipe;
27use POSIX qw(strftime dup2 ENOENT);
28use IPC::Open2;
29
30$SIG{'PIPE'}="IGNORE";
31$ENV{'TZ'}="UTC";
32
33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34my (%conv_author_name, %conv_author_email, %conv_author_tz);
35
36sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45END
46 exit(1);
47}
48
49sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
53
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
61 }
62 # or with an optional timezone:
63 # spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
64 elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
65 $user = $1;
66 $conv_author_name{$user} = $2;
67 $conv_author_email{$user} = $3;
68 $conv_author_tz{$user} = $4;
69 }
70 # However, we also read from CVSROOT/users format
71 # to ease migration.
72 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
73 my $mapped;
74 ($user, $mapped) = ($1, $3);
75 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
76 $conv_author_name{$user} = $1;
77 $conv_author_email{$user} = $2;
78 }
79 elsif ($mapped =~ /^<?(.*)>?$/) {
80 $conv_author_name{$user} = $user;
81 $conv_author_email{$user} = $1;
82 }
83 }
84 # NEEDSWORK: Maybe warn on unrecognized lines?
85 }
86 close ($f);
87}
88
89sub write_author_info($) {
90 my ($file) = @_;
91 open my $f, '>', $file or
92 die("Failed to open $file for writing: $!");
93
94 foreach (keys %conv_author_name) {
95 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
96 print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
97 print $f "\n";
98 }
99 close ($f);
100}
101
102# convert getopts specs for use by git config
103my %longmap = (
104 'A:' => 'authors-file',
105 'M:' => 'merge-regex',
106 'P:' => undef,
107 'R' => 'track-revisions',
108 'S:' => 'ignore-paths',
109);
110
111sub read_repo_config {
112 # Split the string between characters, unless there is a ':'
113 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
114 my @opts = split(/ *(?!:)/, shift);
115 foreach my $o (@opts) {
116 my $key = $o;
117 $key =~ s/://g;
118 my $arg = 'git config';
119 $arg .= ' --bool' if ($o !~ /:$/);
120 my $ckey = $key;
121
122 if (exists $longmap{$o}) {
123 # An uppercase option like -R cannot be
124 # expressed in the configuration, as the
125 # variable names are downcased.
126 $ckey = $longmap{$o};
127 next if (! defined $ckey);
128 $ckey =~ s/-//g;
129 }
130 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
131 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
132 no strict 'refs';
133 my $opt_name = "opt_" . $key;
134 if (!$$opt_name) {
135 $$opt_name = $tmp;
136 }
137 }
138 }
139}
140
141my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
142read_repo_config($opts);
143Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
144
145# turn the Getopt::Std specification in a Getopt::Long one,
146# with support for multiple -M options
147GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
148 or usage();
149usage if $opt_h;
150
151if (@ARGV == 0) {
152 chomp(my $module = `git config --get cvsimport.module`);
153 push(@ARGV, $module) if $? == 0;
154}
155@ARGV <= 1 or usage("You can't specify more than one CVS module");
156
157if ($opt_d) {
158 $ENV{"CVSROOT"} = $opt_d;
159} elsif (-f 'CVS/Root') {
160 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
161 $opt_d = <$f>;
162 chomp $opt_d;
163 close $f;
164 $ENV{"CVSROOT"} = $opt_d;
165} elsif ($ENV{"CVSROOT"}) {
166 $opt_d = $ENV{"CVSROOT"};
167} else {
168 usage("CVSROOT needs to be set");
169}
170$opt_s ||= "-";
171$opt_a ||= 0;
172
173my $git_tree = $opt_C;
174$git_tree ||= ".";
175
176my $remote;
177if (defined $opt_r) {
178 $remote = 'refs/remotes/' . $opt_r;
179 $opt_o ||= "master";
180} else {
181 $opt_o ||= "origin";
182 $remote = 'refs/heads';
183}
184
185my $cvs_tree;
186if ($#ARGV == 0) {
187 $cvs_tree = $ARGV[0];
188} elsif (-f 'CVS/Repository') {
189 open my $f, '<', 'CVS/Repository' or
190 die 'Failed to open CVS/Repository';
191 $cvs_tree = <$f>;
192 chomp $cvs_tree;
193 close $f;
194} else {
195 usage("CVS module has to be specified");
196}
197
198our @mergerx = ();
199if ($opt_m) {
200 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
201}
202if (@opt_M) {
203 push (@mergerx, map { qr/$_/ } @opt_M);
204}
205
206# Remember UTC of our starting time
207# we'll want to avoid importing commits
208# that are too recent
209our $starttime = time();
210
211select(STDERR); $|=1; select(STDOUT);
212
213
214package CVSconn;
215# Basic CVS dialog.
216# We're only interested in connecting and downloading, so ...
217
218use File::Spec;
219use File::Temp qw(tempfile);
220use POSIX qw(strftime dup2);
221
222sub new {
223 my ($what,$repo,$subdir) = @_;
224 $what=ref($what) if ref($what);
225
226 my $self = {};
227 $self->{'buffer'} = "";
228 bless($self,$what);
229
230 $repo =~ s#/+$##;
231 $self->{'fullrep'} = $repo;
232 $self->conn();
233
234 $self->{'subdir'} = $subdir;
235 $self->{'lines'} = undef;
236
237 return $self;
238}
239
240sub find_password_entry {
241 my ($cvspass, @cvsroot) = @_;
242 my ($file, $delim) = @$cvspass;
243 my $pass;
244 local ($_);
245
246 if (open(my $fh, $file)) {
247 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
248 CVSPASSFILE:
249 while (<$fh>) {
250 chomp;
251 s/^\/\d+\s+//;
252 my ($w, $p) = split($delim,$_,2);
253 for my $cvsroot (@cvsroot) {
254 if ($w eq $cvsroot) {
255 $pass = $p;
256 last CVSPASSFILE;
257 }
258 }
259 }
260 close($fh);
261 }
262 return $pass;
263}
264
265sub conn {
266 my $self = shift;
267 my $repo = $self->{'fullrep'};
268 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
269 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
270
271 my ($proxyhost,$proxyport);
272 if ($param && ($param =~ m/proxy=([^;]+)/)) {
273 $proxyhost = $1;
274 # Default proxyport, if not specified, is 8080.
275 $proxyport = 8080;
276 if ($ENV{"CVS_PROXY_PORT"}) {
277 $proxyport = $ENV{"CVS_PROXY_PORT"};
278 }
279 if ($param =~ m/proxyport=([^;]+)/) {
280 $proxyport = $1;
281 }
282 }
283 $repo ||= '/';
284
285 # if username is not explicit in CVSROOT, then use current user, as cvs would
286 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
287 my $rr2 = "-";
288 unless ($port) {
289 $rr2 = ":pserver:$user\@$serv:$repo";
290 $port=2401;
291 }
292 my $rr = ":pserver:$user\@$serv:$port$repo";
293
294 if ($pass) {
295 $pass = $self->_scramble($pass);
296 } else {
297 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
298 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
299 my @loc = ();
300 foreach my $cvspass (@cvspass) {
301 my $p = find_password_entry($cvspass, $rr, $rr2);
302 if ($p) {
303 push @loc, $cvspass->[0];
304 $pass = $p;
305 }
306 }
307
308 if (1 < @loc) {
309 die("Multiple cvs password files have ".
310 "entries for CVSROOT $opt_d: @loc");
311 } elsif (!$pass) {
312 $pass = "A";
313 }
314 }
315
316 my ($s, $rep);
317 if ($proxyhost) {
318
319 # Use a HTTP Proxy. Only works for HTTP proxies that
320 # don't require user authentication
321 #
322 # See: http://www.ietf.org/rfc/rfc2817.txt
323
324 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
325 die "Socket to $proxyhost: $!\n" unless defined $s;
326 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
327 or die "Write to $proxyhost: $!\n";
328 $s->flush();
329
330 $rep = <$s>;
331
332 # The answer should look like 'HTTP/1.x 2yy ....'
333 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
334 die "Proxy connect: $rep\n";
335 }
336 # Skip up to the empty line of the proxy server output
337 # including the response headers.
338 while ($rep = <$s>) {
339 last if (!defined $rep ||
340 $rep eq "\n" ||
341 $rep eq "\r\n");
342 }
343 } else {
344 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
345 die "Socket to $serv: $!\n" unless defined $s;
346 }
347
348 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
349 or die "Write to $serv: $!\n";
350 $s->flush();
351
352 $rep = <$s>;
353
354 if ($rep ne "I LOVE YOU\n") {
355 $rep="<unknown>" unless $rep;
356 die "AuthReply: $rep\n";
357 }
358 $self->{'socketo'} = $s;
359 $self->{'socketi'} = $s;
360 } else { # local or ext: Fork off our own cvs server.
361 my $pr = IO::Pipe->new();
362 my $pw = IO::Pipe->new();
363 my $pid = fork();
364 die "Fork: $!\n" unless defined $pid;
365 my $cvs = 'cvs';
366 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
367 my $rsh = 'rsh';
368 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
369
370 my @cvs = ($cvs, 'server');
371 my ($local, $user, $host);
372 $local = $repo =~ s/:local://;
373 if (!$local) {
374 $repo =~ s/:ext://;
375 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
376 ($user, $host) = ($1, $2);
377 }
378 if (!$local) {
379 if ($user) {
380 unshift @cvs, $rsh, '-l', $user, $host;
381 } else {
382 unshift @cvs, $rsh, $host;
383 }
384 }
385
386 unless ($pid) {
387 $pr->writer();
388 $pw->reader();
389 dup2($pw->fileno(),0);
390 dup2($pr->fileno(),1);
391 $pr->close();
392 $pw->close();
393 exec(@cvs);
394 }
395 $pw->writer();
396 $pr->reader();
397 $self->{'socketo'} = $pw;
398 $self->{'socketi'} = $pr;
399 }
400 $self->{'socketo'}->write("Root $repo\n");
401
402 # Trial and error says that this probably is the minimum set
403 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
404
405 $self->{'socketo'}->write("valid-requests\n");
406 $self->{'socketo'}->flush();
407
408 my $rep=$self->readline();
409 die "Failed to read from server" unless defined $rep;
410 chomp($rep);
411 if ($rep !~ s/^Valid-requests\s*//) {
412 $rep="<unknown>" unless $rep;
413 die "Expected Valid-requests from server, but got: $rep\n";
414 }
415 chomp(my $res=$self->readline());
416 die "validReply: $res\n" if $res ne "ok";
417
418 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
419 $self->{'repo'} = $repo;
420}
421
422sub readline {
423 my ($self) = @_;
424 return $self->{'socketi'}->getline();
425}
426
427sub _file {
428 # Request a file with a given revision.
429 # Trial and error says this is a good way to do it. :-/
430 my ($self,$fn,$rev) = @_;
431 $self->{'socketo'}->write("Argument -N\n") or return undef;
432 $self->{'socketo'}->write("Argument -P\n") or return undef;
433 # -kk: Linus' version doesn't use it - defaults to off
434 if ($opt_k) {
435 $self->{'socketo'}->write("Argument -kk\n") or return undef;
436 }
437 $self->{'socketo'}->write("Argument -r\n") or return undef;
438 $self->{'socketo'}->write("Argument $rev\n") or return undef;
439 $self->{'socketo'}->write("Argument --\n") or return undef;
440 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
441 $self->{'socketo'}->write("Directory .\n") or return undef;
442 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
443 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
444 $self->{'socketo'}->write("co\n") or return undef;
445 $self->{'socketo'}->flush() or return undef;
446 $self->{'lines'} = 0;
447 return 1;
448}
449sub _line {
450 # Read a line from the server.
451 # ... except that 'line' may be an entire file. ;-)
452 my ($self, $fh) = @_;
453 die "Not in lines" unless defined $self->{'lines'};
454
455 my $line;
456 my $res=0;
457 while (defined($line = $self->readline())) {
458 # M U gnupg-cvs-rep/AUTHORS
459 # Updated gnupg-cvs-rep/
460 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
461 # /AUTHORS/1.1///T1.1
462 # u=rw,g=rw,o=rw
463 # 0
464 # ok
465
466 if ($line =~ s/^(?:Created|Updated) //) {
467 $line = $self->readline(); # path
468 $line = $self->readline(); # Entries line
469 my $mode = $self->readline(); chomp $mode;
470 $self->{'mode'} = $mode;
471 defined (my $cnt = $self->readline())
472 or die "EOF from server after 'Changed'\n";
473 chomp $cnt;
474 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
475 $line="";
476 $res = $self->_fetchfile($fh, $cnt);
477 } elsif ($line =~ s/^ //) {
478 print $fh $line;
479 $res += length($line);
480 } elsif ($line =~ /^M\b/) {
481 # output, do nothing
482 } elsif ($line =~ /^Mbinary\b/) {
483 my $cnt;
484 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
485 chomp $cnt;
486 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
487 $line="";
488 $res += $self->_fetchfile($fh, $cnt);
489 } else {
490 chomp $line;
491 if ($line eq "ok") {
492 # print STDERR "S: ok (".length($res).")\n";
493 return $res;
494 } elsif ($line =~ s/^E //) {
495 # print STDERR "S: $line\n";
496 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
497 $line = $self->readline(); # filename
498 $line = $self->readline(); # OK
499 chomp $line;
500 die "Unknown: $line" if $line ne "ok";
501 return -1;
502 } else {
503 die "Unknown: $line\n";
504 }
505 }
506 }
507 return undef;
508}
509sub file {
510 my ($self,$fn,$rev) = @_;
511 my $res;
512
513 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
514 DIR => File::Spec->tmpdir(), UNLINK => 1);
515
516 $self->_file($fn,$rev) and $res = $self->_line($fh);
517
518 if (!defined $res) {
519 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
520 truncate $fh, 0;
521 $self->conn();
522 $self->_file($fn,$rev) or die "No file command send";
523 $res = $self->_line($fh);
524 die "Retry failed" unless defined $res;
525 }
526 close ($fh);
527
528 return ($name, $res);
529}
530sub _fetchfile {
531 my ($self, $fh, $cnt) = @_;
532 my $res = 0;
533 my $bufsize = 1024 * 1024;
534 while ($cnt) {
535 if ($bufsize > $cnt) {
536 $bufsize = $cnt;
537 }
538 my $buf;
539 my $num = $self->{'socketi'}->read($buf,$bufsize);
540 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
541 print $fh $buf;
542 $res += $num;
543 $cnt -= $num;
544 }
545 return $res;
546}
547
548sub _scramble {
549 my ($self, $pass) = @_;
550 my $scrambled = "A";
551
552 return $scrambled unless $pass;
553
554 my $pass_len = length($pass);
555 my @pass_arr = split("", $pass);
556 my $i;
557
558 # from cvs/src/scramble.c
559 my @shifts = (
560 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
561 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
562 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
563 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
564 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
565 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
566 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
567 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
568 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
569 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
570 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
571 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
572 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
573 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
574 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
575 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
576 );
577
578 for ($i = 0; $i < $pass_len; $i++) {
579 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
580 }
581
582 return $scrambled;
583}
584
585package main;
586
587my $cvs = CVSconn->new($opt_d, $cvs_tree);
588
589
590sub pdate($) {
591 my ($d) = @_;
592 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
593 or die "Unparseable date: $d\n";
594 my $y=$1; $y-=1900 if $y>1900;
595 return timegm($6||0,$5,$4,$3,$2-1,$y);
596}
597
598sub pmode($) {
599 my ($mode) = @_;
600 my $m = 0;
601 my $mm = 0;
602 my $um = 0;
603 for my $x(split(//,$mode)) {
604 if ($x eq ",") {
605 $m |= $mm&$um;
606 $mm = 0;
607 $um = 0;
608 } elsif ($x eq "u") { $um |= 0700;
609 } elsif ($x eq "g") { $um |= 0070;
610 } elsif ($x eq "o") { $um |= 0007;
611 } elsif ($x eq "r") { $mm |= 0444;
612 } elsif ($x eq "w") { $mm |= 0222;
613 } elsif ($x eq "x") { $mm |= 0111;
614 } elsif ($x eq "=") { # do nothing
615 } else { die "Unknown mode: $mode\n";
616 }
617 }
618 $m |= $mm&$um;
619 return $m;
620}
621
622sub getwd() {
623 my $pwd = `pwd`;
624 chomp $pwd;
625 return $pwd;
626}
627
628sub is_sha1 {
629 my $s = shift;
630 return $s =~ /^[a-f0-9]{40}$/;
631}
632
633sub get_headref ($) {
634 my $name = shift;
635 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
636 return undef unless $? == 0;
637 chomp $r;
638 return $r;
639}
640
641my $user_filename_prepend = '';
642sub munge_user_filename {
643 my $name = shift;
644 return File::Spec->file_name_is_absolute($name) ?
645 $name :
646 $user_filename_prepend . $name;
647}
648
649-d $git_tree
650 or mkdir($git_tree,0777)
651 or die "Could not create $git_tree: $!";
652if ($git_tree ne '.') {
653 $user_filename_prepend = getwd() . '/';
654 chdir($git_tree);
655}
656
657my $last_branch = "";
658my $orig_branch = "";
659my %branch_date;
660my $tip_at_start = undef;
661
662my $git_dir = $ENV{"GIT_DIR"} || ".git";
663$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
664$ENV{"GIT_DIR"} = $git_dir;
665my $orig_git_index;
666$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
667
668my %index; # holds filenames of one index per branch
669
670unless (-d $git_dir) {
671 system(qw(git init));
672 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
673 system(qw(git read-tree --empty));
674 die "Cannot init an empty tree: $?\n" if $?;
675
676 $last_branch = $opt_o;
677 $orig_branch = "";
678} else {
679 open(F, "-|", qw(git symbolic-ref HEAD)) or
680 die "Cannot run git symbolic-ref: $!\n";
681 chomp ($last_branch = <F>);
682 $last_branch = basename($last_branch);
683 close(F);
684 unless ($last_branch) {
685 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
686 $last_branch = "master";
687 }
688 $orig_branch = $last_branch;
689 $tip_at_start = `git rev-parse --verify HEAD`;
690
691 # Get the last import timestamps
692 my $fmt = '($ref, $author) = (%(refname), %(author));';
693 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
694 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
695 while (defined(my $entry = <H>)) {
696 my ($ref, $author);
697 eval($entry) || die "cannot eval refs list: $@";
698 my ($head) = ($ref =~ m|^$remote/(.*)|);
699 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
700 $branch_date{$head} = $1;
701 }
702 close(H);
703 if (!exists $branch_date{$opt_o}) {
704 die "Branch '$opt_o' does not exist.\n".
705 "Either use the correct '-o branch' option,\n".
706 "or import to a new repository.\n";
707 }
708}
709
710-d $git_dir
711 or die "Could not create git subdir ($git_dir).\n";
712
713# now we read (and possibly save) author-info as well
714-f "$git_dir/cvs-authors" and
715 read_author_info("$git_dir/cvs-authors");
716if ($opt_A) {
717 read_author_info(munge_user_filename($opt_A));
718 write_author_info("$git_dir/cvs-authors");
719}
720
721# open .git/cvs-revisions, if requested
722open my $revision_map, '>>', "$git_dir/cvs-revisions"
723 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
724 if defined $opt_R;
725
726
727#
728# run cvsps into a file unless we are getting
729# it passed as a file via $opt_P
730#
731my $cvspsfile;
732unless ($opt_P) {
733 print "Running cvsps...\n" if $opt_v;
734 my $pid = open(CVSPS,"-|");
735 my $cvspsfh;
736 die "Cannot fork: $!\n" unless defined $pid;
737 unless ($pid) {
738 my @opt;
739 @opt = split(/,/,$opt_p) if defined $opt_p;
740 unshift @opt, '-z', $opt_z if defined $opt_z;
741 unshift @opt, '-q' unless defined $opt_v;
742 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
743 push @opt, '--cvs-direct';
744 }
745 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
746 die "Could not start cvsps: $!\n";
747 }
748 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
749 DIR => File::Spec->tmpdir());
750 while (<CVSPS>) {
751 print $cvspsfh $_;
752 }
753 close CVSPS;
754 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
755 close $cvspsfh;
756} else {
757 $cvspsfile = munge_user_filename($opt_P);
758}
759
760open(CVS, "<$cvspsfile") or die $!;
761
762## cvsps output:
763#---------------------
764#PatchSet 314
765#Date: 1999/09/18 13:03:59
766#Author: wkoch
767#Branch: STABLE-BRANCH-1-0
768#Ancestor branch: HEAD
769#Tag: (none)
770#Log:
771# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
772#Members:
773# README:1.57->1.57.2.1
774# VERSION:1.96->1.96.2.1
775#
776#---------------------
777
778my $state = 0;
779
780sub update_index (\@\@) {
781 my $old = shift;
782 my $new = shift;
783 open(my $fh, '|-', qw(git update-index -z --index-info))
784 or die "unable to open git update-index: $!";
785 print $fh
786 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
787 @$old),
788 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
789 @$new)
790 or die "unable to write to git update-index: $!";
791 close $fh
792 or die "unable to write to git update-index: $!";
793 $? and die "git update-index reported error: $?";
794}
795
796sub write_tree () {
797 open(my $fh, '-|', qw(git write-tree))
798 or die "unable to open git write-tree: $!";
799 chomp(my $tree = <$fh>);
800 is_sha1($tree)
801 or die "Cannot get tree id ($tree): $!";
802 close($fh)
803 or die "Error running git write-tree: $?\n";
804 print "Tree ID $tree\n" if $opt_v;
805 return $tree;
806}
807
808my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
809my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
810
811# commits that cvsps cannot place anywhere...
812$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
813
814sub commit {
815 if ($branch eq $opt_o && !$index{branch} &&
816 !get_headref("$remote/$branch")) {
817 # looks like an initial commit
818 # use the index primed by git init
819 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
820 $index{$branch} = "$git_dir/index";
821 } else {
822 # use an index per branch to speed up
823 # imports of projects with many branches
824 unless ($index{$branch}) {
825 $index{$branch} = tmpnam();
826 $ENV{GIT_INDEX_FILE} = $index{$branch};
827 if ($ancestor) {
828 system("git", "read-tree", "$remote/$ancestor");
829 } else {
830 system("git", "read-tree", "$remote/$branch");
831 }
832 die "read-tree failed: $?\n" if $?;
833 }
834 }
835 $ENV{GIT_INDEX_FILE} = $index{$branch};
836
837 update_index(@old, @new);
838 @old = @new = ();
839 my $tree = write_tree();
840 my $parent = get_headref("$remote/$last_branch");
841 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
842
843 my @commit_args;
844 push @commit_args, ("-p", $parent) if $parent;
845
846 # loose detection of merges
847 # based on the commit msg
848 foreach my $rx (@mergerx) {
849 next unless $logmsg =~ $rx && $1;
850 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
851 if (my $sha1 = get_headref("$remote/$mparent")) {
852 push @commit_args, '-p', "$remote/$mparent";
853 print "Merge parent branch: $mparent\n" if $opt_v;
854 }
855 }
856
857 $ENV{'TZ'}=$author_tz;
858 my $commit_date = strftime("%s %z", localtime($date));
859 $ENV{'TZ'}="UTC";
860 $ENV{GIT_AUTHOR_NAME} = $author_name;
861 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
862 $ENV{GIT_AUTHOR_DATE} = $commit_date;
863 $ENV{GIT_COMMITTER_NAME} = $author_name;
864 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
865 $ENV{GIT_COMMITTER_DATE} = $commit_date;
866 my $pid = open2(my $commit_read, my $commit_write,
867 'git', 'commit-tree', $tree, @commit_args);
868
869 # compatibility with git2cvs
870 substr($logmsg,32767) = "" if length($logmsg) > 32767;
871 $logmsg =~ s/[\s\n]+\z//;
872
873 if (@skipped) {
874 $logmsg .= "\n\n\nSKIPPED:\n\t";
875 $logmsg .= join("\n\t", @skipped) . "\n";
876 @skipped = ();
877 }
878
879 print($commit_write "$logmsg\n") && close($commit_write)
880 or die "Error writing to git commit-tree: $!\n";
881
882 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
883 chomp(my $cid = <$commit_read>);
884 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
885 print "Commit ID $cid\n" if $opt_v;
886 close($commit_read);
887
888 waitpid($pid,0);
889 die "Error running git commit-tree: $?\n" if $?;
890
891 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
892 or die "Cannot write branch $branch for update: $!\n";
893
894 if ($revision_map) {
895 print $revision_map "@$_ $cid\n" for @commit_revisions;
896 }
897 @commit_revisions = ();
898
899 if ($tag) {
900 my ($xtag) = $tag;
901 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
902 $xtag =~ tr/_/\./ if ( $opt_u );
903 $xtag =~ s/[\/]/$opt_s/g;
904
905 # See refs.c for these rules.
906 # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
907 $xtag =~ s/[ ~\^:\\\*\?\[]//g;
908 # Other bad strings for tags:
909 # (See check_refname_component in refs.c.)
910 1 while $xtag =~ s/
911 (?: \.\. # Tag cannot contain '..'.
912 | \@{ # Tag cannot contain '@{'.
913 | ^ - # Tag cannot begin with '-'.
914 | \.lock $ # Tag cannot end with '.lock'.
915 | ^ \. # Tag cannot begin...
916 | \. $ # ...or end with '.'
917 )//xg;
918 # Tag cannot be empty.
919 if ($xtag eq '') {
920 warn("warning: ignoring tag '$tag'",
921 " with invalid tagname\n");
922 return;
923 }
924
925 if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
926 # We did our best to sanitize the tag, but still failed
927 # for whatever reason. Bail out, and give the user
928 # enough information to understand if/how we should
929 # improve the translation in the future.
930 if ($tag ne $xtag) {
931 print "Translated '$tag' tag to '$xtag'\n";
932 }
933 die "Cannot create tag $xtag: $!\n";
934 }
935
936 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
937 }
938};
939
940my $commitcount = 1;
941while (<CVS>) {
942 chomp;
943 if ($state == 0 and /^-+$/) {
944 $state = 1;
945 } elsif ($state == 0) {
946 $state = 1;
947 redo;
948 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
949 $patchset = 0+$_;
950 $state=2;
951 } elsif ($state == 2 and s/^Date:\s+//) {
952 $date = pdate($_);
953 unless ($date) {
954 print STDERR "Could not parse date: $_\n";
955 $state=0;
956 next;
957 }
958 $state=3;
959 } elsif ($state == 3 and s/^Author:\s+//) {
960 $author_tz = "UTC";
961 s/\s+$//;
962 if (/^(.*?)\s+<(.*)>/) {
963 ($author_name, $author_email) = ($1, $2);
964 } elsif ($conv_author_name{$_}) {
965 $author_name = $conv_author_name{$_};
966 $author_email = $conv_author_email{$_};
967 $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
968 } else {
969 $author_name = $author_email = $_;
970 }
971 $state = 4;
972 } elsif ($state == 4 and s/^Branch:\s+//) {
973 s/\s+$//;
974 tr/_/\./ if ( $opt_u );
975 s/[\/]/$opt_s/g;
976 $branch = $_;
977 $state = 5;
978 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
979 s/\s+$//;
980 $ancestor = $_;
981 $ancestor = $opt_o if $ancestor eq "HEAD";
982 $state = 6;
983 } elsif ($state == 5) {
984 $ancestor = undef;
985 $state = 6;
986 redo;
987 } elsif ($state == 6 and s/^Tag:\s+//) {
988 s/\s+$//;
989 if ($_ eq "(none)") {
990 $tag = undef;
991 } else {
992 $tag = $_;
993 }
994 $state = 7;
995 } elsif ($state == 7 and /^Log:/) {
996 $logmsg = "";
997 $state = 8;
998 } elsif ($state == 8 and /^Members:/) {
999 $branch = $opt_o if $branch eq "HEAD";
1000 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1001 # skip
1002 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1003 $state = 11;
1004 next;
1005 }
1006 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1007 # skip if the commit is too recent
1008 # given that the cvsps default fuzz is 300s, we give ourselves another
1009 # 300s just in case -- this also prevents skipping commits
1010 # due to server clock drift
1011 print "skip patchset $patchset: $date too recent\n" if $opt_v;
1012 $state = 11;
1013 next;
1014 }
1015 if (exists $ignorebranch{$branch}) {
1016 print STDERR "Skipping $branch\n";
1017 $state = 11;
1018 next;
1019 }
1020 if ($ancestor) {
1021 if ($ancestor eq $branch) {
1022 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1023 $ancestor = $opt_o;
1024 }
1025 if (defined get_headref("$remote/$branch")) {
1026 print STDERR "Branch $branch already exists!\n";
1027 $state=11;
1028 next;
1029 }
1030 my $id = get_headref("$remote/$ancestor");
1031 if (!$id) {
1032 print STDERR "Branch $ancestor does not exist!\n";
1033 $ignorebranch{$branch} = 1;
1034 $state=11;
1035 next;
1036 }
1037
1038 system(qw(git update-ref -m cvsimport),
1039 "$remote/$branch", $id);
1040 if($? != 0) {
1041 print STDERR "Could not create branch $branch\n";
1042 $ignorebranch{$branch} = 1;
1043 $state=11;
1044 next;
1045 }
1046 }
1047 $last_branch = $branch if $branch ne $last_branch;
1048 $state = 9;
1049 } elsif ($state == 8) {
1050 $logmsg .= "$_\n";
1051 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1052# VERSION:1.96->1.96.2.1
1053 my $init = ($2 eq "INITIAL");
1054 my $fn = $1;
1055 my $rev = $3;
1056 $fn =~ s#^/+##;
1057 if ($opt_S && $fn =~ m/$opt_S/) {
1058 print "SKIPPING $fn v $rev\n";
1059 push(@skipped, $fn);
1060 next;
1061 }
1062 push @commit_revisions, [$fn, $rev];
1063 print "Fetching $fn v $rev\n" if $opt_v;
1064 my ($tmpname, $size) = $cvs->file($fn,$rev);
1065 if ($size == -1) {
1066 push(@old,$fn);
1067 print "Drop $fn\n" if $opt_v;
1068 } else {
1069 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1070 my $pid = open(my $F, '-|');
1071 die $! unless defined $pid;
1072 if (!$pid) {
1073 exec("git", "hash-object", "-w", $tmpname)
1074 or die "Cannot create object: $!\n";
1075 }
1076 my $sha = <$F>;
1077 chomp $sha;
1078 close $F;
1079 my $mode = pmode($cvs->{'mode'});
1080 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1081 }
1082 unlink($tmpname);
1083 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1084 my $fn = $1;
1085 my $rev = $2;
1086 $fn =~ s#^/+##;
1087 push @commit_revisions, [$fn, $rev];
1088 push(@old,$fn);
1089 print "Delete $fn\n" if $opt_v;
1090 } elsif ($state == 9 and /^\s*$/) {
1091 $state = 10;
1092 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1093 $commitcount++;
1094 if ($opt_L && $commitcount > $opt_L) {
1095 last;
1096 }
1097 commit();
1098 if (($commitcount & 1023) == 0) {
1099 system(qw(git repack -a -d));
1100 }
1101 $state = 1;
1102 } elsif ($state == 11 and /^-+$/) {
1103 $state = 1;
1104 } elsif (/^-+$/) { # end of unknown-line processing
1105 $state = 1;
1106 } elsif ($state != 11) { # ignore stuff when skipping
1107 print STDERR "* UNKNOWN LINE * $_\n";
1108 }
1109}
1110commit() if $branch and $state != 11;
1111
1112unless ($opt_P) {
1113 unlink($cvspsfile);
1114}
1115
1116# The heuristic of repacking every 1024 commits can leave a
1117# lot of unpacked data. If there is more than 1MB worth of
1118# not-packed objects, repack once more.
1119my $line = `git count-objects`;
1120if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1121 my ($n_objects, $kb) = ($1, $2);
1122 1024 < $kb
1123 and system(qw(git repack -a -d));
1124}
1125
1126foreach my $git_index (values %index) {
1127 if ($git_index ne "$git_dir/index") {
1128 unlink($git_index);
1129 }
1130}
1131
1132if (defined $orig_git_index) {
1133 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1134} else {
1135 delete $ENV{GIT_INDEX_FILE};
1136}
1137
1138# Now switch back to the branch we were in before all of this happened
1139if ($orig_branch) {
1140 print "DONE.\n" if $opt_v;
1141 if ($opt_i) {
1142 exit 0;
1143 }
1144 my $tip_at_end = `git rev-parse --verify HEAD`;
1145 if ($tip_at_start ne $tip_at_end) {
1146 for ($tip_at_start, $tip_at_end) { chomp; }
1147 print "Fetched into the current branch.\n" if $opt_v;
1148 system(qw(git read-tree -u -m),
1149 $tip_at_start, $tip_at_end);
1150 die "Fast-forward update failed: $?\n" if $?;
1151 }
1152 else {
1153 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1154 die "Could not merge $opt_o into the current branch.\n" if $?;
1155 }
1156} else {
1157 $orig_branch = "master";
1158 print "DONE; creating $orig_branch branch\n" if $opt_v;
1159 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1160 unless defined get_headref('refs/heads/master');
1161 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1162 if ($opt_r && $opt_o ne 'HEAD');
1163 system('git', 'update-ref', 'HEAD', "$orig_branch");
1164 unless ($opt_i) {
1165 system(qw(git checkout -f));
1166 die "checkout failed: $?\n" if $?;
1167 }
1168}