1#!/usr/bin/perl
2
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use 5.008;
17use strict;
18use warnings;
19use Getopt::Long;
20use File::Spec;
21use File::Temp qw(tempfile tmpnam);
22use File::Path qw(mkpath);
23use File::Basename qw(basename dirname);
24use Time::Local;
25use IO::Socket;
26use IO::Pipe;
27use POSIX qw(strftime tzset dup2 ENOENT);
28use IPC::Open2;
29use Git qw(get_tz_offset);
30
31$SIG{'PIPE'}="IGNORE";
32set_timezone('UTC');
33
34our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
35my (%conv_author_name, %conv_author_email, %conv_author_tz);
36
37sub usage(;$) {
38 my $msg = shift;
39 print(STDERR "Error: $msg\n") if $msg;
40 print STDERR <<END;
41usage: git cvsimport # fetch/update GIT from CVS
42 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
43 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
44 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
45 [-r remote] [-R] [CVS_module]
46END
47 exit(1);
48}
49
50sub read_author_info($) {
51 my ($file) = @_;
52 my $user;
53 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
54
55 while (<$f>) {
56 # Expected format is this:
57 # exon=Andreas Ericsson <ae@op5.se>
58 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
59 $user = $1;
60 $conv_author_name{$user} = $2;
61 $conv_author_email{$user} = $3;
62 }
63 # or with an optional timezone:
64 # spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
65 elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
66 $user = $1;
67 $conv_author_name{$user} = $2;
68 $conv_author_email{$user} = $3;
69 $conv_author_tz{$user} = $4;
70 }
71 # However, we also read from CVSROOT/users format
72 # to ease migration.
73 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
74 my $mapped;
75 ($user, $mapped) = ($1, $3);
76 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
77 $conv_author_name{$user} = $1;
78 $conv_author_email{$user} = $2;
79 }
80 elsif ($mapped =~ /^<?(.*)>?$/) {
81 $conv_author_name{$user} = $user;
82 $conv_author_email{$user} = $1;
83 }
84 }
85 # NEEDSWORK: Maybe warn on unrecognized lines?
86 }
87 close ($f);
88}
89
90sub write_author_info($) {
91 my ($file) = @_;
92 open my $f, '>', $file or
93 die("Failed to open $file for writing: $!");
94
95 foreach (keys %conv_author_name) {
96 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
97 print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
98 print $f "\n";
99 }
100 close ($f);
101}
102
103# Versions of perl before 5.10.0 may not automatically check $TZ each
104# time localtime is run (most platforms will do so only the first time).
105# We can work around this by using tzset() to update the internal
106# variable whenever we change the environment.
107sub set_timezone {
108 $ENV{TZ} = shift;
109 tzset();
110}
111
112# convert getopts specs for use by git config
113my %longmap = (
114 'A:' => 'authors-file',
115 'M:' => 'merge-regex',
116 'P:' => undef,
117 'R' => 'track-revisions',
118 'S:' => 'ignore-paths',
119);
120
121sub read_repo_config {
122 # Split the string between characters, unless there is a ':'
123 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
124 my @opts = split(/ *(?!:)/, shift);
125 foreach my $o (@opts) {
126 my $key = $o;
127 $key =~ s/://g;
128 my $arg = 'git config';
129 $arg .= ' --bool' if ($o !~ /:$/);
130 my $ckey = $key;
131
132 if (exists $longmap{$o}) {
133 # An uppercase option like -R cannot be
134 # expressed in the configuration, as the
135 # variable names are downcased.
136 $ckey = $longmap{$o};
137 next if (! defined $ckey);
138 $ckey =~ s/-//g;
139 }
140 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
141 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
142 no strict 'refs';
143 my $opt_name = "opt_" . $key;
144 if (!$$opt_name) {
145 $$opt_name = $tmp;
146 }
147 }
148 }
149}
150
151my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
152read_repo_config($opts);
153Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
154
155# turn the Getopt::Std specification in a Getopt::Long one,
156# with support for multiple -M options
157GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
158 or usage();
159usage if $opt_h;
160
161if (@ARGV == 0) {
162 chomp(my $module = `git config --get cvsimport.module`);
163 push(@ARGV, $module) if $? == 0;
164}
165@ARGV <= 1 or usage("You can't specify more than one CVS module");
166
167if ($opt_d) {
168 $ENV{"CVSROOT"} = $opt_d;
169} elsif (-f 'CVS/Root') {
170 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
171 $opt_d = <$f>;
172 chomp $opt_d;
173 close $f;
174 $ENV{"CVSROOT"} = $opt_d;
175} elsif ($ENV{"CVSROOT"}) {
176 $opt_d = $ENV{"CVSROOT"};
177} else {
178 usage("CVSROOT needs to be set");
179}
180$opt_s ||= "-";
181$opt_a ||= 0;
182
183my $git_tree = $opt_C;
184$git_tree ||= ".";
185
186my $remote;
187if (defined $opt_r) {
188 $remote = 'refs/remotes/' . $opt_r;
189 $opt_o ||= "master";
190} else {
191 $opt_o ||= "origin";
192 $remote = 'refs/heads';
193}
194
195my $cvs_tree;
196if ($#ARGV == 0) {
197 $cvs_tree = $ARGV[0];
198} elsif (-f 'CVS/Repository') {
199 open my $f, '<', 'CVS/Repository' or
200 die 'Failed to open CVS/Repository';
201 $cvs_tree = <$f>;
202 chomp $cvs_tree;
203 close $f;
204} else {
205 usage("CVS module has to be specified");
206}
207
208our @mergerx = ();
209if ($opt_m) {
210 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
211}
212if (@opt_M) {
213 push (@mergerx, map { qr/$_/ } @opt_M);
214}
215
216# Remember UTC of our starting time
217# we'll want to avoid importing commits
218# that are too recent
219our $starttime = time();
220
221select(STDERR); $|=1; select(STDOUT);
222
223
224package CVSconn;
225# Basic CVS dialog.
226# We're only interested in connecting and downloading, so ...
227
228use File::Spec;
229use File::Temp qw(tempfile);
230use POSIX qw(strftime dup2);
231
232sub new {
233 my ($what,$repo,$subdir) = @_;
234 $what=ref($what) if ref($what);
235
236 my $self = {};
237 $self->{'buffer'} = "";
238 bless($self,$what);
239
240 $repo =~ s#/+$##;
241 $self->{'fullrep'} = $repo;
242 $self->conn();
243
244 $self->{'subdir'} = $subdir;
245 $self->{'lines'} = undef;
246
247 return $self;
248}
249
250sub find_password_entry {
251 my ($cvspass, @cvsroot) = @_;
252 my ($file, $delim) = @$cvspass;
253 my $pass;
254 local ($_);
255
256 if (open(my $fh, $file)) {
257 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
258 CVSPASSFILE:
259 while (<$fh>) {
260 chomp;
261 s/^\/\d+\s+//;
262 my ($w, $p) = split($delim,$_,2);
263 for my $cvsroot (@cvsroot) {
264 if ($w eq $cvsroot) {
265 $pass = $p;
266 last CVSPASSFILE;
267 }
268 }
269 }
270 close($fh);
271 }
272 return $pass;
273}
274
275sub conn {
276 my $self = shift;
277 my $repo = $self->{'fullrep'};
278 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
279 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
280
281 my ($proxyhost,$proxyport);
282 if ($param && ($param =~ m/proxy=([^;]+)/)) {
283 $proxyhost = $1;
284 # Default proxyport, if not specified, is 8080.
285 $proxyport = 8080;
286 if ($ENV{"CVS_PROXY_PORT"}) {
287 $proxyport = $ENV{"CVS_PROXY_PORT"};
288 }
289 if ($param =~ m/proxyport=([^;]+)/) {
290 $proxyport = $1;
291 }
292 }
293 $repo ||= '/';
294
295 # if username is not explicit in CVSROOT, then use current user, as cvs would
296 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
297 my $rr2 = "-";
298 unless ($port) {
299 $rr2 = ":pserver:$user\@$serv:$repo";
300 $port=2401;
301 }
302 my $rr = ":pserver:$user\@$serv:$port$repo";
303
304 if ($pass) {
305 $pass = $self->_scramble($pass);
306 } else {
307 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
308 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
309 my @loc = ();
310 foreach my $cvspass (@cvspass) {
311 my $p = find_password_entry($cvspass, $rr, $rr2);
312 if ($p) {
313 push @loc, $cvspass->[0];
314 $pass = $p;
315 }
316 }
317
318 if (1 < @loc) {
319 die("Multiple cvs password files have ".
320 "entries for CVSROOT $opt_d: @loc");
321 } elsif (!$pass) {
322 $pass = "A";
323 }
324 }
325
326 my ($s, $rep);
327 if ($proxyhost) {
328
329 # Use a HTTP Proxy. Only works for HTTP proxies that
330 # don't require user authentication
331 #
332 # See: http://www.ietf.org/rfc/rfc2817.txt
333
334 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
335 die "Socket to $proxyhost: $!\n" unless defined $s;
336 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
337 or die "Write to $proxyhost: $!\n";
338 $s->flush();
339
340 $rep = <$s>;
341
342 # The answer should look like 'HTTP/1.x 2yy ....'
343 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
344 die "Proxy connect: $rep\n";
345 }
346 # Skip up to the empty line of the proxy server output
347 # including the response headers.
348 while ($rep = <$s>) {
349 last if (!defined $rep ||
350 $rep eq "\n" ||
351 $rep eq "\r\n");
352 }
353 } else {
354 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
355 die "Socket to $serv: $!\n" unless defined $s;
356 }
357
358 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
359 or die "Write to $serv: $!\n";
360 $s->flush();
361
362 $rep = <$s>;
363
364 if ($rep ne "I LOVE YOU\n") {
365 $rep="<unknown>" unless $rep;
366 die "AuthReply: $rep\n";
367 }
368 $self->{'socketo'} = $s;
369 $self->{'socketi'} = $s;
370 } else { # local or ext: Fork off our own cvs server.
371 my $pr = IO::Pipe->new();
372 my $pw = IO::Pipe->new();
373 my $pid = fork();
374 die "Fork: $!\n" unless defined $pid;
375 my $cvs = 'cvs';
376 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
377 my $rsh = 'rsh';
378 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
379
380 my @cvs = ($cvs, 'server');
381 my ($local, $user, $host);
382 $local = $repo =~ s/:local://;
383 if (!$local) {
384 $repo =~ s/:ext://;
385 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
386 ($user, $host) = ($1, $2);
387 }
388 if (!$local) {
389 if ($user) {
390 unshift @cvs, $rsh, '-l', $user, $host;
391 } else {
392 unshift @cvs, $rsh, $host;
393 }
394 }
395
396 unless ($pid) {
397 $pr->writer();
398 $pw->reader();
399 dup2($pw->fileno(),0);
400 dup2($pr->fileno(),1);
401 $pr->close();
402 $pw->close();
403 exec(@cvs);
404 }
405 $pw->writer();
406 $pr->reader();
407 $self->{'socketo'} = $pw;
408 $self->{'socketi'} = $pr;
409 }
410 $self->{'socketo'}->write("Root $repo\n");
411
412 # Trial and error says that this probably is the minimum set
413 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
414
415 $self->{'socketo'}->write("valid-requests\n");
416 $self->{'socketo'}->flush();
417
418 my $rep=$self->readline();
419 die "Failed to read from server" unless defined $rep;
420 chomp($rep);
421 if ($rep !~ s/^Valid-requests\s*//) {
422 $rep="<unknown>" unless $rep;
423 die "Expected Valid-requests from server, but got: $rep\n";
424 }
425 chomp(my $res=$self->readline());
426 die "validReply: $res\n" if $res ne "ok";
427
428 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
429 $self->{'repo'} = $repo;
430}
431
432sub readline {
433 my ($self) = @_;
434 return $self->{'socketi'}->getline();
435}
436
437sub _file {
438 # Request a file with a given revision.
439 # Trial and error says this is a good way to do it. :-/
440 my ($self,$fn,$rev) = @_;
441 $self->{'socketo'}->write("Argument -N\n") or return undef;
442 $self->{'socketo'}->write("Argument -P\n") or return undef;
443 # -kk: Linus' version doesn't use it - defaults to off
444 if ($opt_k) {
445 $self->{'socketo'}->write("Argument -kk\n") or return undef;
446 }
447 $self->{'socketo'}->write("Argument -r\n") or return undef;
448 $self->{'socketo'}->write("Argument $rev\n") or return undef;
449 $self->{'socketo'}->write("Argument --\n") or return undef;
450 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
451 $self->{'socketo'}->write("Directory .\n") or return undef;
452 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
453 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
454 $self->{'socketo'}->write("co\n") or return undef;
455 $self->{'socketo'}->flush() or return undef;
456 $self->{'lines'} = 0;
457 return 1;
458}
459sub _line {
460 # Read a line from the server.
461 # ... except that 'line' may be an entire file. ;-)
462 my ($self, $fh) = @_;
463 die "Not in lines" unless defined $self->{'lines'};
464
465 my $line;
466 my $res=0;
467 while (defined($line = $self->readline())) {
468 # M U gnupg-cvs-rep/AUTHORS
469 # Updated gnupg-cvs-rep/
470 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
471 # /AUTHORS/1.1///T1.1
472 # u=rw,g=rw,o=rw
473 # 0
474 # ok
475
476 if ($line =~ s/^(?:Created|Updated) //) {
477 $line = $self->readline(); # path
478 $line = $self->readline(); # Entries line
479 my $mode = $self->readline(); chomp $mode;
480 $self->{'mode'} = $mode;
481 defined (my $cnt = $self->readline())
482 or die "EOF from server after 'Changed'\n";
483 chomp $cnt;
484 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
485 $line="";
486 $res = $self->_fetchfile($fh, $cnt);
487 } elsif ($line =~ s/^ //) {
488 print $fh $line;
489 $res += length($line);
490 } elsif ($line =~ /^M\b/) {
491 # output, do nothing
492 } elsif ($line =~ /^Mbinary\b/) {
493 my $cnt;
494 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
495 chomp $cnt;
496 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
497 $line="";
498 $res += $self->_fetchfile($fh, $cnt);
499 } else {
500 chomp $line;
501 if ($line eq "ok") {
502 # print STDERR "S: ok (".length($res).")\n";
503 return $res;
504 } elsif ($line =~ s/^E //) {
505 # print STDERR "S: $line\n";
506 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
507 $line = $self->readline(); # filename
508 $line = $self->readline(); # OK
509 chomp $line;
510 die "Unknown: $line" if $line ne "ok";
511 return -1;
512 } else {
513 die "Unknown: $line\n";
514 }
515 }
516 }
517 return undef;
518}
519sub file {
520 my ($self,$fn,$rev) = @_;
521 my $res;
522
523 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
524 DIR => File::Spec->tmpdir(), UNLINK => 1);
525
526 $self->_file($fn,$rev) and $res = $self->_line($fh);
527
528 if (!defined $res) {
529 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
530 truncate $fh, 0;
531 $self->conn();
532 $self->_file($fn,$rev) or die "No file command send";
533 $res = $self->_line($fh);
534 die "Retry failed" unless defined $res;
535 }
536 close ($fh);
537
538 return ($name, $res);
539}
540sub _fetchfile {
541 my ($self, $fh, $cnt) = @_;
542 my $res = 0;
543 my $bufsize = 1024 * 1024;
544 while ($cnt) {
545 if ($bufsize > $cnt) {
546 $bufsize = $cnt;
547 }
548 my $buf;
549 my $num = $self->{'socketi'}->read($buf,$bufsize);
550 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
551 print $fh $buf;
552 $res += $num;
553 $cnt -= $num;
554 }
555 return $res;
556}
557
558sub _scramble {
559 my ($self, $pass) = @_;
560 my $scrambled = "A";
561
562 return $scrambled unless $pass;
563
564 my $pass_len = length($pass);
565 my @pass_arr = split("", $pass);
566 my $i;
567
568 # from cvs/src/scramble.c
569 my @shifts = (
570 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
571 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
572 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
573 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
574 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
575 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
576 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
577 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
578 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
579 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
580 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
581 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
582 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
583 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
584 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
585 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
586 );
587
588 for ($i = 0; $i < $pass_len; $i++) {
589 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
590 }
591
592 return $scrambled;
593}
594
595package main;
596
597my $cvs = CVSconn->new($opt_d, $cvs_tree);
598
599
600sub pdate($) {
601 my ($d) = @_;
602 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
603 or die "Unparseable date: $d\n";
604 my $y=$1; $y-=1900 if $y>1900;
605 return timegm($6||0,$5,$4,$3,$2-1,$y);
606}
607
608sub pmode($) {
609 my ($mode) = @_;
610 my $m = 0;
611 my $mm = 0;
612 my $um = 0;
613 for my $x(split(//,$mode)) {
614 if ($x eq ",") {
615 $m |= $mm&$um;
616 $mm = 0;
617 $um = 0;
618 } elsif ($x eq "u") { $um |= 0700;
619 } elsif ($x eq "g") { $um |= 0070;
620 } elsif ($x eq "o") { $um |= 0007;
621 } elsif ($x eq "r") { $mm |= 0444;
622 } elsif ($x eq "w") { $mm |= 0222;
623 } elsif ($x eq "x") { $mm |= 0111;
624 } elsif ($x eq "=") { # do nothing
625 } else { die "Unknown mode: $mode\n";
626 }
627 }
628 $m |= $mm&$um;
629 return $m;
630}
631
632sub getwd() {
633 my $pwd = `pwd`;
634 chomp $pwd;
635 return $pwd;
636}
637
638sub is_sha1 {
639 my $s = shift;
640 return $s =~ /^[a-f0-9]{40}$/;
641}
642
643sub get_headref ($) {
644 my $name = shift;
645 $name =~ s/'/'\\''/;
646 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
647 return undef unless $? == 0;
648 chomp $r;
649 return $r;
650}
651
652my $user_filename_prepend = '';
653sub munge_user_filename {
654 my $name = shift;
655 return File::Spec->file_name_is_absolute($name) ?
656 $name :
657 $user_filename_prepend . $name;
658}
659
660-d $git_tree
661 or mkdir($git_tree,0777)
662 or die "Could not create $git_tree: $!";
663if ($git_tree ne '.') {
664 $user_filename_prepend = getwd() . '/';
665 chdir($git_tree);
666}
667
668my $last_branch = "";
669my $orig_branch = "";
670my %branch_date;
671my $tip_at_start = undef;
672
673my $git_dir = $ENV{"GIT_DIR"} || ".git";
674$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
675$ENV{"GIT_DIR"} = $git_dir;
676my $orig_git_index;
677$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
678
679my %index; # holds filenames of one index per branch
680
681unless (-d $git_dir) {
682 system(qw(git init));
683 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
684 system(qw(git read-tree --empty));
685 die "Cannot init an empty tree: $?\n" if $?;
686
687 $last_branch = $opt_o;
688 $orig_branch = "";
689} else {
690 open(F, "-|", qw(git symbolic-ref HEAD)) or
691 die "Cannot run git symbolic-ref: $!\n";
692 chomp ($last_branch = <F>);
693 $last_branch = basename($last_branch);
694 close(F);
695 unless ($last_branch) {
696 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
697 $last_branch = "master";
698 }
699 $orig_branch = $last_branch;
700 $tip_at_start = `git rev-parse --verify HEAD`;
701
702 # Get the last import timestamps
703 my $fmt = '($ref, $author) = (%(refname), %(author));';
704 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
705 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
706 while (defined(my $entry = <H>)) {
707 my ($ref, $author);
708 eval($entry) || die "cannot eval refs list: $@";
709 my ($head) = ($ref =~ m|^$remote/(.*)|);
710 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
711 $branch_date{$head} = $1;
712 }
713 close(H);
714 if (!exists $branch_date{$opt_o}) {
715 die "Branch '$opt_o' does not exist.\n".
716 "Either use the correct '-o branch' option,\n".
717 "or import to a new repository.\n";
718 }
719}
720
721-d $git_dir
722 or die "Could not create git subdir ($git_dir).\n";
723
724# now we read (and possibly save) author-info as well
725-f "$git_dir/cvs-authors" and
726 read_author_info("$git_dir/cvs-authors");
727if ($opt_A) {
728 read_author_info(munge_user_filename($opt_A));
729 write_author_info("$git_dir/cvs-authors");
730}
731
732# open .git/cvs-revisions, if requested
733open my $revision_map, '>>', "$git_dir/cvs-revisions"
734 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
735 if defined $opt_R;
736
737
738#
739# run cvsps into a file unless we are getting
740# it passed as a file via $opt_P
741#
742my $cvspsfile;
743unless ($opt_P) {
744 print "Running cvsps...\n" if $opt_v;
745 my $pid = open(CVSPS,"-|");
746 my $cvspsfh;
747 die "Cannot fork: $!\n" unless defined $pid;
748 unless ($pid) {
749 my @opt;
750 @opt = split(/,/,$opt_p) if defined $opt_p;
751 unshift @opt, '-z', $opt_z if defined $opt_z;
752 unshift @opt, '-q' unless defined $opt_v;
753 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
754 push @opt, '--cvs-direct';
755 }
756 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
757 die "Could not start cvsps: $!\n";
758 }
759 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
760 DIR => File::Spec->tmpdir());
761 while (<CVSPS>) {
762 print $cvspsfh $_;
763 }
764 close CVSPS;
765 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
766 close $cvspsfh;
767} else {
768 $cvspsfile = munge_user_filename($opt_P);
769}
770
771open(CVS, "<$cvspsfile") or die $!;
772
773## cvsps output:
774#---------------------
775#PatchSet 314
776#Date: 1999/09/18 13:03:59
777#Author: wkoch
778#Branch: STABLE-BRANCH-1-0
779#Ancestor branch: HEAD
780#Tag: (none)
781#Log:
782# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
783#Members:
784# README:1.57->1.57.2.1
785# VERSION:1.96->1.96.2.1
786#
787#---------------------
788
789my $state = 0;
790
791sub update_index (\@\@) {
792 my $old = shift;
793 my $new = shift;
794 open(my $fh, '|-', qw(git update-index -z --index-info))
795 or die "unable to open git update-index: $!";
796 print $fh
797 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
798 @$old),
799 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
800 @$new)
801 or die "unable to write to git update-index: $!";
802 close $fh
803 or die "unable to write to git update-index: $!";
804 $? and die "git update-index reported error: $?";
805}
806
807sub write_tree () {
808 open(my $fh, '-|', qw(git write-tree))
809 or die "unable to open git write-tree: $!";
810 chomp(my $tree = <$fh>);
811 is_sha1($tree)
812 or die "Cannot get tree id ($tree): $!";
813 close($fh)
814 or die "Error running git write-tree: $?\n";
815 print "Tree ID $tree\n" if $opt_v;
816 return $tree;
817}
818
819my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
820my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
821
822# commits that cvsps cannot place anywhere...
823$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
824
825sub commit {
826 if ($branch eq $opt_o && !$index{branch} &&
827 !get_headref("$remote/$branch")) {
828 # looks like an initial commit
829 # use the index primed by git init
830 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
831 $index{$branch} = "$git_dir/index";
832 } else {
833 # use an index per branch to speed up
834 # imports of projects with many branches
835 unless ($index{$branch}) {
836 $index{$branch} = tmpnam();
837 $ENV{GIT_INDEX_FILE} = $index{$branch};
838 if ($ancestor) {
839 system("git", "read-tree", "$remote/$ancestor");
840 } else {
841 system("git", "read-tree", "$remote/$branch");
842 }
843 die "read-tree failed: $?\n" if $?;
844 }
845 }
846 $ENV{GIT_INDEX_FILE} = $index{$branch};
847
848 update_index(@old, @new);
849 @old = @new = ();
850 my $tree = write_tree();
851 my $parent = get_headref("$remote/$last_branch");
852 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
853
854 my @commit_args;
855 push @commit_args, ("-p", $parent) if $parent;
856
857 # loose detection of merges
858 # based on the commit msg
859 foreach my $rx (@mergerx) {
860 next unless $logmsg =~ $rx && $1;
861 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
862 if (my $sha1 = get_headref("$remote/$mparent")) {
863 push @commit_args, '-p', "$remote/$mparent";
864 print "Merge parent branch: $mparent\n" if $opt_v;
865 }
866 }
867
868 set_timezone($author_tz);
869 # $date is in the seconds since epoch format
870 my $tz_offset = get_tz_offset($date);
871 my $commit_date = "$date $tz_offset";
872 set_timezone('UTC');
873 $ENV{GIT_AUTHOR_NAME} = $author_name;
874 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
875 $ENV{GIT_AUTHOR_DATE} = $commit_date;
876 $ENV{GIT_COMMITTER_NAME} = $author_name;
877 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
878 $ENV{GIT_COMMITTER_DATE} = $commit_date;
879 my $pid = open2(my $commit_read, my $commit_write,
880 'git', 'commit-tree', $tree, @commit_args);
881
882 # compatibility with git2cvs
883 substr($logmsg,32767) = "" if length($logmsg) > 32767;
884 $logmsg =~ s/[\s\n]+\z//;
885
886 if (@skipped) {
887 $logmsg .= "\n\n\nSKIPPED:\n\t";
888 $logmsg .= join("\n\t", @skipped) . "\n";
889 @skipped = ();
890 }
891
892 print($commit_write "$logmsg\n") && close($commit_write)
893 or die "Error writing to git commit-tree: $!\n";
894
895 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
896 chomp(my $cid = <$commit_read>);
897 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
898 print "Commit ID $cid\n" if $opt_v;
899 close($commit_read);
900
901 waitpid($pid,0);
902 die "Error running git commit-tree: $?\n" if $?;
903
904 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
905 or die "Cannot write branch $branch for update: $!\n";
906
907 if ($revision_map) {
908 print $revision_map "@$_ $cid\n" for @commit_revisions;
909 }
910 @commit_revisions = ();
911
912 if ($tag) {
913 my ($xtag) = $tag;
914 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
915 $xtag =~ tr/_/\./ if ( $opt_u );
916 $xtag =~ s/[\/]/$opt_s/g;
917
918 # See refs.c for these rules.
919 # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
920 $xtag =~ s/[ ~\^:\\\*\?\[]//g;
921 # Other bad strings for tags:
922 # (See check_refname_component in refs.c.)
923 1 while $xtag =~ s/
924 (?: \.\. # Tag cannot contain '..'.
925 | \@\{ # Tag cannot contain '@{'.
926 | ^ - # Tag cannot begin with '-'.
927 | \.lock $ # Tag cannot end with '.lock'.
928 | ^ \. # Tag cannot begin...
929 | \. $ # ...or end with '.'
930 )//xg;
931 # Tag cannot be empty.
932 if ($xtag eq '') {
933 warn("warning: ignoring tag '$tag'",
934 " with invalid tagname\n");
935 return;
936 }
937
938 if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
939 # We did our best to sanitize the tag, but still failed
940 # for whatever reason. Bail out, and give the user
941 # enough information to understand if/how we should
942 # improve the translation in the future.
943 if ($tag ne $xtag) {
944 print "Translated '$tag' tag to '$xtag'\n";
945 }
946 die "Cannot create tag $xtag: $!\n";
947 }
948
949 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
950 }
951};
952
953my $commitcount = 1;
954while (<CVS>) {
955 chomp;
956 if ($state == 0 and /^-+$/) {
957 $state = 1;
958 } elsif ($state == 0) {
959 $state = 1;
960 redo;
961 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
962 $patchset = 0+$_;
963 $state=2;
964 } elsif ($state == 2 and s/^Date:\s+//) {
965 $date = pdate($_);
966 unless ($date) {
967 print STDERR "Could not parse date: $_\n";
968 $state=0;
969 next;
970 }
971 $state=3;
972 } elsif ($state == 3 and s/^Author:\s+//) {
973 $author_tz = "UTC";
974 s/\s+$//;
975 if (/^(.*?)\s+<(.*)>/) {
976 ($author_name, $author_email) = ($1, $2);
977 } elsif ($conv_author_name{$_}) {
978 $author_name = $conv_author_name{$_};
979 $author_email = $conv_author_email{$_};
980 $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
981 } else {
982 $author_name = $author_email = $_;
983 }
984 $state = 4;
985 } elsif ($state == 4 and s/^Branch:\s+//) {
986 s/\s+$//;
987 tr/_/\./ if ( $opt_u );
988 s/[\/]/$opt_s/g;
989 $branch = $_;
990 $state = 5;
991 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
992 s/\s+$//;
993 $ancestor = $_;
994 $ancestor = $opt_o if $ancestor eq "HEAD";
995 $state = 6;
996 } elsif ($state == 5) {
997 $ancestor = undef;
998 $state = 6;
999 redo;
1000 } elsif ($state == 6 and s/^Tag:\s+//) {
1001 s/\s+$//;
1002 if ($_ eq "(none)") {
1003 $tag = undef;
1004 } else {
1005 $tag = $_;
1006 }
1007 $state = 7;
1008 } elsif ($state == 7 and /^Log:/) {
1009 $logmsg = "";
1010 $state = 8;
1011 } elsif ($state == 8 and /^Members:/) {
1012 $branch = $opt_o if $branch eq "HEAD";
1013 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1014 # skip
1015 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1016 $state = 11;
1017 next;
1018 }
1019 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1020 # skip if the commit is too recent
1021 # given that the cvsps default fuzz is 300s, we give ourselves another
1022 # 300s just in case -- this also prevents skipping commits
1023 # due to server clock drift
1024 print "skip patchset $patchset: $date too recent\n" if $opt_v;
1025 $state = 11;
1026 next;
1027 }
1028 if (exists $ignorebranch{$branch}) {
1029 print STDERR "Skipping $branch\n";
1030 $state = 11;
1031 next;
1032 }
1033 if ($ancestor) {
1034 if ($ancestor eq $branch) {
1035 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1036 $ancestor = $opt_o;
1037 }
1038 if (defined get_headref("$remote/$branch")) {
1039 print STDERR "Branch $branch already exists!\n";
1040 $state=11;
1041 next;
1042 }
1043 my $id = get_headref("$remote/$ancestor");
1044 if (!$id) {
1045 print STDERR "Branch $ancestor does not exist!\n";
1046 $ignorebranch{$branch} = 1;
1047 $state=11;
1048 next;
1049 }
1050
1051 system(qw(git update-ref -m cvsimport),
1052 "$remote/$branch", $id);
1053 if($? != 0) {
1054 print STDERR "Could not create branch $branch\n";
1055 $ignorebranch{$branch} = 1;
1056 $state=11;
1057 next;
1058 }
1059 }
1060 $last_branch = $branch if $branch ne $last_branch;
1061 $state = 9;
1062 } elsif ($state == 8) {
1063 $logmsg .= "$_\n";
1064 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1065# VERSION:1.96->1.96.2.1
1066 my $init = ($2 eq "INITIAL");
1067 my $fn = $1;
1068 my $rev = $3;
1069 $fn =~ s#^/+##;
1070 if ($opt_S && $fn =~ m/$opt_S/) {
1071 print "SKIPPING $fn v $rev\n";
1072 push(@skipped, $fn);
1073 next;
1074 }
1075 push @commit_revisions, [$fn, $rev];
1076 print "Fetching $fn v $rev\n" if $opt_v;
1077 my ($tmpname, $size) = $cvs->file($fn,$rev);
1078 if ($size == -1) {
1079 push(@old,$fn);
1080 print "Drop $fn\n" if $opt_v;
1081 } else {
1082 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1083 my $pid = open(my $F, '-|');
1084 die $! unless defined $pid;
1085 if (!$pid) {
1086 exec("git", "hash-object", "-w", $tmpname)
1087 or die "Cannot create object: $!\n";
1088 }
1089 my $sha = <$F>;
1090 chomp $sha;
1091 close $F;
1092 my $mode = pmode($cvs->{'mode'});
1093 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1094 }
1095 unlink($tmpname);
1096 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1097 my $fn = $1;
1098 my $rev = $2;
1099 $fn =~ s#^/+##;
1100 push @commit_revisions, [$fn, $rev];
1101 push(@old,$fn);
1102 print "Delete $fn\n" if $opt_v;
1103 } elsif ($state == 9 and /^\s*$/) {
1104 $state = 10;
1105 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1106 $commitcount++;
1107 if ($opt_L && $commitcount > $opt_L) {
1108 last;
1109 }
1110 commit();
1111 if (($commitcount & 1023) == 0) {
1112 system(qw(git repack -a -d));
1113 }
1114 $state = 1;
1115 } elsif ($state == 11 and /^-+$/) {
1116 $state = 1;
1117 } elsif (/^-+$/) { # end of unknown-line processing
1118 $state = 1;
1119 } elsif ($state != 11) { # ignore stuff when skipping
1120 print STDERR "* UNKNOWN LINE * $_\n";
1121 }
1122}
1123commit() if $branch and $state != 11;
1124
1125unless ($opt_P) {
1126 unlink($cvspsfile);
1127}
1128
1129# The heuristic of repacking every 1024 commits can leave a
1130# lot of unpacked data. If there is more than 1MB worth of
1131# not-packed objects, repack once more.
1132my $line = `git count-objects`;
1133if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1134 my ($n_objects, $kb) = ($1, $2);
1135 1024 < $kb
1136 and system(qw(git repack -a -d));
1137}
1138
1139foreach my $git_index (values %index) {
1140 if ($git_index ne "$git_dir/index") {
1141 unlink($git_index);
1142 }
1143}
1144
1145if (defined $orig_git_index) {
1146 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1147} else {
1148 delete $ENV{GIT_INDEX_FILE};
1149}
1150
1151# Now switch back to the branch we were in before all of this happened
1152if ($orig_branch) {
1153 print "DONE.\n" if $opt_v;
1154 if ($opt_i) {
1155 exit 0;
1156 }
1157 my $tip_at_end = `git rev-parse --verify HEAD`;
1158 if ($tip_at_start ne $tip_at_end) {
1159 for ($tip_at_start, $tip_at_end) { chomp; }
1160 print "Fetched into the current branch.\n" if $opt_v;
1161 system(qw(git read-tree -u -m),
1162 $tip_at_start, $tip_at_end);
1163 die "Fast-forward update failed: $?\n" if $?;
1164 }
1165 else {
1166 system(qw(git merge -m cvsimport), "$remote/$opt_o");
1167 die "Could not merge $opt_o into the current branch.\n" if $?;
1168 }
1169} else {
1170 $orig_branch = "master";
1171 print "DONE; creating $orig_branch branch\n" if $opt_v;
1172 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1173 unless defined get_headref('refs/heads/master');
1174 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1175 if ($opt_r && $opt_o ne 'HEAD');
1176 system('git', 'update-ref', 'HEAD', "$orig_branch");
1177 unless ($opt_i) {
1178 system(qw(git checkout -f));
1179 die "checkout failed: $?\n" if $?;
1180 }
1181}