1#!/usr/bin/perl
2
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use 5.008;
17use strict;
18use warnings;
19use Getopt::Long;
20use File::Spec;
21use File::Temp qw(tempfile tmpnam);
22use File::Path qw(mkpath);
23use File::Basename qw(basename dirname);
24use Time::Local;
25use IO::Socket;
26use IO::Pipe;
27use POSIX qw(strftime dup2 ENOENT);
28use IPC::Open2;
29
30$SIG{'PIPE'}="IGNORE";
31$ENV{'TZ'}="UTC";
32
33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34my (%conv_author_name, %conv_author_email);
35
36sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45END
46 exit(1);
47}
48
49sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
53
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
61 }
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
70 }
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
74 }
75 }
76 # NEEDSWORK: Maybe warn on unrecognized lines?
77 }
78 close ($f);
79}
80
81sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
85
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
88 }
89 close ($f);
90}
91
92# convert getopts specs for use by git config
93sub read_repo_config {
94 # Split the string between characters, unless there is a ':'
95 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
96 my @opts = split(/ *(?!:)/, shift);
97 foreach my $o (@opts) {
98 my $key = $o;
99 $key =~ s/://g;
100 my $arg = 'git config';
101 $arg .= ' --bool' if ($o !~ /:$/);
102
103 chomp(my $tmp = `$arg --get cvsimport.$key`);
104 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
105 no strict 'refs';
106 my $opt_name = "opt_" . $key;
107 if (!$$opt_name) {
108 $$opt_name = $tmp;
109 }
110 }
111 }
112}
113
114my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
115read_repo_config($opts);
116Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
117
118# turn the Getopt::Std specification in a Getopt::Long one,
119# with support for multiple -M options
120GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
121 or usage();
122usage if $opt_h;
123
124if (@ARGV == 0) {
125 chomp(my $module = `git config --get cvsimport.module`);
126 push(@ARGV, $module) if $? == 0;
127}
128@ARGV <= 1 or usage("You can't specify more than one CVS module");
129
130if ($opt_d) {
131 $ENV{"CVSROOT"} = $opt_d;
132} elsif (-f 'CVS/Root') {
133 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
134 $opt_d = <$f>;
135 chomp $opt_d;
136 close $f;
137 $ENV{"CVSROOT"} = $opt_d;
138} elsif ($ENV{"CVSROOT"}) {
139 $opt_d = $ENV{"CVSROOT"};
140} else {
141 usage("CVSROOT needs to be set");
142}
143$opt_s ||= "-";
144$opt_a ||= 0;
145
146my $git_tree = $opt_C;
147$git_tree ||= ".";
148
149my $remote;
150if (defined $opt_r) {
151 $remote = 'refs/remotes/' . $opt_r;
152 $opt_o ||= "master";
153} else {
154 $opt_o ||= "origin";
155 $remote = 'refs/heads';
156}
157
158my $cvs_tree;
159if ($#ARGV == 0) {
160 $cvs_tree = $ARGV[0];
161} elsif (-f 'CVS/Repository') {
162 open my $f, '<', 'CVS/Repository' or
163 die 'Failed to open CVS/Repository';
164 $cvs_tree = <$f>;
165 chomp $cvs_tree;
166 close $f;
167} else {
168 usage("CVS module has to be specified");
169}
170
171our @mergerx = ();
172if ($opt_m) {
173 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
174}
175if (@opt_M) {
176 push (@mergerx, map { qr/$_/ } @opt_M);
177}
178
179# Remember UTC of our starting time
180# we'll want to avoid importing commits
181# that are too recent
182our $starttime = time();
183
184select(STDERR); $|=1; select(STDOUT);
185
186
187package CVSconn;
188# Basic CVS dialog.
189# We're only interested in connecting and downloading, so ...
190
191use File::Spec;
192use File::Temp qw(tempfile);
193use POSIX qw(strftime dup2);
194
195sub new {
196 my ($what,$repo,$subdir) = @_;
197 $what=ref($what) if ref($what);
198
199 my $self = {};
200 $self->{'buffer'} = "";
201 bless($self,$what);
202
203 $repo =~ s#/+$##;
204 $self->{'fullrep'} = $repo;
205 $self->conn();
206
207 $self->{'subdir'} = $subdir;
208 $self->{'lines'} = undef;
209
210 return $self;
211}
212
213sub conn {
214 my $self = shift;
215 my $repo = $self->{'fullrep'};
216 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
217 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
218
219 my ($proxyhost,$proxyport);
220 if ($param && ($param =~ m/proxy=([^;]+)/)) {
221 $proxyhost = $1;
222 # Default proxyport, if not specified, is 8080.
223 $proxyport = 8080;
224 if ($ENV{"CVS_PROXY_PORT"}) {
225 $proxyport = $ENV{"CVS_PROXY_PORT"};
226 }
227 if ($param =~ m/proxyport=([^;]+)/) {
228 $proxyport = $1;
229 }
230 }
231 $repo ||= '/';
232
233 # if username is not explicit in CVSROOT, then use current user, as cvs would
234 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
235 my $rr2 = "-";
236 unless ($port) {
237 $rr2 = ":pserver:$user\@$serv:$repo";
238 $port=2401;
239 }
240 my $rr = ":pserver:$user\@$serv:$port$repo";
241
242 if ($pass) {
243 $pass = $self->_scramble($pass);
244 } else {
245 open(H,$ENV{'HOME'}."/.cvspass") and do {
246 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
247 while (<H>) {
248 chomp;
249 s/^\/\d+\s+//;
250 my ($w,$p) = split(/\s/,$_,2);
251 if ($w eq $rr or $w eq $rr2) {
252 $pass = $p;
253 last;
254 }
255 }
256 };
257 $pass = "A" unless $pass;
258 }
259
260 my ($s, $rep);
261 if ($proxyhost) {
262
263 # Use a HTTP Proxy. Only works for HTTP proxies that
264 # don't require user authentication
265 #
266 # See: http://www.ietf.org/rfc/rfc2817.txt
267
268 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
269 die "Socket to $proxyhost: $!\n" unless defined $s;
270 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
271 or die "Write to $proxyhost: $!\n";
272 $s->flush();
273
274 $rep = <$s>;
275
276 # The answer should look like 'HTTP/1.x 2yy ....'
277 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
278 die "Proxy connect: $rep\n";
279 }
280 # Skip up to the empty line of the proxy server output
281 # including the response headers.
282 while ($rep = <$s>) {
283 last if (!defined $rep ||
284 $rep eq "\n" ||
285 $rep eq "\r\n");
286 }
287 } else {
288 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
289 die "Socket to $serv: $!\n" unless defined $s;
290 }
291
292 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
293 or die "Write to $serv: $!\n";
294 $s->flush();
295
296 $rep = <$s>;
297
298 if ($rep ne "I LOVE YOU\n") {
299 $rep="<unknown>" unless $rep;
300 die "AuthReply: $rep\n";
301 }
302 $self->{'socketo'} = $s;
303 $self->{'socketi'} = $s;
304 } else { # local or ext: Fork off our own cvs server.
305 my $pr = IO::Pipe->new();
306 my $pw = IO::Pipe->new();
307 my $pid = fork();
308 die "Fork: $!\n" unless defined $pid;
309 my $cvs = 'cvs';
310 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
311 my $rsh = 'rsh';
312 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
313
314 my @cvs = ($cvs, 'server');
315 my ($local, $user, $host);
316 $local = $repo =~ s/:local://;
317 if (!$local) {
318 $repo =~ s/:ext://;
319 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
320 ($user, $host) = ($1, $2);
321 }
322 if (!$local) {
323 if ($user) {
324 unshift @cvs, $rsh, '-l', $user, $host;
325 } else {
326 unshift @cvs, $rsh, $host;
327 }
328 }
329
330 unless ($pid) {
331 $pr->writer();
332 $pw->reader();
333 dup2($pw->fileno(),0);
334 dup2($pr->fileno(),1);
335 $pr->close();
336 $pw->close();
337 exec(@cvs);
338 }
339 $pw->writer();
340 $pr->reader();
341 $self->{'socketo'} = $pw;
342 $self->{'socketi'} = $pr;
343 }
344 $self->{'socketo'}->write("Root $repo\n");
345
346 # Trial and error says that this probably is the minimum set
347 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
348
349 $self->{'socketo'}->write("valid-requests\n");
350 $self->{'socketo'}->flush();
351
352 chomp(my $rep=$self->readline());
353 if ($rep !~ s/^Valid-requests\s*//) {
354 $rep="<unknown>" unless $rep;
355 die "Expected Valid-requests from server, but got: $rep\n";
356 }
357 chomp(my $res=$self->readline());
358 die "validReply: $res\n" if $res ne "ok";
359
360 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
361 $self->{'repo'} = $repo;
362}
363
364sub readline {
365 my ($self) = @_;
366 return $self->{'socketi'}->getline();
367}
368
369sub _file {
370 # Request a file with a given revision.
371 # Trial and error says this is a good way to do it. :-/
372 my ($self,$fn,$rev) = @_;
373 $self->{'socketo'}->write("Argument -N\n") or return undef;
374 $self->{'socketo'}->write("Argument -P\n") or return undef;
375 # -kk: Linus' version doesn't use it - defaults to off
376 if ($opt_k) {
377 $self->{'socketo'}->write("Argument -kk\n") or return undef;
378 }
379 $self->{'socketo'}->write("Argument -r\n") or return undef;
380 $self->{'socketo'}->write("Argument $rev\n") or return undef;
381 $self->{'socketo'}->write("Argument --\n") or return undef;
382 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
383 $self->{'socketo'}->write("Directory .\n") or return undef;
384 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
385 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
386 $self->{'socketo'}->write("co\n") or return undef;
387 $self->{'socketo'}->flush() or return undef;
388 $self->{'lines'} = 0;
389 return 1;
390}
391sub _line {
392 # Read a line from the server.
393 # ... except that 'line' may be an entire file. ;-)
394 my ($self, $fh) = @_;
395 die "Not in lines" unless defined $self->{'lines'};
396
397 my $line;
398 my $res=0;
399 while (defined($line = $self->readline())) {
400 # M U gnupg-cvs-rep/AUTHORS
401 # Updated gnupg-cvs-rep/
402 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
403 # /AUTHORS/1.1///T1.1
404 # u=rw,g=rw,o=rw
405 # 0
406 # ok
407
408 if ($line =~ s/^(?:Created|Updated) //) {
409 $line = $self->readline(); # path
410 $line = $self->readline(); # Entries line
411 my $mode = $self->readline(); chomp $mode;
412 $self->{'mode'} = $mode;
413 defined (my $cnt = $self->readline())
414 or die "EOF from server after 'Changed'\n";
415 chomp $cnt;
416 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
417 $line="";
418 $res = $self->_fetchfile($fh, $cnt);
419 } elsif ($line =~ s/^ //) {
420 print $fh $line;
421 $res += length($line);
422 } elsif ($line =~ /^M\b/) {
423 # output, do nothing
424 } elsif ($line =~ /^Mbinary\b/) {
425 my $cnt;
426 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
427 chomp $cnt;
428 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
429 $line="";
430 $res += $self->_fetchfile($fh, $cnt);
431 } else {
432 chomp $line;
433 if ($line eq "ok") {
434 # print STDERR "S: ok (".length($res).")\n";
435 return $res;
436 } elsif ($line =~ s/^E //) {
437 # print STDERR "S: $line\n";
438 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
439 $line = $self->readline(); # filename
440 $line = $self->readline(); # OK
441 chomp $line;
442 die "Unknown: $line" if $line ne "ok";
443 return -1;
444 } else {
445 die "Unknown: $line\n";
446 }
447 }
448 }
449 return undef;
450}
451sub file {
452 my ($self,$fn,$rev) = @_;
453 my $res;
454
455 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
456 DIR => File::Spec->tmpdir(), UNLINK => 1);
457
458 $self->_file($fn,$rev) and $res = $self->_line($fh);
459
460 if (!defined $res) {
461 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
462 truncate $fh, 0;
463 $self->conn();
464 $self->_file($fn,$rev) or die "No file command send";
465 $res = $self->_line($fh);
466 die "Retry failed" unless defined $res;
467 }
468 close ($fh);
469
470 return ($name, $res);
471}
472sub _fetchfile {
473 my ($self, $fh, $cnt) = @_;
474 my $res = 0;
475 my $bufsize = 1024 * 1024;
476 while ($cnt) {
477 if ($bufsize > $cnt) {
478 $bufsize = $cnt;
479 }
480 my $buf;
481 my $num = $self->{'socketi'}->read($buf,$bufsize);
482 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
483 print $fh $buf;
484 $res += $num;
485 $cnt -= $num;
486 }
487 return $res;
488}
489
490sub _scramble {
491 my ($self, $pass) = @_;
492 my $scrambled = "A";
493
494 return $scrambled unless $pass;
495
496 my $pass_len = length($pass);
497 my @pass_arr = split("", $pass);
498 my $i;
499
500 # from cvs/src/scramble.c
501 my @shifts = (
502 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
503 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
504 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
505 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
506 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
507 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
508 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
509 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
510 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
511 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
512 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
513 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
514 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
515 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
516 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
517 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
518 );
519
520 for ($i = 0; $i < $pass_len; $i++) {
521 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
522 }
523
524 return $scrambled;
525}
526
527package main;
528
529my $cvs = CVSconn->new($opt_d, $cvs_tree);
530
531
532sub pdate($) {
533 my ($d) = @_;
534 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
535 or die "Unparseable date: $d\n";
536 my $y=$1; $y-=1900 if $y>1900;
537 return timegm($6||0,$5,$4,$3,$2-1,$y);
538}
539
540sub pmode($) {
541 my ($mode) = @_;
542 my $m = 0;
543 my $mm = 0;
544 my $um = 0;
545 for my $x(split(//,$mode)) {
546 if ($x eq ",") {
547 $m |= $mm&$um;
548 $mm = 0;
549 $um = 0;
550 } elsif ($x eq "u") { $um |= 0700;
551 } elsif ($x eq "g") { $um |= 0070;
552 } elsif ($x eq "o") { $um |= 0007;
553 } elsif ($x eq "r") { $mm |= 0444;
554 } elsif ($x eq "w") { $mm |= 0222;
555 } elsif ($x eq "x") { $mm |= 0111;
556 } elsif ($x eq "=") { # do nothing
557 } else { die "Unknown mode: $mode\n";
558 }
559 }
560 $m |= $mm&$um;
561 return $m;
562}
563
564sub getwd() {
565 my $pwd = `pwd`;
566 chomp $pwd;
567 return $pwd;
568}
569
570sub is_sha1 {
571 my $s = shift;
572 return $s =~ /^[a-f0-9]{40}$/;
573}
574
575sub get_headref ($) {
576 my $name = shift;
577 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
578 return undef unless $? == 0;
579 chomp $r;
580 return $r;
581}
582
583my $user_filename_prepend = '';
584sub munge_user_filename {
585 my $name = shift;
586 return File::Spec->file_name_is_absolute($name) ?
587 $name :
588 $user_filename_prepend . $name;
589}
590
591-d $git_tree
592 or mkdir($git_tree,0777)
593 or die "Could not create $git_tree: $!";
594if ($git_tree ne '.') {
595 $user_filename_prepend = getwd() . '/';
596 chdir($git_tree);
597}
598
599my $last_branch = "";
600my $orig_branch = "";
601my %branch_date;
602my $tip_at_start = undef;
603
604my $git_dir = $ENV{"GIT_DIR"} || ".git";
605$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
606$ENV{"GIT_DIR"} = $git_dir;
607my $orig_git_index;
608$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
609
610my %index; # holds filenames of one index per branch
611
612unless (-d $git_dir) {
613 system(qw(git init));
614 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
615 system(qw(git read-tree --empty));
616 die "Cannot init an empty tree: $?\n" if $?;
617
618 $last_branch = $opt_o;
619 $orig_branch = "";
620} else {
621 open(F, "-|", qw(git symbolic-ref HEAD)) or
622 die "Cannot run git symbolic-ref: $!\n";
623 chomp ($last_branch = <F>);
624 $last_branch = basename($last_branch);
625 close(F);
626 unless ($last_branch) {
627 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
628 $last_branch = "master";
629 }
630 $orig_branch = $last_branch;
631 $tip_at_start = `git rev-parse --verify HEAD`;
632
633 # Get the last import timestamps
634 my $fmt = '($ref, $author) = (%(refname), %(author));';
635 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
636 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
637 while (defined(my $entry = <H>)) {
638 my ($ref, $author);
639 eval($entry) || die "cannot eval refs list: $@";
640 my ($head) = ($ref =~ m|^$remote/(.*)|);
641 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
642 $branch_date{$head} = $1;
643 }
644 close(H);
645 if (!exists $branch_date{$opt_o}) {
646 die "Branch '$opt_o' does not exist.\n".
647 "Either use the correct '-o branch' option,\n".
648 "or import to a new repository.\n";
649 }
650}
651
652-d $git_dir
653 or die "Could not create git subdir ($git_dir).\n";
654
655# now we read (and possibly save) author-info as well
656-f "$git_dir/cvs-authors" and
657 read_author_info("$git_dir/cvs-authors");
658if ($opt_A) {
659 read_author_info(munge_user_filename($opt_A));
660 write_author_info("$git_dir/cvs-authors");
661}
662
663# open .git/cvs-revisions, if requested
664open my $revision_map, '>>', "$git_dir/cvs-revisions"
665 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
666 if defined $opt_R;
667
668
669#
670# run cvsps into a file unless we are getting
671# it passed as a file via $opt_P
672#
673my $cvspsfile;
674unless ($opt_P) {
675 print "Running cvsps...\n" if $opt_v;
676 my $pid = open(CVSPS,"-|");
677 my $cvspsfh;
678 die "Cannot fork: $!\n" unless defined $pid;
679 unless ($pid) {
680 my @opt;
681 @opt = split(/,/,$opt_p) if defined $opt_p;
682 unshift @opt, '-z', $opt_z if defined $opt_z;
683 unshift @opt, '-q' unless defined $opt_v;
684 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
685 push @opt, '--cvs-direct';
686 }
687 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
688 die "Could not start cvsps: $!\n";
689 }
690 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
691 DIR => File::Spec->tmpdir());
692 while (<CVSPS>) {
693 print $cvspsfh $_;
694 }
695 close CVSPS;
696 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
697 close $cvspsfh;
698} else {
699 $cvspsfile = munge_user_filename($opt_P);
700}
701
702open(CVS, "<$cvspsfile") or die $!;
703
704## cvsps output:
705#---------------------
706#PatchSet 314
707#Date: 1999/09/18 13:03:59
708#Author: wkoch
709#Branch: STABLE-BRANCH-1-0
710#Ancestor branch: HEAD
711#Tag: (none)
712#Log:
713# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
714#Members:
715# README:1.57->1.57.2.1
716# VERSION:1.96->1.96.2.1
717#
718#---------------------
719
720my $state = 0;
721
722sub update_index (\@\@) {
723 my $old = shift;
724 my $new = shift;
725 open(my $fh, '|-', qw(git update-index -z --index-info))
726 or die "unable to open git update-index: $!";
727 print $fh
728 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
729 @$old),
730 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
731 @$new)
732 or die "unable to write to git update-index: $!";
733 close $fh
734 or die "unable to write to git update-index: $!";
735 $? and die "git update-index reported error: $?";
736}
737
738sub write_tree () {
739 open(my $fh, '-|', qw(git write-tree))
740 or die "unable to open git write-tree: $!";
741 chomp(my $tree = <$fh>);
742 is_sha1($tree)
743 or die "Cannot get tree id ($tree): $!";
744 close($fh)
745 or die "Error running git write-tree: $?\n";
746 print "Tree ID $tree\n" if $opt_v;
747 return $tree;
748}
749
750my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
751my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
752
753# commits that cvsps cannot place anywhere...
754$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
755
756sub commit {
757 if ($branch eq $opt_o && !$index{branch} &&
758 !get_headref("$remote/$branch")) {
759 # looks like an initial commit
760 # use the index primed by git init
761 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
762 $index{$branch} = "$git_dir/index";
763 } else {
764 # use an index per branch to speed up
765 # imports of projects with many branches
766 unless ($index{$branch}) {
767 $index{$branch} = tmpnam();
768 $ENV{GIT_INDEX_FILE} = $index{$branch};
769 if ($ancestor) {
770 system("git", "read-tree", "$remote/$ancestor");
771 } else {
772 system("git", "read-tree", "$remote/$branch");
773 }
774 die "read-tree failed: $?\n" if $?;
775 }
776 }
777 $ENV{GIT_INDEX_FILE} = $index{$branch};
778
779 update_index(@old, @new);
780 @old = @new = ();
781 my $tree = write_tree();
782 my $parent = get_headref("$remote/$last_branch");
783 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
784
785 my @commit_args;
786 push @commit_args, ("-p", $parent) if $parent;
787
788 # loose detection of merges
789 # based on the commit msg
790 foreach my $rx (@mergerx) {
791 next unless $logmsg =~ $rx && $1;
792 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
793 if (my $sha1 = get_headref("$remote/$mparent")) {
794 push @commit_args, '-p', "$remote/$mparent";
795 print "Merge parent branch: $mparent\n" if $opt_v;
796 }
797 }
798
799 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
800 $ENV{GIT_AUTHOR_NAME} = $author_name;
801 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
802 $ENV{GIT_AUTHOR_DATE} = $commit_date;
803 $ENV{GIT_COMMITTER_NAME} = $author_name;
804 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
805 $ENV{GIT_COMMITTER_DATE} = $commit_date;
806 my $pid = open2(my $commit_read, my $commit_write,
807 'git', 'commit-tree', $tree, @commit_args);
808
809 # compatibility with git2cvs
810 substr($logmsg,32767) = "" if length($logmsg) > 32767;
811 $logmsg =~ s/[\s\n]+\z//;
812
813 if (@skipped) {
814 $logmsg .= "\n\n\nSKIPPED:\n\t";
815 $logmsg .= join("\n\t", @skipped) . "\n";
816 @skipped = ();
817 }
818
819 print($commit_write "$logmsg\n") && close($commit_write)
820 or die "Error writing to git commit-tree: $!\n";
821
822 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
823 chomp(my $cid = <$commit_read>);
824 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
825 print "Commit ID $cid\n" if $opt_v;
826 close($commit_read);
827
828 waitpid($pid,0);
829 die "Error running git commit-tree: $?\n" if $?;
830
831 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
832 or die "Cannot write branch $branch for update: $!\n";
833
834 if ($revision_map) {
835 print $revision_map "@$_ $cid\n" for @commit_revisions;
836 }
837 @commit_revisions = ();
838
839 if ($tag) {
840 my ($xtag) = $tag;
841 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
842 $xtag =~ tr/_/\./ if ( $opt_u );
843 $xtag =~ s/[\/]/$opt_s/g;
844 $xtag =~ s/\[//g;
845
846 system('git' , 'tag', '-f', $xtag, $cid) == 0
847 or die "Cannot create tag $xtag: $!\n";
848
849 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
850 }
851};
852
853my $commitcount = 1;
854while (<CVS>) {
855 chomp;
856 if ($state == 0 and /^-+$/) {
857 $state = 1;
858 } elsif ($state == 0) {
859 $state = 1;
860 redo;
861 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
862 $patchset = 0+$_;
863 $state=2;
864 } elsif ($state == 2 and s/^Date:\s+//) {
865 $date = pdate($_);
866 unless ($date) {
867 print STDERR "Could not parse date: $_\n";
868 $state=0;
869 next;
870 }
871 $state=3;
872 } elsif ($state == 3 and s/^Author:\s+//) {
873 s/\s+$//;
874 if (/^(.*?)\s+<(.*)>/) {
875 ($author_name, $author_email) = ($1, $2);
876 } elsif ($conv_author_name{$_}) {
877 $author_name = $conv_author_name{$_};
878 $author_email = $conv_author_email{$_};
879 } else {
880 $author_name = $author_email = $_;
881 }
882 $state = 4;
883 } elsif ($state == 4 and s/^Branch:\s+//) {
884 s/\s+$//;
885 tr/_/\./ if ( $opt_u );
886 s/[\/]/$opt_s/g;
887 $branch = $_;
888 $state = 5;
889 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
890 s/\s+$//;
891 $ancestor = $_;
892 $ancestor = $opt_o if $ancestor eq "HEAD";
893 $state = 6;
894 } elsif ($state == 5) {
895 $ancestor = undef;
896 $state = 6;
897 redo;
898 } elsif ($state == 6 and s/^Tag:\s+//) {
899 s/\s+$//;
900 if ($_ eq "(none)") {
901 $tag = undef;
902 } else {
903 $tag = $_;
904 }
905 $state = 7;
906 } elsif ($state == 7 and /^Log:/) {
907 $logmsg = "";
908 $state = 8;
909 } elsif ($state == 8 and /^Members:/) {
910 $branch = $opt_o if $branch eq "HEAD";
911 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
912 # skip
913 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
914 $state = 11;
915 next;
916 }
917 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
918 # skip if the commit is too recent
919 # given that the cvsps default fuzz is 300s, we give ourselves another
920 # 300s just in case -- this also prevents skipping commits
921 # due to server clock drift
922 print "skip patchset $patchset: $date too recent\n" if $opt_v;
923 $state = 11;
924 next;
925 }
926 if (exists $ignorebranch{$branch}) {
927 print STDERR "Skipping $branch\n";
928 $state = 11;
929 next;
930 }
931 if ($ancestor) {
932 if ($ancestor eq $branch) {
933 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
934 $ancestor = $opt_o;
935 }
936 if (defined get_headref("$remote/$branch")) {
937 print STDERR "Branch $branch already exists!\n";
938 $state=11;
939 next;
940 }
941 my $id = get_headref("$remote/$ancestor");
942 if (!$id) {
943 print STDERR "Branch $ancestor does not exist!\n";
944 $ignorebranch{$branch} = 1;
945 $state=11;
946 next;
947 }
948
949 system(qw(git update-ref -m cvsimport),
950 "$remote/$branch", $id);
951 if($? != 0) {
952 print STDERR "Could not create branch $branch\n";
953 $ignorebranch{$branch} = 1;
954 $state=11;
955 next;
956 }
957 }
958 $last_branch = $branch if $branch ne $last_branch;
959 $state = 9;
960 } elsif ($state == 8) {
961 $logmsg .= "$_\n";
962 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
963# VERSION:1.96->1.96.2.1
964 my $init = ($2 eq "INITIAL");
965 my $fn = $1;
966 my $rev = $3;
967 $fn =~ s#^/+##;
968 if ($opt_S && $fn =~ m/$opt_S/) {
969 print "SKIPPING $fn v $rev\n";
970 push(@skipped, $fn);
971 next;
972 }
973 push @commit_revisions, [$fn, $rev];
974 print "Fetching $fn v $rev\n" if $opt_v;
975 my ($tmpname, $size) = $cvs->file($fn,$rev);
976 if ($size == -1) {
977 push(@old,$fn);
978 print "Drop $fn\n" if $opt_v;
979 } else {
980 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
981 my $pid = open(my $F, '-|');
982 die $! unless defined $pid;
983 if (!$pid) {
984 exec("git", "hash-object", "-w", $tmpname)
985 or die "Cannot create object: $!\n";
986 }
987 my $sha = <$F>;
988 chomp $sha;
989 close $F;
990 my $mode = pmode($cvs->{'mode'});
991 push(@new,[$mode, $sha, $fn]); # may be resurrected!
992 }
993 unlink($tmpname);
994 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
995 my $fn = $1;
996 my $rev = $2;
997 $fn =~ s#^/+##;
998 push @commit_revisions, [$fn, $rev];
999 push(@old,$fn);
1000 print "Delete $fn\n" if $opt_v;
1001 } elsif ($state == 9 and /^\s*$/) {
1002 $state = 10;
1003 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1004 $commitcount++;
1005 if ($opt_L && $commitcount > $opt_L) {
1006 last;
1007 }
1008 commit();
1009 if (($commitcount & 1023) == 0) {
1010 system(qw(git repack -a -d));
1011 }
1012 $state = 1;
1013 } elsif ($state == 11 and /^-+$/) {
1014 $state = 1;
1015 } elsif (/^-+$/) { # end of unknown-line processing
1016 $state = 1;
1017 } elsif ($state != 11) { # ignore stuff when skipping
1018 print STDERR "* UNKNOWN LINE * $_\n";
1019 }
1020}
1021commit() if $branch and $state != 11;
1022
1023unless ($opt_P) {
1024 unlink($cvspsfile);
1025}
1026
1027# The heuristic of repacking every 1024 commits can leave a
1028# lot of unpacked data. If there is more than 1MB worth of
1029# not-packed objects, repack once more.
1030my $line = `git count-objects`;
1031if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1032 my ($n_objects, $kb) = ($1, $2);
1033 1024 < $kb
1034 and system(qw(git repack -a -d));
1035}
1036
1037foreach my $git_index (values %index) {
1038 if ($git_index ne "$git_dir/index") {
1039 unlink($git_index);
1040 }
1041}
1042
1043if (defined $orig_git_index) {
1044 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1045} else {
1046 delete $ENV{GIT_INDEX_FILE};
1047}
1048
1049# Now switch back to the branch we were in before all of this happened
1050if ($orig_branch) {
1051 print "DONE.\n" if $opt_v;
1052 if ($opt_i) {
1053 exit 0;
1054 }
1055 my $tip_at_end = `git rev-parse --verify HEAD`;
1056 if ($tip_at_start ne $tip_at_end) {
1057 for ($tip_at_start, $tip_at_end) { chomp; }
1058 print "Fetched into the current branch.\n" if $opt_v;
1059 system(qw(git read-tree -u -m),
1060 $tip_at_start, $tip_at_end);
1061 die "Fast-forward update failed: $?\n" if $?;
1062 }
1063 else {
1064 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1065 die "Could not merge $opt_o into the current branch.\n" if $?;
1066 }
1067} else {
1068 $orig_branch = "master";
1069 print "DONE; creating $orig_branch branch\n" if $opt_v;
1070 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1071 unless defined get_headref('refs/heads/master');
1072 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1073 if ($opt_r && $opt_o ne 'HEAD');
1074 system('git', 'update-ref', 'HEAD', "$orig_branch");
1075 unless ($opt_i) {
1076 system(qw(git checkout -f));
1077 die "checkout failed: $?\n" if $?;
1078 }
1079}