git-cvsexportcommit.perlon commit clone: allow cloning local paths with colons in them (6000334)
   1#!/usr/bin/perl
   2
   3use 5.008;
   4use strict;
   5use warnings;
   6use Getopt::Std;
   7use File::Temp qw(tempdir);
   8use Data::Dumper;
   9use File::Basename qw(basename dirname);
  10use File::Spec;
  11use Git;
  12
  13our ($opt_h, $opt_P, $opt_p, $opt_v, $opt_c, $opt_f, $opt_a, $opt_m, $opt_d, $opt_u, $opt_w, $opt_W, $opt_k);
  14
  15getopts('uhPpvcfkam:d:w:W');
  16
  17$opt_h && usage();
  18
  19die "Need at least one commit identifier!" unless @ARGV;
  20
  21# Get git-config settings
  22my $repo = Git->repository();
  23$opt_w = $repo->config('cvsexportcommit.cvsdir') unless defined $opt_w;
  24
  25if ($opt_w || $opt_W) {
  26        # Remember where GIT_DIR is before changing to CVS checkout
  27        unless ($ENV{GIT_DIR}) {
  28                # No GIT_DIR set. Figure it out for ourselves
  29                my $gd =`git-rev-parse --git-dir`;
  30                chomp($gd);
  31                $ENV{GIT_DIR} = $gd;
  32        }
  33
  34        # On MSYS, convert a Windows-style path to an MSYS-style path
  35        # so that rel2abs() below works correctly.
  36        if ($^O eq 'msys') {
  37                $ENV{GIT_DIR} =~ s#^([[:alpha:]]):/#/$1/#;
  38        }
  39
  40        # Make sure GIT_DIR is absolute
  41        $ENV{GIT_DIR} = File::Spec->rel2abs($ENV{GIT_DIR});
  42}
  43
  44if ($opt_w) {
  45        if (! -d $opt_w."/CVS" ) {
  46                die "$opt_w is not a CVS checkout";
  47        }
  48        chdir $opt_w or die "Cannot change to CVS checkout at $opt_w";
  49}
  50unless ($ENV{GIT_DIR} && -r $ENV{GIT_DIR}){
  51    die "GIT_DIR is not defined or is unreadable";
  52}
  53
  54
  55my @cvs;
  56if ($opt_d) {
  57        @cvs = ('cvs', '-d', $opt_d);
  58} else {
  59        @cvs = ('cvs');
  60}
  61
  62# resolve target commit
  63my $commit;
  64$commit = pop @ARGV;
  65$commit = safe_pipe_capture('git-rev-parse', '--verify', "$commit^0");
  66chomp $commit;
  67if ($?) {
  68    die "The commit reference $commit did not resolve!";
  69}
  70
  71# resolve what parent we want
  72my $parent;
  73if (@ARGV) {
  74    $parent = pop @ARGV;
  75    $parent =  safe_pipe_capture('git-rev-parse', '--verify', "$parent^0");
  76    chomp $parent;
  77    if ($?) {
  78        die "The parent reference did not resolve!";
  79    }
  80}
  81
  82# find parents from the commit itself
  83my @commit  = safe_pipe_capture('git-cat-file', 'commit', $commit);
  84my @parents;
  85my $committer;
  86my $author;
  87my $stage = 'headers'; # headers, msg
  88my $title;
  89my $msg = '';
  90
  91foreach my $line (@commit) {
  92    chomp $line;
  93    if ($stage eq 'headers' && $line eq '') {
  94        $stage = 'msg';
  95        next;
  96    }
  97
  98    if ($stage eq 'headers') {
  99        if ($line =~ m/^parent (\w{40})$/) { # found a parent
 100            push @parents, $1;
 101        } elsif ($line =~ m/^author (.+) \d+ [-+]\d+$/) {
 102            $author = $1;
 103        } elsif ($line =~ m/^committer (.+) \d+ [-+]\d+$/) {
 104            $committer = $1;
 105        }
 106    } else {
 107        $msg .= $line . "\n";
 108        unless ($title) {
 109            $title = $line;
 110        }
 111    }
 112}
 113
 114my $noparent = "0000000000000000000000000000000000000000";
 115if ($parent) {
 116    my $found;
 117    # double check that it's a valid parent
 118    foreach my $p (@parents) {
 119        if ($p eq $parent) {
 120            $found = 1;
 121            last;
 122        }; # found it
 123    }
 124    die "Did not find $parent in the parents for this commit!" if !$found and !$opt_P;
 125} else { # we don't have a parent from the cmdline...
 126    if (@parents == 1) { # it's safe to get it from the commit
 127        $parent = $parents[0];
 128    } elsif (@parents == 0) { # there is no parent
 129        $parent = $noparent;
 130    } else { # cannot choose automatically from multiple parents
 131        die "This commit has more than one parent -- please name the parent you want to use explicitly";
 132    }
 133}
 134
 135my $go_back_to = 0;
 136
 137if ($opt_W) {
 138    $opt_v && print "Resetting to $parent\n";
 139    $go_back_to = `git symbolic-ref HEAD 2> /dev/null ||
 140        git rev-parse HEAD` || die "Could not determine current branch";
 141    system("git checkout -q $parent^0") && die "Could not check out $parent^0";
 142}
 143
 144$opt_v && print "Applying to CVS commit $commit from parent $parent\n";
 145
 146# grab the commit message
 147open(MSG, ">.msg") or die "Cannot open .msg for writing";
 148if ($opt_m) {
 149    print MSG $opt_m;
 150}
 151print MSG $msg;
 152if ($opt_a) {
 153    print MSG "\n\nAuthor: $author\n";
 154    if ($author ne $committer) {
 155        print MSG "Committer: $committer\n";
 156    }
 157}
 158close MSG;
 159
 160if ($parent eq $noparent) {
 161    `git-diff-tree --binary -p --root $commit >.cvsexportcommit.diff`;# || die "Cannot diff";
 162} else {
 163    `git-diff-tree --binary -p $parent $commit >.cvsexportcommit.diff`;# || die "Cannot diff";
 164}
 165
 166## apply non-binary changes
 167
 168# In pedantic mode require all lines of context to match.  In normal
 169# mode, be compatible with diff/patch: assume 3 lines of context and
 170# require at least one line match, i.e. ignore at most 2 lines of
 171# context, like diff/patch do by default.
 172my $context = $opt_p ? '' : '-C1';
 173
 174print "Checking if patch will apply\n";
 175
 176my @stat;
 177open APPLY, "GIT_DIR= git-apply $context --summary --numstat<.cvsexportcommit.diff|" || die "cannot patch";
 178@stat=<APPLY>;
 179close APPLY || die "Cannot patch";
 180my (@bfiles,@files,@afiles,@dfiles);
 181chomp @stat;
 182foreach (@stat) {
 183        push (@bfiles,$1) if m/^-\t-\t(.*)$/;
 184        push (@files, $1) if m/^-\t-\t(.*)$/;
 185        push (@files, $1) if m/^\d+\t\d+\t(.*)$/;
 186        push (@afiles,$1) if m/^ create mode [0-7]+ (.*)$/;
 187        push (@dfiles,$1) if m/^ delete mode [0-7]+ (.*)$/;
 188}
 189map { s/^"(.*)"$/$1/g } @bfiles,@files;
 190map { s/\\([0-7]{3})/sprintf('%c',oct $1)/eg } @bfiles,@files;
 191
 192# check that the files are clean and up to date according to cvs
 193my $dirty;
 194my @dirs;
 195foreach my $p (@afiles) {
 196    my $path = dirname $p;
 197    while (!-d $path and ! grep { $_ eq $path } @dirs) {
 198        unshift @dirs, $path;
 199        $path = dirname $path;
 200    }
 201}
 202
 203# ... check dirs,
 204foreach my $d (@dirs) {
 205    if (-e $d) {
 206        $dirty = 1;
 207        warn "$d exists and is not a directory!\n";
 208    }
 209}
 210
 211# ... query status of all files that we have a directory for and parse output of 'cvs status' to %cvsstat.
 212my @canstatusfiles;
 213foreach my $f (@files) {
 214    my $path = dirname $f;
 215    next if (grep { $_ eq $path } @dirs);
 216    push @canstatusfiles, $f;
 217}
 218
 219my %cvsstat;
 220if (@canstatusfiles) {
 221    if ($opt_u) {
 222      my @updated = xargs_safe_pipe_capture([@cvs, 'update'], @canstatusfiles);
 223      print @updated;
 224    }
 225    # "cvs status" reorders the parameters, notably when there are multiple
 226    # arguments with the same basename.  So be precise here.
 227
 228    my %added = map { $_ => 1 } @afiles;
 229    my %todo = map { $_ => 1 } @canstatusfiles;
 230
 231    while (%todo) {
 232      my @canstatusfiles2 = ();
 233      my %fullname = ();
 234      foreach my $name (keys %todo) {
 235        my $basename = basename($name);
 236
 237        # CVS reports files that don't exist in the current revision as
 238        # "no file $basename" in its "status" output, so we should
 239        # anticipate that.  Totally unknown files will have a status
 240        # "Unknown". However, if they exist in the Attic, their status
 241        # will be "Up-to-date" (this means they were added once but have
 242        # been removed).
 243        $basename = "no file $basename" if $added{$basename};
 244
 245        $basename =~ s/^\s+//;
 246        $basename =~ s/\s+$//;
 247
 248        if (!exists($fullname{$basename})) {
 249          $fullname{$basename} = $name;
 250          push (@canstatusfiles2, $name);
 251          delete($todo{$name});
 252        }
 253      }
 254      my @cvsoutput;
 255      @cvsoutput = xargs_safe_pipe_capture([@cvs, 'status'], @canstatusfiles2);
 256      foreach my $l (@cvsoutput) {
 257        chomp $l;
 258        next unless
 259            my ($file, $status) = $l =~ /^File:\s+(.*\S)\s+Status: (.*)$/;
 260
 261        my $fullname = $fullname{$file};
 262        print STDERR "Huh? Status '$status' reported for unexpected file '$file'\n"
 263            unless defined $fullname;
 264
 265        # This response means the file does not exist except in
 266        # CVS's attic, so set the status accordingly
 267        $status = "In-attic"
 268            if $file =~ /^no file /
 269                && $status eq 'Up-to-date';
 270
 271        $cvsstat{$fullname{$file}} = $status
 272            if defined $fullname{$file};
 273      }
 274    }
 275}
 276
 277# ... Validate that new files have the correct status
 278foreach my $f (@afiles) {
 279    next unless defined(my $stat = $cvsstat{$f});
 280
 281    # This means the file has never been seen before
 282    next if $stat eq 'Unknown';
 283
 284    # This means the file has been seen before but was removed
 285    next if $stat eq 'In-attic';
 286
 287    $dirty = 1;
 288        warn "File $f is already known in your CVS checkout -- perhaps it has been added by another user. Or this may indicate that it exists on a different branch. If this is the case, use -f to force the merge.\n";
 289        warn "Status was: $cvsstat{$f}\n";
 290}
 291
 292# ... validate known files.
 293foreach my $f (@files) {
 294    next if grep { $_ eq $f } @afiles;
 295    # TODO:we need to handle removed in cvs
 296    unless (defined ($cvsstat{$f}) and $cvsstat{$f} eq "Up-to-date") {
 297        $dirty = 1;
 298        warn "File $f not up to date but has status '$cvsstat{$f}' in your CVS checkout!\n";
 299    }
 300
 301    # Depending on how your GIT tree got imported from CVS you may
 302    # have a conflict between expanded keywords in your CVS tree and
 303    # unexpanded keywords in the patch about to be applied.
 304    if ($opt_k) {
 305        my $orig_file ="$f.orig";
 306        rename $f, $orig_file;
 307        open(FILTER_IN, "<$orig_file") or die "Cannot open $orig_file\n";
 308        open(FILTER_OUT, ">$f") or die "Cannot open $f\n";
 309        while (<FILTER_IN>)
 310        {
 311            my $line = $_;
 312            $line =~ s/\$([A-Z][a-z]+):[^\$]+\$/\$$1\$/g;
 313            print FILTER_OUT $line;
 314        }
 315        close FILTER_IN;
 316        close FILTER_OUT;
 317    }
 318}
 319
 320if ($dirty) {
 321    if ($opt_f) {       warn "The tree is not clean -- forced merge\n";
 322        $dirty = 0;
 323    } else {
 324        die "Exiting: your CVS tree is not clean for this merge.";
 325    }
 326}
 327
 328print "Applying\n";
 329if ($opt_W) {
 330    system("git checkout -q $commit^0") && die "cannot patch";
 331} else {
 332    `GIT_DIR= git-apply $context --summary --numstat --apply <.cvsexportcommit.diff` || die "cannot patch";
 333}
 334
 335print "Patch applied successfully. Adding new files and directories to CVS\n";
 336my $dirtypatch = 0;
 337
 338#
 339# We have to add the directories in order otherwise we will have
 340# problems when we try and add the sub-directory of a directory we
 341# have not added yet.
 342#
 343# Luckily this is easy to deal with by sorting the directories and
 344# dealing with the shortest ones first.
 345#
 346@dirs = sort { length $a <=> length $b} @dirs;
 347
 348foreach my $d (@dirs) {
 349    if (system(@cvs,'add',$d)) {
 350        $dirtypatch = 1;
 351        warn "Failed to cvs add directory $d -- you may need to do it manually";
 352    }
 353}
 354
 355foreach my $f (@afiles) {
 356    if (grep { $_ eq $f } @bfiles) {
 357      system(@cvs, 'add','-kb',$f);
 358    } else {
 359      system(@cvs, 'add', $f);
 360    }
 361    if ($?) {
 362        $dirtypatch = 1;
 363        warn "Failed to cvs add $f -- you may need to do it manually";
 364    }
 365}
 366
 367foreach my $f (@dfiles) {
 368    system(@cvs, 'rm', '-f', $f);
 369    if ($?) {
 370        $dirtypatch = 1;
 371        warn "Failed to cvs rm -f $f -- you may need to do it manually";
 372    }
 373}
 374
 375print "Commit to CVS\n";
 376print "Patch title (first comment line): $title\n";
 377my @commitfiles = map { unless (m/\s/) { '\''.$_.'\''; } else { $_; }; } (@files);
 378my $cmd = join(' ', @cvs)." commit -F .msg @commitfiles";
 379
 380if ($dirtypatch) {
 381    print "NOTE: One or more hunks failed to apply cleanly.\n";
 382    print "You'll need to apply the patch in .cvsexportcommit.diff manually\n";
 383    print "using a patch program. After applying the patch and resolving the\n";
 384    print "problems you may commit using:";
 385    print "\n    cd \"$opt_w\"" if $opt_w;
 386    print "\n    $cmd\n";
 387    print "\n    git checkout $go_back_to\n" if $go_back_to;
 388    print "\n";
 389    exit(1);
 390}
 391
 392if ($opt_c) {
 393    print "Autocommit\n  $cmd\n";
 394    print xargs_safe_pipe_capture([@cvs, 'commit', '-F', '.msg'], @files);
 395    if ($?) {
 396        die "Exiting: The commit did not succeed";
 397    }
 398    print "Committed successfully to CVS\n";
 399    # clean up
 400    unlink(".msg");
 401} else {
 402    print "Ready for you to commit, just run:\n\n   $cmd\n";
 403}
 404
 405# clean up
 406unlink(".cvsexportcommit.diff");
 407
 408if ($opt_W) {
 409    system("git checkout $go_back_to") && die "cannot move back to $go_back_to";
 410    if (!($go_back_to =~ /^[0-9a-fA-F]{40}$/)) {
 411        system("git symbolic-ref HEAD $go_back_to") &&
 412            die "cannot move back to $go_back_to";
 413    }
 414}
 415
 416# CVS version 1.11.x and 1.12.x sleeps the wrong way to ensure the timestamp
 417# used by CVS and the one set by subsequence file modifications are different.
 418# If they are not different CVS will not detect changes.
 419sleep(1);
 420
 421sub usage {
 422        print STDERR <<END;
 423Usage: GIT_DIR=/path/to/.git git cvsexportcommit [-h] [-p] [-v] [-c] [-f] [-u] [-k] [-w cvsworkdir] [-m msgprefix] [ parent ] commit
 424END
 425        exit(1);
 426}
 427
 428# An alternative to `command` that allows input to be passed as an array
 429# to work around shell problems with weird characters in arguments
 430# if the exec returns non-zero we die
 431sub safe_pipe_capture {
 432    my @output;
 433    if (my $pid = open my $child, '-|') {
 434        @output = (<$child>);
 435        close $child or die join(' ',@_).": $! $?";
 436    } else {
 437        exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
 438    }
 439    return wantarray ? @output : join('',@output);
 440}
 441
 442sub xargs_safe_pipe_capture {
 443        my $MAX_ARG_LENGTH = 65536;
 444        my $cmd = shift;
 445        my @output;
 446        my $output;
 447        while(@_) {
 448                my @args;
 449                my $length = 0;
 450                while(@_ && $length < $MAX_ARG_LENGTH) {
 451                        push @args, shift;
 452                        $length += length($args[$#args]);
 453                }
 454                if (wantarray) {
 455                        push @output, safe_pipe_capture(@$cmd, @args);
 456                }
 457                else {
 458                        $output .= safe_pipe_capture(@$cmd, @args);
 459                }
 460        }
 461        return wantarray ? @output : $output;
 462}