git-archimport.perlon commit archimport: remove String::ShellQuote dependency. (f88961a)
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  13
  14Imports a project from one or more Arch repositories. It will follow branches
  15and repositories within the namespaces defined by the <archive/branch>
  16parameters suppplied. If it cannot find the remote branch a merge comes from
  17it will just import it as a regular commit. If it can find it, it will mark it 
  18as a merge whenever possible.
  19
  20See man (1) git-archimport for more details.
  21
  22=head1 TODO
  23
  24 - create tag objects instead of ref tags
  25 - audit shell-escaping of filenames
  26 - hide our private tags somewhere smarter
  27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  28
  29=head1 Devel tricks
  30
  31Add print in front of the shell commands invoked via backticks. 
  32
  33=head1 Devel Notes
  34
  35There are several places where Arch and git terminology are intermixed
  36and potentially confused.
  37
  38The notion of a "branch" in git is approximately equivalent to
  39a "archive/category--branch--version" in Arch.  Also, it should be noted
  40that the "--branch" portion of "archive/category--branch--version" is really
  41optional in Arch although not many people (nor tools!) seem to know this.
  42This means that "archive/category--version" is also a valid "branch"
  43in git terms.
  44
  45We always refer to Arch names by their fully qualified variant (which
  46means the "archive" name is prefixed.
  47
  48For people unfamiliar with Arch, an "archive" is the term for "repository",
  49and can contain multiple, unrelated branches.
  50
  51=cut
  52
  53use strict;
  54use warnings;
  55use Getopt::Std;
  56use File::Spec;
  57use File::Temp qw(tempfile tempdir);
  58use File::Path qw(mkpath rmtree);
  59use File::Basename qw(basename dirname);
  60use Time::Local;
  61use IO::Socket;
  62use IO::Pipe;
  63use POSIX qw(strftime dup2);
  64use Data::Dumper qw/ Dumper /;
  65use IPC::Open2;
  66
  67$SIG{'PIPE'}="IGNORE";
  68$ENV{'TZ'}="UTC";
  69
  70my $git_dir = $ENV{"GIT_DIR"} || ".git";
  71$ENV{"GIT_DIR"} = $git_dir;
  72my $ptag_dir = "$git_dir/archimport/tags";
  73
  74our($opt_h,$opt_v, $opt_T,$opt_t,$opt_o);
  75
  76sub usage() {
  77    print STDERR <<END;
  78Usage: ${\basename $0}     # fetch/update GIT from Arch
  79       [ -o ] [ -h ] [ -v ] [ -T ] [ -t tempdir ] 
  80       repository/arch-branch [ repository/arch-branch] ...
  81END
  82    exit(1);
  83}
  84
  85getopts("Thvt:") or usage();
  86usage if $opt_h;
  87
  88@ARGV >= 1 or usage();
  89my @arch_roots = @ARGV;
  90
  91my ($tmpdir, $tmpdirname) = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  92my $tmp = $opt_t || 1;
  93$tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  94$opt_v && print "+ Using $tmp as temporary directory\n";
  95
  96my @psets  = ();                # the collection
  97my %psets  = ();                # the collection, by name
  98
  99my %rptags = ();                # my reverse private tags
 100                                # to map a SHA1 to a commitid
 101my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 102
 103foreach my $root (@arch_roots) {
 104    my ($arepo, $abranch) = split(m!/!, $root);
 105    open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |" 
 106        or die "Problems with tla abrowse: $!";
 107    
 108    my %ps        = ();         # the current one
 109    my $mode      = '';
 110    my $lastseen  = '';
 111    
 112    while (<ABROWSE>) {
 113        chomp;
 114        
 115        # first record padded w 8 spaces
 116        if (s/^\s{8}\b//) {
 117            
 118            # store the record we just captured
 119            if (%ps) {
 120                my %temp = %ps; # break references
 121                push (@psets, \%temp);
 122                $psets{$temp{id}} = \%temp;
 123                %ps = ();
 124            }
 125            
 126            my ($id, $type) = split(m/\s{3}/, $_);
 127            $ps{id}   = $id;
 128            $ps{repo} = $arepo;
 129
 130            # deal with types
 131            if ($type =~ m/^\(simple changeset\)/) {
 132                $ps{type} = 's';
 133            } elsif ($type eq '(initial import)') {
 134                $ps{type} = 'i';
 135            } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
 136                $ps{type} = 't';
 137                $ps{tag}  = $1;
 138            } else { 
 139                warn "Unknown type $type";
 140            }
 141            $lastseen = 'id';
 142        }
 143        
 144        if (s/^\s{10}//) { 
 145            # 10 leading spaces or more 
 146            # indicate commit metadata
 147            
 148            # date & author 
 149            if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
 150                
 151                my ($date, $authoremail) = split(m/\s{2,}/, $_);
 152                $ps{date}   = $date;
 153                $ps{date}   =~ s/\bGMT$//; # strip off trailign GMT
 154                if ($ps{date} =~ m/\b\w+$/) {
 155                    warn 'Arch dates not in GMT?! - imported dates will be wrong';
 156                }
 157            
 158                $authoremail =~ m/^(.+)\s(\S+)$/;
 159                $ps{author} = $1;
 160                $ps{email}  = $2;
 161            
 162                $lastseen = 'date';
 163            
 164            } elsif ($lastseen eq 'date') {
 165                # the only hint is position
 166                # subject is after date
 167                $ps{subj} = $_;
 168                $lastseen = 'subj';
 169            
 170            } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
 171                $ps{merges} = [];
 172                $lastseen = 'merges';
 173            
 174            } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 175                push (@{$ps{merges}}, $_);
 176            } else {
 177                warn 'more metadata after merges!?';
 178            }
 179            
 180        }
 181    }
 182
 183    if (%ps) {
 184        my %temp = %ps;         # break references
 185        push (@psets, \%temp);  
 186        $psets{ $temp{id} } = \%temp;
 187        %ps = ();
 188    }    
 189    close ABROWSE;
 190}                               # end foreach $root
 191
 192## Order patches by time
 193@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 194
 195#print Dumper \@psets;
 196
 197##
 198## TODO cleanup irrelevant patches
 199##      and put an initial import
 200##      or a full tag
 201my $import = 0;
 202unless (-d $git_dir) { # initial import
 203    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 204        print "Starting import from $psets[0]{id}\n";
 205        `git-init-db`;
 206        die $! if $?;
 207        $import = 1;
 208    } else {
 209        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 210    }
 211} else {    # progressing an import
 212    # load the rptags
 213    opendir(DIR, "$git_dir/archimport/tags")
 214        || die "can't opendir: $!";
 215    while (my $file = readdir(DIR)) {
 216        # skip non-interesting-files
 217        next unless -f "$ptag_dir/$file";
 218   
 219        # convert first '--' to '/' from old git-archimport to use
 220        # as an archivename/c--b--v private tag
 221        if ($file !~ m!,!) {
 222            my $oldfile = $file;
 223            $file =~ s!--!,!;
 224            print STDERR "converting old tag $oldfile to $file\n";
 225            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 226        }
 227        my $sha = ptag($file);
 228        chomp $sha;
 229        $rptags{$sha} = $file;
 230    }
 231    closedir DIR;
 232}
 233
 234# process patchsets
 235# extract the Arch repository name (Arch "archive" in Arch-speak)
 236sub extract_reponame {
 237    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 238    return (split(/\//, $fq_cvbr))[0];
 239}
 240 
 241sub extract_versionname {
 242    my $name = shift;
 243    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 244    return $name;
 245}
 246
 247# convert a fully-qualified revision or version to a unique dirname:
 248#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 249# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 250#
 251# the git notion of a branch is closer to
 252# archive/category--branch--version than archive/category--branch, so we
 253# use this to convert to git branch names.
 254# Also, keep archive names but replace '/' with ',' since it won't require
 255# subdirectories, and is safer than swapping '--' which could confuse
 256# reverse-mapping when dealing with bastard branches that
 257# are just archive/category--version  (no --branch)
 258sub tree_dirname {
 259    my $revision = shift;
 260    my $name = extract_versionname($revision);
 261    $name =~ s#/#,#;
 262    return $name;
 263}
 264
 265# old versions of git-archimport just use the <category--branch> part:
 266sub old_style_branchname {
 267    my $id = shift;
 268    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 269    chomp $ret;
 270    return $ret;
 271}
 272
 273*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 274
 275# process patchsets
 276foreach my $ps (@psets) {
 277    $ps->{branch} = git_branchname($ps->{id});
 278
 279    #
 280    # ensure we have a clean state 
 281    # 
 282    if (`git diff-files`) {
 283        die "Unclean tree when about to process $ps->{id} " .
 284            " - did we fail to commit cleanly before?";
 285    }
 286    die $! if $?;
 287
 288    #
 289    # skip commits already in repo
 290    #
 291    if (ptag($ps->{id})) {
 292      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 293      next;
 294    }
 295
 296    print " * Starting to work on $ps->{id}\n";
 297
 298    # 
 299    # create the branch if needed
 300    #
 301    if ($ps->{type} eq 'i' && !$import) {
 302        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 303    }
 304
 305    unless ($import) { # skip for import
 306        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 307            # we know about this branch
 308            system('git-checkout',$ps->{branch});
 309        } else {
 310            # new branch! we need to verify a few things
 311            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 312            my $branchpoint = ptag($ps->{tag});
 313            die "Tagging from unknown id unsupported: $ps->{tag}" 
 314                unless $branchpoint;
 315            
 316            # find where we are supposed to branch from
 317            system('git-checkout','-b',$ps->{branch},$branchpoint);
 318
 319            # If we trust Arch with the fact that this is just 
 320            # a tag, and it does not affect the state of the tree
 321            # then we just tag and move on
 322            tag($ps->{id}, $branchpoint);
 323            ptag($ps->{id}, $branchpoint);
 324            print " * Tagged $ps->{id} at $branchpoint\n";
 325            next;
 326        } 
 327        die $! if $?;
 328    } 
 329
 330    #
 331    # Apply the import/changeset/merge into the working tree
 332    # 
 333    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 334        apply_import($ps) or die $!;
 335        $import=0;
 336    } elsif ($ps->{type} eq 's') {
 337        apply_cset($ps);
 338    }
 339
 340    #
 341    # prepare update git's index, based on what arch knows
 342    # about the pset, resolve parents, etc
 343    #
 344    my $tree;
 345    
 346    my $commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 
 347    die "Error in cat-archive-log: $!" if $?;
 348        
 349    # parselog will git-add/rm files
 350    # and generally prepare things for the commit
 351    # NOTE: parselog will shell-quote filenames! 
 352    my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
 353    my $logmessage = "$sum\n$msg";
 354
 355
 356    # imports don't give us good info
 357    # on added files. Shame on them
 358    if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 
 359        `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
 360        `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
 361    }
 362
 363    if (@$add) {
 364        while (@$add) {
 365            my @slice = splice(@$add, 0, 100);
 366            my $slice = join(' ', @slice);          
 367            `git-update-index --add $slice`;
 368            die "Error in git-update-index --add: $!" if $?;
 369        }
 370    }
 371    if (@$del) {
 372        foreach my $file (@$del) {
 373            unlink $file or die "Problems deleting $file : $!";
 374        }
 375        while (@$del) {
 376            my @slice = splice(@$del, 0, 100);
 377            my $slice = join(' ', @slice);
 378            `git-update-index --remove $slice`;
 379            die "Error in git-update-index --remove: $!" if $?;
 380        }
 381    }
 382    if (@$ren) {                # renamed
 383        if (@$ren % 2) {
 384            die "Odd number of entries in rename!?";
 385        }
 386        ;
 387        while (@$ren) {
 388            my $from = pop @$ren;
 389            my $to   = pop @$ren;           
 390
 391            unless (-d dirname($to)) {
 392                mkpath(dirname($to)); # will die on err
 393            }
 394            #print "moving $from $to";
 395            `mv $from $to`;
 396            die "Error renaming $from $to : $!" if $?;
 397            `git-update-index --remove $from`;
 398            die "Error in git-update-index --remove: $!" if $?;
 399            `git-update-index --add $to`;
 400            die "Error in git-update-index --add: $!" if $?;
 401        }
 402
 403    }
 404    if (@$mod) {                # must be _after_ renames
 405        while (@$mod) {
 406            my @slice = splice(@$mod, 0, 100);
 407            my $slice = join(' ', @slice);
 408            `git-update-index $slice`;
 409            die "Error in git-update-index: $!" if $?;
 410        }
 411    }
 412
 413    # warn "errors when running git-update-index! $!";
 414    $tree = `git-write-tree`;
 415    die "cannot write tree $!" if $?;
 416    chomp $tree;
 417        
 418    
 419    #
 420    # Who's your daddy?
 421    #
 422    my @par;
 423    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 424        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
 425            my $p = <HEAD>;
 426            close HEAD;
 427            chomp $p;
 428            push @par, '-p', $p;
 429        } else { 
 430            if ($ps->{type} eq 's') {
 431                warn "Could not find the right head for the branch $ps->{branch}";
 432            }
 433        }
 434    }
 435    
 436    if ($ps->{merges}) {
 437        push @par, find_parents($ps);
 438    }
 439
 440    #    
 441    # Commit, tag and clean state
 442    #
 443    $ENV{TZ}                  = 'GMT';
 444    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 445    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 446    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 447    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 448    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 449    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 450
 451    my ($pid, $commit_rh, $commit_wh);
 452    $commit_rh = 'commit_rh';
 453    $commit_wh = 'commit_wh';
 454    
 455    $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 
 456        or die $!;
 457    print WRITER $logmessage;   # write
 458    close WRITER;
 459    my $commitid = <READER>;    # read
 460    chomp $commitid;
 461    close READER;
 462    waitpid $pid,0;             # close;
 463
 464    if (length $commitid != 40) {
 465        die "Something went wrong with the commit! $! $commitid";
 466    }
 467    #
 468    # Update the branch
 469    # 
 470    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
 471    print HEAD $commitid;
 472    close HEAD;
 473    system('git-update-ref', 'HEAD', "$ps->{branch}");
 474
 475    # tag accordingly
 476    ptag($ps->{id}, $commitid); # private tag
 477    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 478        tag($ps->{id}, $commitid);
 479    }
 480    print " * Committed $ps->{id}\n";
 481    print "   + tree   $tree\n";
 482    print "   + commit $commitid\n";
 483    $opt_v && print "   + commit date is  $ps->{date} \n";
 484    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
 485}
 486
 487sub apply_import {
 488    my $ps = shift;
 489    my $bname = git_branchname($ps->{id});
 490
 491    mkpath($tmp);
 492
 493    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
 494    die "Cannot get import: $!" if $?;    
 495    system('rsync','-aI','--delete', '--exclude',$git_dir,
 496                '--exclude','.arch-ids','--exclude','{arch}',
 497                "$tmp/import/", './');
 498    die "Cannot rsync import:$!" if $?;
 499    
 500    rmtree("$tmp/import");
 501    die "Cannot remove tempdir: $!" if $?;
 502    
 503
 504    return 1;
 505}
 506
 507sub apply_cset {
 508    my $ps = shift;
 509
 510    mkpath($tmp);
 511
 512    # get the changeset
 513    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
 514    die "Cannot get changeset: $!" if $?;
 515    
 516    # apply patches
 517    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 518        # this can be sped up considerably by doing
 519        #    (find | xargs cat) | patch
 520        # but that cna get mucked up by patches
 521        # with missing trailing newlines or the standard 
 522        # 'missing newline' flag in the patch - possibly
 523        # produced with an old/buggy diff.
 524        # slow and safe, we invoke patch once per patchfile
 525        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 526        die "Problem applying patches! $!" if $?;
 527    }
 528
 529    # apply changed binary files
 530    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 531        foreach my $mod (@modified) {
 532            chomp $mod;
 533            my $orig = $mod;
 534            $orig =~ s/\.modified$//; # lazy
 535            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 536            #print "rsync -p '$mod' '$orig'";
 537            system('rsync','-p',$mod,"./$orig");
 538            die "Problem applying binary changes! $!" if $?;
 539        }
 540    }
 541
 542    # bring in new files
 543    system('rsync','-aI','--exclude',$git_dir,
 544                '--exclude','.arch-ids',
 545                '--exclude', '{arch}',
 546                "$tmp/changeset/new-files-archive/",'./');
 547
 548    # deleted files are hinted from the commitlog processing
 549
 550    rmtree("$tmp/changeset");
 551}
 552
 553
 554# =for reference
 555# A log entry looks like 
 556# Revision: moodle-org--moodle--1.3.3--patch-15
 557# Archive: arch-eduforge@catalyst.net.nz--2004
 558# Creator: Penny Leach <penny@catalyst.net.nz>
 559# Date: Wed May 25 14:15:34 NZST 2005
 560# Standard-date: 2005-05-25 02:15:34 GMT
 561# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 562#     lang/de/.arch-ids/block_html.php.id
 563# New-directories: lang/de/help/questionnaire
 564#     lang/de/help/questionnaire/.arch-ids
 565# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 566#    db_sears.sql db/db_sears.sql
 567# Removed-files: lang/be/docs/.arch-ids/release.html.id
 568#     lang/be/docs/.arch-ids/releaseold.html.id
 569# Modified-files: admin/cron.php admin/delete.php
 570#     admin/editor.html backup/lib.php backup/restore.php
 571# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 572# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 573# Keywords:
 574#
 575# Updating yadda tadda tadda madda
 576sub parselog {
 577    my $log = shift;
 578    #print $log;
 579
 580    my (@add, @del, @mod, @ren, @kw, $sum, $msg );
 581
 582    if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
 583        my $files = $1;
 584        @add = split(m/\s+/s, $files);
 585    }
 586       
 587    if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
 588        my $files = $1;
 589        @del = split(m/\s+/s, $files);
 590    }
 591    
 592    if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
 593        my $files = $1;
 594        @mod = split(m/\s+/s, $files);
 595    }
 596    
 597    if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
 598        my $files = $1;
 599        @ren = split(m/\s+/s, $files);
 600    }
 601
 602    $sum ='';
 603    if ($log =~ m/^Summary:(.+?)$/m ) {
 604        $sum = $1;
 605        $sum =~ s/^\s+//;
 606        $sum =~ s/\s+$//;
 607    }
 608
 609    $msg = '';
 610    if ($log =~ m/\n\n(.+)$/s) {
 611        $msg = $1;
 612        $msg =~ s/^\s+//;
 613        $msg =~ s/\s+$//;
 614    }
 615
 616
 617    # cleanup the arrays
 618    foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
 619        my @tmp = ();
 620        while (my $t = pop @$ref) {
 621            next unless length ($t);
 622            next if $t =~ m!\{arch\}/!;
 623            next if $t =~ m!\.arch-ids/!;
 624            next if $t =~ m!\.arch-inventory$!;
 625           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 626           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 627           if  ($t =~ /\\/ ){
 628               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
 629           }
 630            push (@tmp, $t);
 631        }
 632        @$ref = @tmp;
 633    }
 634    
 635    #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren]; 
 636    return       ($sum, $msg, \@add, \@del, \@mod, \@ren); 
 637}
 638
 639# write/read a tag
 640sub tag {
 641    my ($tag, $commit) = @_;
 642 
 643    if ($opt_o) {
 644        $tag =~ s|/|--|g;
 645    } else {
 646        # don't use subdirs for tags yet, it could screw up other porcelains
 647        $tag =~ s|/|,|g;
 648    }
 649    
 650    if ($commit) {
 651        open(C,">","$git_dir/refs/tags/$tag")
 652            or die "Cannot create tag $tag: $!\n";
 653        print C "$commit\n"
 654            or die "Cannot write tag $tag: $!\n";
 655        close(C)
 656            or die "Cannot write tag $tag: $!\n";
 657        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 658    } else {                    # read
 659        open(C,"<","$git_dir/refs/tags/$tag")
 660            or die "Cannot read tag $tag: $!\n";
 661        $commit = <C>;
 662        chomp $commit;
 663        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 664        close(C)
 665            or die "Cannot read tag $tag: $!\n";
 666        return $commit;
 667    }
 668}
 669
 670# write/read a private tag
 671# reads fail softly if the tag isn't there
 672sub ptag {
 673    my ($tag, $commit) = @_;
 674
 675    # don't use subdirs for tags yet, it could screw up other porcelains
 676    $tag =~ s|/|,|g; 
 677    
 678    my $tag_file = "$ptag_dir/$tag";
 679    my $tag_branch_dir = dirname($tag_file);
 680    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 681
 682    if ($commit) {              # write
 683        open(C,">",$tag_file)
 684            or die "Cannot create tag $tag: $!\n";
 685        print C "$commit\n"
 686            or die "Cannot write tag $tag: $!\n";
 687        close(C)
 688            or die "Cannot write tag $tag: $!\n";
 689        $rptags{$commit} = $tag 
 690            unless $tag =~ m/--base-0$/;
 691    } else {                    # read
 692        # if the tag isn't there, return 0
 693        unless ( -s $tag_file) {
 694            return 0;
 695        }
 696        open(C,"<",$tag_file)
 697            or die "Cannot read tag $tag: $!\n";
 698        $commit = <C>;
 699        chomp $commit;
 700        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 701        close(C)
 702            or die "Cannot read tag $tag: $!\n";
 703        unless (defined $rptags{$commit}) {
 704            $rptags{$commit} = $tag;
 705        }
 706        return $commit;
 707    }
 708}
 709
 710sub find_parents {
 711    #
 712    # Identify what branches are merging into me
 713    # and whether we are fully merged
 714    # git-merge-base <headsha> <headsha> should tell
 715    # me what the base of the merge should be 
 716    #
 717    my $ps = shift;
 718
 719    my %branches; # holds an arrayref per branch
 720                  # the arrayref contains a list of
 721                  # merged patches between the base
 722                  # of the merge and the current head
 723
 724    my @parents;  # parents found for this commit
 725
 726    # simple loop to split the merges
 727    # per branch
 728    foreach my $merge (@{$ps->{merges}}) {
 729        my $branch = git_branchname($merge);
 730        unless (defined $branches{$branch} ){
 731            $branches{$branch} = [];
 732        }
 733        push @{$branches{$branch}}, $merge;
 734    }
 735
 736    #
 737    # foreach branch find a merge base and walk it to the 
 738    # head where we are, collecting the merged patchsets that
 739    # Arch has recorded. Keep that in @have
 740    # Compare that with the commits on the other branch
 741    # between merge-base and the tip of the branch (@need)
 742    # and see if we have a series of consecutive patches
 743    # starting from the merge base. The tip of the series
 744    # of consecutive patches merged is our new parent for 
 745    # that branch.
 746    #
 747    foreach my $branch (keys %branches) {
 748
 749        # check that we actually know about the branch
 750        next unless -e "$git_dir/refs/heads/$branch";
 751
 752        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 753        if ($?) { 
 754            # Don't die here, Arch supports one-way cherry-picking
 755            # between branches with no common base (or any relationship
 756            # at all beforehand)
 757            warn "Cannot find merge base for $branch and $ps->{branch}";
 758            next;
 759        }
 760        chomp $mergebase;
 761
 762        # now walk up to the mergepoint collecting what patches we have
 763        my $branchtip = git_rev_parse($ps->{branch});
 764        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 765        my %have; # collected merges this branch has
 766        foreach my $merge (@{$ps->{merges}}) {
 767            $have{$merge} = 1;
 768        }
 769        my %ancestorshave;
 770        foreach my $par (@ancestors) {
 771            $par = commitid2pset($par);
 772            if (defined $par->{merges}) {
 773                foreach my $merge (@{$par->{merges}}) {
 774                    $ancestorshave{$merge}=1;
 775                }
 776            }
 777        }
 778        # print "++++ Merges in $ps->{id} are....\n";
 779        # my @have = sort keys %have;   print Dumper(\@have);
 780
 781        # merge what we have with what ancestors have
 782        %have = (%have, %ancestorshave);
 783
 784        # see what the remote branch has - these are the merges we 
 785        # will want to have in a consecutive series from the mergebase
 786        my $otherbranchtip = git_rev_parse($branch);
 787        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 788        my @need;
 789        foreach my $needps (@needraw) {         # get the psets
 790            $needps = commitid2pset($needps);
 791            # git-rev-list will also
 792            # list commits merged in via earlier 
 793            # merges. we are only interested in commits
 794            # from the branch we're looking at
 795            if ($branch eq $needps->{branch}) {
 796                push @need, $needps->{id};
 797            }
 798        }
 799
 800        # print "++++ Merges from $branch we want are....\n";
 801        # print Dumper(\@need);
 802
 803        my $newparent;
 804        while (my $needed_commit = pop @need) {
 805            if ($have{$needed_commit}) {
 806                $newparent = $needed_commit;
 807            } else {
 808                last; # break out of the while
 809            }
 810        }
 811        if ($newparent) {
 812            push @parents, $newparent;
 813        }
 814
 815
 816    } # end foreach branch
 817
 818    # prune redundant parents
 819    my %parents;
 820    foreach my $p (@parents) {
 821        $parents{$p} = 1;
 822    }
 823    foreach my $p (@parents) {
 824        next unless exists $psets{$p}{merges};
 825        next unless ref    $psets{$p}{merges};
 826        my @merges = @{$psets{$p}{merges}};
 827        foreach my $merge (@merges) {
 828            if ($parents{$merge}) { 
 829                delete $parents{$merge};
 830            }
 831        }
 832    }
 833    @parents = ();
 834    foreach (keys %parents) {
 835        push @parents, '-p', ptag($_);
 836    }
 837    return @parents;
 838}
 839
 840sub git_rev_parse {
 841    my $name = shift;
 842    my $val  = `git-rev-parse $name`;
 843    die "Error: git-rev-parse $name" if $?;
 844    chomp $val;
 845    return $val;
 846}
 847
 848# resolve a SHA1 to a known patchset
 849sub commitid2pset {
 850    my $commitid = shift;
 851    chomp $commitid;
 852    my $name = $rptags{$commitid} 
 853        || die "Cannot find reverse tag mapping for $commitid";
 854    $name =~ s|,|/|;
 855    my $ps   = $psets{$name} 
 856        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
 857    return $ps;
 858}
 859
 860# an alterative to `command` that allows input to be passed as an array
 861# to work around shell problems with weird characters in arguments
 862sub safe_pipe_capture {
 863    my @output;
 864    if (my $pid = open my $child, '-|') {
 865        @output = (<$child>);
 866        close $child or die join(' ',@_).": $! $?";
 867    } else {
 868        exec(@_) or die $?; # exec() can fail the executable can't be found
 869    }
 870    return wantarray ? @output : join('',@output);
 871}
 872
 873