git-archimport.perlon commit archimport: fix -t tmpdir switch (5744f27)
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  13
  14Imports a project from one or more Arch repositories. It will follow branches
  15and repositories within the namespaces defined by the <archive/branch>
  16parameters suppplied. If it cannot find the remote branch a merge comes from
  17it will just import it as a regular commit. If it can find it, it will mark it 
  18as a merge whenever possible.
  19
  20See man (1) git-archimport for more details.
  21
  22=head1 TODO
  23
  24 - create tag objects instead of ref tags
  25 - audit shell-escaping of filenames
  26 - hide our private tags somewhere smarter
  27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  28
  29=head1 Devel tricks
  30
  31Add print in front of the shell commands invoked via backticks. 
  32
  33=head1 Devel Notes
  34
  35There are several places where Arch and git terminology are intermixed
  36and potentially confused.
  37
  38The notion of a "branch" in git is approximately equivalent to
  39a "archive/category--branch--version" in Arch.  Also, it should be noted
  40that the "--branch" portion of "archive/category--branch--version" is really
  41optional in Arch although not many people (nor tools!) seem to know this.
  42This means that "archive/category--version" is also a valid "branch"
  43in git terms.
  44
  45We always refer to Arch names by their fully qualified variant (which
  46means the "archive" name is prefixed.
  47
  48For people unfamiliar with Arch, an "archive" is the term for "repository",
  49and can contain multiple, unrelated branches.
  50
  51=cut
  52
  53use strict;
  54use warnings;
  55use Getopt::Std;
  56use File::Spec;
  57use File::Temp qw(tempfile tempdir);
  58use File::Path qw(mkpath rmtree);
  59use File::Basename qw(basename dirname);
  60use Time::Local;
  61use IO::Socket;
  62use IO::Pipe;
  63use POSIX qw(strftime dup2);
  64use Data::Dumper qw/ Dumper /;
  65use IPC::Open2;
  66
  67$SIG{'PIPE'}="IGNORE";
  68$ENV{'TZ'}="UTC";
  69
  70my $git_dir = $ENV{"GIT_DIR"} || ".git";
  71$ENV{"GIT_DIR"} = $git_dir;
  72my $ptag_dir = "$git_dir/archimport/tags";
  73
  74our($opt_h,$opt_v, $opt_T,$opt_t,$opt_o);
  75
  76sub usage() {
  77    print STDERR <<END;
  78Usage: ${\basename $0}     # fetch/update GIT from Arch
  79       [ -o ] [ -h ] [ -v ] [ -T ] [ -t tempdir ] 
  80       repository/arch-branch [ repository/arch-branch] ...
  81END
  82    exit(1);
  83}
  84
  85getopts("Thvt:") or usage();
  86usage if $opt_h;
  87
  88@ARGV >= 1 or usage();
  89my @arch_roots = @ARGV;
  90
  91$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
  92my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  93$opt_v && print "+ Using $tmp as temporary directory\n";
  94
  95my @psets  = ();                # the collection
  96my %psets  = ();                # the collection, by name
  97
  98my %rptags = ();                # my reverse private tags
  99                                # to map a SHA1 to a commitid
 100my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 101
 102foreach my $root (@arch_roots) {
 103    my ($arepo, $abranch) = split(m!/!, $root);
 104    open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |" 
 105        or die "Problems with tla abrowse: $!";
 106    
 107    my %ps        = ();         # the current one
 108    my $mode      = '';
 109    my $lastseen  = '';
 110    
 111    while (<ABROWSE>) {
 112        chomp;
 113        
 114        # first record padded w 8 spaces
 115        if (s/^\s{8}\b//) {
 116            
 117            # store the record we just captured
 118            if (%ps) {
 119                my %temp = %ps; # break references
 120                push (@psets, \%temp);
 121                $psets{$temp{id}} = \%temp;
 122                %ps = ();
 123            }
 124            
 125            my ($id, $type) = split(m/\s{3}/, $_);
 126            $ps{id}   = $id;
 127            $ps{repo} = $arepo;
 128
 129            # deal with types
 130            if ($type =~ m/^\(simple changeset\)/) {
 131                $ps{type} = 's';
 132            } elsif ($type eq '(initial import)') {
 133                $ps{type} = 'i';
 134            } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
 135                $ps{type} = 't';
 136                $ps{tag}  = $1;
 137            } else { 
 138                warn "Unknown type $type";
 139            }
 140            $lastseen = 'id';
 141        }
 142        
 143        if (s/^\s{10}//) { 
 144            # 10 leading spaces or more 
 145            # indicate commit metadata
 146            
 147            # date & author 
 148            if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
 149                
 150                my ($date, $authoremail) = split(m/\s{2,}/, $_);
 151                $ps{date}   = $date;
 152                $ps{date}   =~ s/\bGMT$//; # strip off trailign GMT
 153                if ($ps{date} =~ m/\b\w+$/) {
 154                    warn 'Arch dates not in GMT?! - imported dates will be wrong';
 155                }
 156            
 157                $authoremail =~ m/^(.+)\s(\S+)$/;
 158                $ps{author} = $1;
 159                $ps{email}  = $2;
 160            
 161                $lastseen = 'date';
 162            
 163            } elsif ($lastseen eq 'date') {
 164                # the only hint is position
 165                # subject is after date
 166                $ps{subj} = $_;
 167                $lastseen = 'subj';
 168            
 169            } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
 170                $ps{merges} = [];
 171                $lastseen = 'merges';
 172            
 173            } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 174                push (@{$ps{merges}}, $_);
 175            } else {
 176                warn 'more metadata after merges!?';
 177            }
 178            
 179        }
 180    }
 181
 182    if (%ps) {
 183        my %temp = %ps;         # break references
 184        push (@psets, \%temp);  
 185        $psets{ $temp{id} } = \%temp;
 186        %ps = ();
 187    }    
 188    close ABROWSE;
 189}                               # end foreach $root
 190
 191## Order patches by time
 192@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 193
 194#print Dumper \@psets;
 195
 196##
 197## TODO cleanup irrelevant patches
 198##      and put an initial import
 199##      or a full tag
 200my $import = 0;
 201unless (-d $git_dir) { # initial import
 202    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 203        print "Starting import from $psets[0]{id}\n";
 204        `git-init-db`;
 205        die $! if $?;
 206        $import = 1;
 207    } else {
 208        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 209    }
 210} else {    # progressing an import
 211    # load the rptags
 212    opendir(DIR, "$git_dir/archimport/tags")
 213        || die "can't opendir: $!";
 214    while (my $file = readdir(DIR)) {
 215        # skip non-interesting-files
 216        next unless -f "$ptag_dir/$file";
 217   
 218        # convert first '--' to '/' from old git-archimport to use
 219        # as an archivename/c--b--v private tag
 220        if ($file !~ m!,!) {
 221            my $oldfile = $file;
 222            $file =~ s!--!,!;
 223            print STDERR "converting old tag $oldfile to $file\n";
 224            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 225        }
 226        my $sha = ptag($file);
 227        chomp $sha;
 228        $rptags{$sha} = $file;
 229    }
 230    closedir DIR;
 231}
 232
 233# process patchsets
 234# extract the Arch repository name (Arch "archive" in Arch-speak)
 235sub extract_reponame {
 236    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 237    return (split(/\//, $fq_cvbr))[0];
 238}
 239 
 240sub extract_versionname {
 241    my $name = shift;
 242    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 243    return $name;
 244}
 245
 246# convert a fully-qualified revision or version to a unique dirname:
 247#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 248# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 249#
 250# the git notion of a branch is closer to
 251# archive/category--branch--version than archive/category--branch, so we
 252# use this to convert to git branch names.
 253# Also, keep archive names but replace '/' with ',' since it won't require
 254# subdirectories, and is safer than swapping '--' which could confuse
 255# reverse-mapping when dealing with bastard branches that
 256# are just archive/category--version  (no --branch)
 257sub tree_dirname {
 258    my $revision = shift;
 259    my $name = extract_versionname($revision);
 260    $name =~ s#/#,#;
 261    return $name;
 262}
 263
 264# old versions of git-archimport just use the <category--branch> part:
 265sub old_style_branchname {
 266    my $id = shift;
 267    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 268    chomp $ret;
 269    return $ret;
 270}
 271
 272*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 273
 274# process patchsets
 275foreach my $ps (@psets) {
 276    $ps->{branch} = git_branchname($ps->{id});
 277
 278    #
 279    # ensure we have a clean state 
 280    # 
 281    if (`git diff-files`) {
 282        die "Unclean tree when about to process $ps->{id} " .
 283            " - did we fail to commit cleanly before?";
 284    }
 285    die $! if $?;
 286
 287    #
 288    # skip commits already in repo
 289    #
 290    if (ptag($ps->{id})) {
 291      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 292      next;
 293    }
 294
 295    print " * Starting to work on $ps->{id}\n";
 296
 297    # 
 298    # create the branch if needed
 299    #
 300    if ($ps->{type} eq 'i' && !$import) {
 301        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 302    }
 303
 304    unless ($import) { # skip for import
 305        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 306            # we know about this branch
 307            system('git-checkout',$ps->{branch});
 308        } else {
 309            # new branch! we need to verify a few things
 310            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 311            my $branchpoint = ptag($ps->{tag});
 312            die "Tagging from unknown id unsupported: $ps->{tag}" 
 313                unless $branchpoint;
 314            
 315            # find where we are supposed to branch from
 316            system('git-checkout','-b',$ps->{branch},$branchpoint);
 317
 318            # If we trust Arch with the fact that this is just 
 319            # a tag, and it does not affect the state of the tree
 320            # then we just tag and move on
 321            tag($ps->{id}, $branchpoint);
 322            ptag($ps->{id}, $branchpoint);
 323            print " * Tagged $ps->{id} at $branchpoint\n";
 324            next;
 325        } 
 326        die $! if $?;
 327    } 
 328
 329    #
 330    # Apply the import/changeset/merge into the working tree
 331    # 
 332    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 333        apply_import($ps) or die $!;
 334        $import=0;
 335    } elsif ($ps->{type} eq 's') {
 336        apply_cset($ps);
 337    }
 338
 339    #
 340    # prepare update git's index, based on what arch knows
 341    # about the pset, resolve parents, etc
 342    #
 343    my $tree;
 344    
 345    my $commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 
 346    die "Error in cat-archive-log: $!" if $?;
 347        
 348    # parselog will git-add/rm files
 349    # and generally prepare things for the commit
 350    # NOTE: parselog will shell-quote filenames! 
 351    my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
 352    my $logmessage = "$sum\n$msg";
 353
 354
 355    # imports don't give us good info
 356    # on added files. Shame on them
 357    if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 
 358        `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
 359        `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
 360    }
 361
 362    if (@$add) {
 363        while (@$add) {
 364            my @slice = splice(@$add, 0, 100);
 365            my $slice = join(' ', @slice);          
 366            `git-update-index --add $slice`;
 367            die "Error in git-update-index --add: $!" if $?;
 368        }
 369    }
 370    if (@$del) {
 371        foreach my $file (@$del) {
 372            unlink $file or die "Problems deleting $file : $!";
 373        }
 374        while (@$del) {
 375            my @slice = splice(@$del, 0, 100);
 376            my $slice = join(' ', @slice);
 377            `git-update-index --remove $slice`;
 378            die "Error in git-update-index --remove: $!" if $?;
 379        }
 380    }
 381    if (@$ren) {                # renamed
 382        if (@$ren % 2) {
 383            die "Odd number of entries in rename!?";
 384        }
 385        ;
 386        while (@$ren) {
 387            my $from = pop @$ren;
 388            my $to   = pop @$ren;           
 389
 390            unless (-d dirname($to)) {
 391                mkpath(dirname($to)); # will die on err
 392            }
 393            #print "moving $from $to";
 394            `mv $from $to`;
 395            die "Error renaming $from $to : $!" if $?;
 396            `git-update-index --remove $from`;
 397            die "Error in git-update-index --remove: $!" if $?;
 398            `git-update-index --add $to`;
 399            die "Error in git-update-index --add: $!" if $?;
 400        }
 401
 402    }
 403    if (@$mod) {                # must be _after_ renames
 404        while (@$mod) {
 405            my @slice = splice(@$mod, 0, 100);
 406            my $slice = join(' ', @slice);
 407            `git-update-index $slice`;
 408            die "Error in git-update-index: $!" if $?;
 409        }
 410    }
 411
 412    # warn "errors when running git-update-index! $!";
 413    $tree = `git-write-tree`;
 414    die "cannot write tree $!" if $?;
 415    chomp $tree;
 416        
 417    
 418    #
 419    # Who's your daddy?
 420    #
 421    my @par;
 422    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 423        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
 424            my $p = <HEAD>;
 425            close HEAD;
 426            chomp $p;
 427            push @par, '-p', $p;
 428        } else { 
 429            if ($ps->{type} eq 's') {
 430                warn "Could not find the right head for the branch $ps->{branch}";
 431            }
 432        }
 433    }
 434    
 435    if ($ps->{merges}) {
 436        push @par, find_parents($ps);
 437    }
 438
 439    #    
 440    # Commit, tag and clean state
 441    #
 442    $ENV{TZ}                  = 'GMT';
 443    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 444    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 445    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 446    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 447    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 448    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 449
 450    my ($pid, $commit_rh, $commit_wh);
 451    $commit_rh = 'commit_rh';
 452    $commit_wh = 'commit_wh';
 453    
 454    $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 
 455        or die $!;
 456    print WRITER $logmessage;   # write
 457    close WRITER;
 458    my $commitid = <READER>;    # read
 459    chomp $commitid;
 460    close READER;
 461    waitpid $pid,0;             # close;
 462
 463    if (length $commitid != 40) {
 464        die "Something went wrong with the commit! $! $commitid";
 465    }
 466    #
 467    # Update the branch
 468    # 
 469    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
 470    print HEAD $commitid;
 471    close HEAD;
 472    system('git-update-ref', 'HEAD', "$ps->{branch}");
 473
 474    # tag accordingly
 475    ptag($ps->{id}, $commitid); # private tag
 476    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 477        tag($ps->{id}, $commitid);
 478    }
 479    print " * Committed $ps->{id}\n";
 480    print "   + tree   $tree\n";
 481    print "   + commit $commitid\n";
 482    $opt_v && print "   + commit date is  $ps->{date} \n";
 483    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
 484}
 485
 486sub apply_import {
 487    my $ps = shift;
 488    my $bname = git_branchname($ps->{id});
 489
 490    mkpath($tmp);
 491
 492    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
 493    die "Cannot get import: $!" if $?;    
 494    system('rsync','-aI','--delete', '--exclude',$git_dir,
 495                '--exclude','.arch-ids','--exclude','{arch}',
 496                "$tmp/import/", './');
 497    die "Cannot rsync import:$!" if $?;
 498    
 499    rmtree("$tmp/import");
 500    die "Cannot remove tempdir: $!" if $?;
 501    
 502
 503    return 1;
 504}
 505
 506sub apply_cset {
 507    my $ps = shift;
 508
 509    mkpath($tmp);
 510
 511    # get the changeset
 512    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
 513    die "Cannot get changeset: $!" if $?;
 514    
 515    # apply patches
 516    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 517        # this can be sped up considerably by doing
 518        #    (find | xargs cat) | patch
 519        # but that cna get mucked up by patches
 520        # with missing trailing newlines or the standard 
 521        # 'missing newline' flag in the patch - possibly
 522        # produced with an old/buggy diff.
 523        # slow and safe, we invoke patch once per patchfile
 524        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 525        die "Problem applying patches! $!" if $?;
 526    }
 527
 528    # apply changed binary files
 529    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 530        foreach my $mod (@modified) {
 531            chomp $mod;
 532            my $orig = $mod;
 533            $orig =~ s/\.modified$//; # lazy
 534            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 535            #print "rsync -p '$mod' '$orig'";
 536            system('rsync','-p',$mod,"./$orig");
 537            die "Problem applying binary changes! $!" if $?;
 538        }
 539    }
 540
 541    # bring in new files
 542    system('rsync','-aI','--exclude',$git_dir,
 543                '--exclude','.arch-ids',
 544                '--exclude', '{arch}',
 545                "$tmp/changeset/new-files-archive/",'./');
 546
 547    # deleted files are hinted from the commitlog processing
 548
 549    rmtree("$tmp/changeset");
 550}
 551
 552
 553# =for reference
 554# A log entry looks like 
 555# Revision: moodle-org--moodle--1.3.3--patch-15
 556# Archive: arch-eduforge@catalyst.net.nz--2004
 557# Creator: Penny Leach <penny@catalyst.net.nz>
 558# Date: Wed May 25 14:15:34 NZST 2005
 559# Standard-date: 2005-05-25 02:15:34 GMT
 560# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 561#     lang/de/.arch-ids/block_html.php.id
 562# New-directories: lang/de/help/questionnaire
 563#     lang/de/help/questionnaire/.arch-ids
 564# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 565#    db_sears.sql db/db_sears.sql
 566# Removed-files: lang/be/docs/.arch-ids/release.html.id
 567#     lang/be/docs/.arch-ids/releaseold.html.id
 568# Modified-files: admin/cron.php admin/delete.php
 569#     admin/editor.html backup/lib.php backup/restore.php
 570# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 571# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 572# Keywords:
 573#
 574# Updating yadda tadda tadda madda
 575sub parselog {
 576    my $log = shift;
 577    #print $log;
 578
 579    my (@add, @del, @mod, @ren, @kw, $sum, $msg );
 580
 581    if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
 582        my $files = $1;
 583        @add = split(m/\s+/s, $files);
 584    }
 585       
 586    if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
 587        my $files = $1;
 588        @del = split(m/\s+/s, $files);
 589    }
 590    
 591    if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
 592        my $files = $1;
 593        @mod = split(m/\s+/s, $files);
 594    }
 595    
 596    if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
 597        my $files = $1;
 598        @ren = split(m/\s+/s, $files);
 599    }
 600
 601    $sum ='';
 602    if ($log =~ m/^Summary:(.+?)$/m ) {
 603        $sum = $1;
 604        $sum =~ s/^\s+//;
 605        $sum =~ s/\s+$//;
 606    }
 607
 608    $msg = '';
 609    if ($log =~ m/\n\n(.+)$/s) {
 610        $msg = $1;
 611        $msg =~ s/^\s+//;
 612        $msg =~ s/\s+$//;
 613    }
 614
 615
 616    # cleanup the arrays
 617    foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
 618        my @tmp = ();
 619        while (my $t = pop @$ref) {
 620            next unless length ($t);
 621            next if $t =~ m!\{arch\}/!;
 622            next if $t =~ m!\.arch-ids/!;
 623            next if $t =~ m!\.arch-inventory$!;
 624           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 625           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 626           if  ($t =~ /\\/ ){
 627               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
 628           }
 629            push (@tmp, $t);
 630        }
 631        @$ref = @tmp;
 632    }
 633    
 634    #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren]; 
 635    return       ($sum, $msg, \@add, \@del, \@mod, \@ren); 
 636}
 637
 638# write/read a tag
 639sub tag {
 640    my ($tag, $commit) = @_;
 641 
 642    if ($opt_o) {
 643        $tag =~ s|/|--|g;
 644    } else {
 645        # don't use subdirs for tags yet, it could screw up other porcelains
 646        $tag =~ s|/|,|g;
 647    }
 648    
 649    if ($commit) {
 650        open(C,">","$git_dir/refs/tags/$tag")
 651            or die "Cannot create tag $tag: $!\n";
 652        print C "$commit\n"
 653            or die "Cannot write tag $tag: $!\n";
 654        close(C)
 655            or die "Cannot write tag $tag: $!\n";
 656        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 657    } else {                    # read
 658        open(C,"<","$git_dir/refs/tags/$tag")
 659            or die "Cannot read tag $tag: $!\n";
 660        $commit = <C>;
 661        chomp $commit;
 662        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 663        close(C)
 664            or die "Cannot read tag $tag: $!\n";
 665        return $commit;
 666    }
 667}
 668
 669# write/read a private tag
 670# reads fail softly if the tag isn't there
 671sub ptag {
 672    my ($tag, $commit) = @_;
 673
 674    # don't use subdirs for tags yet, it could screw up other porcelains
 675    $tag =~ s|/|,|g; 
 676    
 677    my $tag_file = "$ptag_dir/$tag";
 678    my $tag_branch_dir = dirname($tag_file);
 679    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 680
 681    if ($commit) {              # write
 682        open(C,">",$tag_file)
 683            or die "Cannot create tag $tag: $!\n";
 684        print C "$commit\n"
 685            or die "Cannot write tag $tag: $!\n";
 686        close(C)
 687            or die "Cannot write tag $tag: $!\n";
 688        $rptags{$commit} = $tag 
 689            unless $tag =~ m/--base-0$/;
 690    } else {                    # read
 691        # if the tag isn't there, return 0
 692        unless ( -s $tag_file) {
 693            return 0;
 694        }
 695        open(C,"<",$tag_file)
 696            or die "Cannot read tag $tag: $!\n";
 697        $commit = <C>;
 698        chomp $commit;
 699        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 700        close(C)
 701            or die "Cannot read tag $tag: $!\n";
 702        unless (defined $rptags{$commit}) {
 703            $rptags{$commit} = $tag;
 704        }
 705        return $commit;
 706    }
 707}
 708
 709sub find_parents {
 710    #
 711    # Identify what branches are merging into me
 712    # and whether we are fully merged
 713    # git-merge-base <headsha> <headsha> should tell
 714    # me what the base of the merge should be 
 715    #
 716    my $ps = shift;
 717
 718    my %branches; # holds an arrayref per branch
 719                  # the arrayref contains a list of
 720                  # merged patches between the base
 721                  # of the merge and the current head
 722
 723    my @parents;  # parents found for this commit
 724
 725    # simple loop to split the merges
 726    # per branch
 727    foreach my $merge (@{$ps->{merges}}) {
 728        my $branch = git_branchname($merge);
 729        unless (defined $branches{$branch} ){
 730            $branches{$branch} = [];
 731        }
 732        push @{$branches{$branch}}, $merge;
 733    }
 734
 735    #
 736    # foreach branch find a merge base and walk it to the 
 737    # head where we are, collecting the merged patchsets that
 738    # Arch has recorded. Keep that in @have
 739    # Compare that with the commits on the other branch
 740    # between merge-base and the tip of the branch (@need)
 741    # and see if we have a series of consecutive patches
 742    # starting from the merge base. The tip of the series
 743    # of consecutive patches merged is our new parent for 
 744    # that branch.
 745    #
 746    foreach my $branch (keys %branches) {
 747
 748        # check that we actually know about the branch
 749        next unless -e "$git_dir/refs/heads/$branch";
 750
 751        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 752        if ($?) { 
 753            # Don't die here, Arch supports one-way cherry-picking
 754            # between branches with no common base (or any relationship
 755            # at all beforehand)
 756            warn "Cannot find merge base for $branch and $ps->{branch}";
 757            next;
 758        }
 759        chomp $mergebase;
 760
 761        # now walk up to the mergepoint collecting what patches we have
 762        my $branchtip = git_rev_parse($ps->{branch});
 763        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 764        my %have; # collected merges this branch has
 765        foreach my $merge (@{$ps->{merges}}) {
 766            $have{$merge} = 1;
 767        }
 768        my %ancestorshave;
 769        foreach my $par (@ancestors) {
 770            $par = commitid2pset($par);
 771            if (defined $par->{merges}) {
 772                foreach my $merge (@{$par->{merges}}) {
 773                    $ancestorshave{$merge}=1;
 774                }
 775            }
 776        }
 777        # print "++++ Merges in $ps->{id} are....\n";
 778        # my @have = sort keys %have;   print Dumper(\@have);
 779
 780        # merge what we have with what ancestors have
 781        %have = (%have, %ancestorshave);
 782
 783        # see what the remote branch has - these are the merges we 
 784        # will want to have in a consecutive series from the mergebase
 785        my $otherbranchtip = git_rev_parse($branch);
 786        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 787        my @need;
 788        foreach my $needps (@needraw) {         # get the psets
 789            $needps = commitid2pset($needps);
 790            # git-rev-list will also
 791            # list commits merged in via earlier 
 792            # merges. we are only interested in commits
 793            # from the branch we're looking at
 794            if ($branch eq $needps->{branch}) {
 795                push @need, $needps->{id};
 796            }
 797        }
 798
 799        # print "++++ Merges from $branch we want are....\n";
 800        # print Dumper(\@need);
 801
 802        my $newparent;
 803        while (my $needed_commit = pop @need) {
 804            if ($have{$needed_commit}) {
 805                $newparent = $needed_commit;
 806            } else {
 807                last; # break out of the while
 808            }
 809        }
 810        if ($newparent) {
 811            push @parents, $newparent;
 812        }
 813
 814
 815    } # end foreach branch
 816
 817    # prune redundant parents
 818    my %parents;
 819    foreach my $p (@parents) {
 820        $parents{$p} = 1;
 821    }
 822    foreach my $p (@parents) {
 823        next unless exists $psets{$p}{merges};
 824        next unless ref    $psets{$p}{merges};
 825        my @merges = @{$psets{$p}{merges}};
 826        foreach my $merge (@merges) {
 827            if ($parents{$merge}) { 
 828                delete $parents{$merge};
 829            }
 830        }
 831    }
 832    @parents = ();
 833    foreach (keys %parents) {
 834        push @parents, '-p', ptag($_);
 835    }
 836    return @parents;
 837}
 838
 839sub git_rev_parse {
 840    my $name = shift;
 841    my $val  = `git-rev-parse $name`;
 842    die "Error: git-rev-parse $name" if $?;
 843    chomp $val;
 844    return $val;
 845}
 846
 847# resolve a SHA1 to a known patchset
 848sub commitid2pset {
 849    my $commitid = shift;
 850    chomp $commitid;
 851    my $name = $rptags{$commitid} 
 852        || die "Cannot find reverse tag mapping for $commitid";
 853    $name =~ s|,|/|;
 854    my $ps   = $psets{$name} 
 855        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
 856    return $ps;
 857}
 858
 859# an alterative to `command` that allows input to be passed as an array
 860# to work around shell problems with weird characters in arguments
 861sub safe_pipe_capture {
 862    my @output;
 863    if (my $pid = open my $child, '-|') {
 864        @output = (<$child>);
 865        close $child or die join(' ',@_).": $! $?";
 866    } else {
 867        exec(@_) or die $?; # exec() can fail the executable can't be found
 868    }
 869    return wantarray ? @output : join('',@output);
 870}
 871
 872