git-archimport.perlon commit archimport: first, make sure it still compiles (2777ef7)
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  13
  14Imports a project from one or more Arch repositories. It will follow branches
  15and repositories within the namespaces defined by the <archive/branch>
  16parameters suppplied. If it cannot find the remote branch a merge comes from
  17it will just import it as a regular commit. If it can find it, it will mark it 
  18as a merge whenever possible.
  19
  20See man (1) git-archimport for more details.
  21
  22=head1 TODO
  23
  24 - create tag objects instead of ref tags
  25 - audit shell-escaping of filenames
  26 - hide our private tags somewhere smarter
  27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  28
  29=head1 Devel tricks
  30
  31Add print in front of the shell commands invoked via backticks. 
  32
  33=head1 Devel Notes
  34
  35There are several places where Arch and git terminology are intermixed
  36and potentially confused.
  37
  38The notion of a "branch" in git is approximately equivalent to
  39a "archive/category--branch--version" in Arch.  Also, it should be noted
  40that the "--branch" portion of "archive/category--branch--version" is really
  41optional in Arch although not many people (nor tools!) seem to know this.
  42This means that "archive/category--version" is also a valid "branch"
  43in git terms.
  44
  45We always refer to Arch names by their fully qualified variant (which
  46means the "archive" name is prefixed.
  47
  48For people unfamiliar with Arch, an "archive" is the term for "repository",
  49and can contain multiple, unrelated branches.
  50
  51=cut
  52
  53use strict;
  54use warnings;
  55use Getopt::Std;
  56use File::Spec;
  57use File::Temp qw(tempfile tempdir);
  58use File::Path qw(mkpath);
  59use File::Basename qw(basename dirname);
  60use String::ShellQuote;
  61use Time::Local;
  62use IO::Socket;
  63use IO::Pipe;
  64use POSIX qw(strftime dup2);
  65use Data::Dumper qw/ Dumper /;
  66use IPC::Open2;
  67
  68$SIG{'PIPE'}="IGNORE";
  69$ENV{'TZ'}="UTC";
  70
  71my $git_dir = $ENV{"GIT_DIR"} || ".git";
  72$ENV{"GIT_DIR"} = $git_dir;
  73my $ptag_dir = "$git_dir/archimport/tags";
  74
  75our($opt_h,$opt_v, $opt_T,$opt_t,$opt_o);
  76
  77sub usage() {
  78    print STDERR <<END;
  79Usage: ${\basename $0}     # fetch/update GIT from Arch
  80       [ -o ] [ -h ] [ -v ] [ -T ] [ -t tempdir ] 
  81       repository/arch-branch [ repository/arch-branch] ...
  82END
  83    exit(1);
  84}
  85
  86getopts("Thvt:") or usage();
  87usage if $opt_h;
  88
  89@ARGV >= 1 or usage();
  90my @arch_roots = @ARGV;
  91
  92my ($tmpdir, $tmpdirname) = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  93my $tmp = $opt_t || 1;
  94$tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  95$opt_v && print "+ Using $tmp as temporary directory\n";
  96
  97my @psets  = ();                # the collection
  98my %psets  = ();                # the collection, by name
  99
 100my %rptags = ();                # my reverse private tags
 101                                # to map a SHA1 to a commitid
 102my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 103
 104foreach my $root (@arch_roots) {
 105    my ($arepo, $abranch) = split(m!/!, $root);
 106    open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |" 
 107        or die "Problems with tla abrowse: $!";
 108    
 109    my %ps        = ();         # the current one
 110    my $mode      = '';
 111    my $lastseen  = '';
 112    
 113    while (<ABROWSE>) {
 114        chomp;
 115        
 116        # first record padded w 8 spaces
 117        if (s/^\s{8}\b//) {
 118            
 119            # store the record we just captured
 120            if (%ps) {
 121                my %temp = %ps; # break references
 122                push (@psets, \%temp);
 123                $psets{$temp{id}} = \%temp;
 124                %ps = ();
 125            }
 126            
 127            my ($id, $type) = split(m/\s{3}/, $_);
 128            $ps{id}   = $id;
 129            $ps{repo} = $arepo;
 130
 131            # deal with types
 132            if ($type =~ m/^\(simple changeset\)/) {
 133                $ps{type} = 's';
 134            } elsif ($type eq '(initial import)') {
 135                $ps{type} = 'i';
 136            } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
 137                $ps{type} = 't';
 138                $ps{tag}  = $1;
 139            } else { 
 140                warn "Unknown type $type";
 141            }
 142            $lastseen = 'id';
 143        }
 144        
 145        if (s/^\s{10}//) { 
 146            # 10 leading spaces or more 
 147            # indicate commit metadata
 148            
 149            # date & author 
 150            if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
 151                
 152                my ($date, $authoremail) = split(m/\s{2,}/, $_);
 153                $ps{date}   = $date;
 154                $ps{date}   =~ s/\bGMT$//; # strip off trailign GMT
 155                if ($ps{date} =~ m/\b\w+$/) {
 156                    warn 'Arch dates not in GMT?! - imported dates will be wrong';
 157                }
 158            
 159                $authoremail =~ m/^(.+)\s(\S+)$/;
 160                $ps{author} = $1;
 161                $ps{email}  = $2;
 162            
 163                $lastseen = 'date';
 164            
 165            } elsif ($lastseen eq 'date') {
 166                # the only hint is position
 167                # subject is after date
 168                $ps{subj} = $_;
 169                $lastseen = 'subj';
 170            
 171            } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
 172                $ps{merges} = [];
 173                $lastseen = 'merges';
 174            
 175            } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 176                push (@{$ps{merges}}, $_);
 177            } else {
 178                warn 'more metadata after merges!?';
 179            }
 180            
 181        }
 182    }
 183
 184    if (%ps) {
 185        my %temp = %ps;         # break references
 186        push (@psets, \%temp);  
 187        $psets{ $temp{id} } = \%temp;
 188        %ps = ();
 189    }    
 190    close ABROWSE;
 191}                               # end foreach $root
 192
 193## Order patches by time
 194@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 195
 196#print Dumper \@psets;
 197
 198##
 199## TODO cleanup irrelevant patches
 200##      and put an initial import
 201##      or a full tag
 202my $import = 0;
 203unless (-d $git_dir) { # initial import
 204    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 205        print "Starting import from $psets[0]{id}\n";
 206        `git-init-db`;
 207        die $! if $?;
 208        $import = 1;
 209    } else {
 210        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 211    }
 212} else {    # progressing an import
 213    # load the rptags
 214    opendir(DIR, "$git_dir/archimport/tags")
 215        || die "can't opendir: $!";
 216    while (my $file = readdir(DIR)) {
 217        # skip non-interesting-files
 218        next unless -f "$ptag_dir/$file";
 219   
 220        # convert first '--' to '/' from old git-archimport to use
 221        # as an archivename/c--b--v private tag
 222        if ($file !~ m!,!) {
 223            my $oldfile = $file;
 224            $file =~ s!--!,!;
 225            print STDERR "converting old tag $oldfile to $file\n";
 226            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 227        }
 228        my $sha = ptag($file);
 229        chomp $sha;
 230        $rptags{$sha} = $file;
 231    }
 232    closedir DIR;
 233}
 234
 235# process patchsets
 236# extract the Arch repository name (Arch "archive" in Arch-speak)
 237sub extract_reponame {
 238    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 239    return (split(/\//, $fq_cvbr))[0];
 240}
 241 
 242sub extract_versionname {
 243    my $name = shift;
 244    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 245    return $name;
 246}
 247
 248# convert a fully-qualified revision or version to a unique dirname:
 249#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 250# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 251#
 252# the git notion of a branch is closer to
 253# archive/category--branch--version than archive/category--branch, so we
 254# use this to convert to git branch names.
 255# Also, keep archive names but replace '/' with ',' since it won't require
 256# subdirectories, and is safer than swapping '--' which could confuse
 257# reverse-mapping when dealing with bastard branches that
 258# are just archive/category--version  (no --branch)
 259sub tree_dirname {
 260    my $revision = shift;
 261    my $name = extract_versionname($revision);
 262    $name =~ s#/#,#;
 263    return $name;
 264}
 265
 266# old versions of git-archimport just use the <category--branch> part:
 267sub old_style_branchname {
 268    my $id = shift;
 269    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 270    chomp $ret;
 271    return $ret;
 272}
 273
 274*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 275
 276# process patchsets
 277foreach my $ps (@psets) {
 278    $ps->{branch} = git_branchname($ps->{id});
 279
 280    #
 281    # ensure we have a clean state 
 282    # 
 283    if (`git diff-files`) {
 284        die "Unclean tree when about to process $ps->{id} " .
 285            " - did we fail to commit cleanly before?";
 286    }
 287    die $! if $?;
 288
 289    #
 290    # skip commits already in repo
 291    #
 292    if (ptag($ps->{id})) {
 293      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 294      next;
 295    }
 296
 297    print " * Starting to work on $ps->{id}\n";
 298
 299    # 
 300    # create the branch if needed
 301    #
 302    if ($ps->{type} eq 'i' && !$import) {
 303        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 304    }
 305
 306    unless ($import) { # skip for import
 307        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 308            # we know about this branch
 309            `git checkout    $ps->{branch}`;
 310        } else {
 311            # new branch! we need to verify a few things
 312            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 313            my $branchpoint = ptag($ps->{tag});
 314            die "Tagging from unknown id unsupported: $ps->{tag}" 
 315                unless $branchpoint;
 316            
 317            # find where we are supposed to branch from
 318            `git checkout -b $ps->{branch} $branchpoint`;
 319
 320            # If we trust Arch with the fact that this is just 
 321            # a tag, and it does not affect the state of the tree
 322            # then we just tag and move on
 323            tag($ps->{id}, $branchpoint);
 324            ptag($ps->{id}, $branchpoint);
 325            print " * Tagged $ps->{id} at $branchpoint\n";
 326            next;
 327        } 
 328        die $! if $?;
 329    } 
 330
 331    #
 332    # Apply the import/changeset/merge into the working tree
 333    # 
 334    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 335        apply_import($ps) or die $!;
 336        $import=0;
 337    } elsif ($ps->{type} eq 's') {
 338        apply_cset($ps);
 339    }
 340
 341    #
 342    # prepare update git's index, based on what arch knows
 343    # about the pset, resolve parents, etc
 344    #
 345    my $tree;
 346    
 347    my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`; 
 348    die "Error in cat-archive-log: $!" if $?;
 349        
 350    # parselog will git-add/rm files
 351    # and generally prepare things for the commit
 352    # NOTE: parselog will shell-quote filenames! 
 353    my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
 354    my $logmessage = "$sum\n$msg";
 355
 356
 357    # imports don't give us good info
 358    # on added files. Shame on them
 359    if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 
 360        `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
 361        `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
 362    }
 363
 364    if (@$add) {
 365        while (@$add) {
 366            my @slice = splice(@$add, 0, 100);
 367            my $slice = join(' ', @slice);          
 368            `git-update-index --add $slice`;
 369            die "Error in git-update-index --add: $!" if $?;
 370        }
 371    }
 372    if (@$del) {
 373        foreach my $file (@$del) {
 374            unlink $file or die "Problems deleting $file : $!";
 375        }
 376        while (@$del) {
 377            my @slice = splice(@$del, 0, 100);
 378            my $slice = join(' ', @slice);
 379            `git-update-index --remove $slice`;
 380            die "Error in git-update-index --remove: $!" if $?;
 381        }
 382    }
 383    if (@$ren) {                # renamed
 384        if (@$ren % 2) {
 385            die "Odd number of entries in rename!?";
 386        }
 387        ;
 388        while (@$ren) {
 389            my $from = pop @$ren;
 390            my $to   = pop @$ren;           
 391
 392            unless (-d dirname($to)) {
 393                mkpath(dirname($to)); # will die on err
 394            }
 395            #print "moving $from $to";
 396            `mv $from $to`;
 397            die "Error renaming $from $to : $!" if $?;
 398            `git-update-index --remove $from`;
 399            die "Error in git-update-index --remove: $!" if $?;
 400            `git-update-index --add $to`;
 401            die "Error in git-update-index --add: $!" if $?;
 402        }
 403
 404    }
 405    if (@$mod) {                # must be _after_ renames
 406        while (@$mod) {
 407            my @slice = splice(@$mod, 0, 100);
 408            my $slice = join(' ', @slice);
 409            `git-update-index $slice`;
 410            die "Error in git-update-index: $!" if $?;
 411        }
 412    }
 413
 414    # warn "errors when running git-update-index! $!";
 415    $tree = `git-write-tree`;
 416    die "cannot write tree $!" if $?;
 417    chomp $tree;
 418        
 419    
 420    #
 421    # Who's your daddy?
 422    #
 423    my @par;
 424    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 425        if (open HEAD, "<$git_dir/refs/heads/$ps->{branch}") {
 426            my $p = <HEAD>;
 427            close HEAD;
 428            chomp $p;
 429            push @par, '-p', $p;
 430        } else { 
 431            if ($ps->{type} eq 's') {
 432                warn "Could not find the right head for the branch $ps->{branch}";
 433            }
 434        }
 435    }
 436    
 437    if ($ps->{merges}) {
 438        push @par, find_parents($ps);
 439    }
 440    my $par = join (' ', @par);
 441
 442    #    
 443    # Commit, tag and clean state
 444    #
 445    $ENV{TZ}                  = 'GMT';
 446    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 447    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 448    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 449    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 450    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 451    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 452
 453    my ($pid, $commit_rh, $commit_wh);
 454    $commit_rh = 'commit_rh';
 455    $commit_wh = 'commit_wh';
 456    
 457    $pid = open2(*READER, *WRITER, "git-commit-tree $tree $par") 
 458        or die $!;
 459    print WRITER $logmessage;   # write
 460    close WRITER;
 461    my $commitid = <READER>;    # read
 462    chomp $commitid;
 463    close READER;
 464    waitpid $pid,0;             # close;
 465
 466    if (length $commitid != 40) {
 467        die "Something went wrong with the commit! $! $commitid";
 468    }
 469    #
 470    # Update the branch
 471    # 
 472    open  HEAD, ">$git_dir/refs/heads/$ps->{branch}";
 473    print HEAD $commitid;
 474    close HEAD;
 475    system('git-update-ref', 'HEAD', "$ps->{branch}");
 476
 477    # tag accordingly
 478    ptag($ps->{id}, $commitid); # private tag
 479    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 480        tag($ps->{id}, $commitid);
 481    }
 482    print " * Committed $ps->{id}\n";
 483    print "   + tree   $tree\n";
 484    print "   + commit $commitid\n";
 485    $opt_v && print "   + commit date is  $ps->{date} \n";
 486    $opt_v && print "   + parents:  $par \n";
 487}
 488
 489sub apply_import {
 490    my $ps = shift;
 491    my $bname = git_branchname($ps->{id});
 492
 493    `mkdir -p $tmp`;
 494
 495    `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
 496    die "Cannot get import: $!" if $?;    
 497    `rsync -v --archive --delete --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
 498    die "Cannot rsync import:$!" if $?;
 499    
 500    `rm -fr $tmp/import`;
 501    die "Cannot remove tempdir: $!" if $?;
 502    
 503
 504    return 1;
 505}
 506
 507sub apply_cset {
 508    my $ps = shift;
 509
 510    `mkdir -p $tmp`;
 511
 512    # get the changeset
 513    `tla get-changeset  -A $ps->{repo} $ps->{id} $tmp/changeset`;
 514    die "Cannot get changeset: $!" if $?;
 515    
 516    # apply patches
 517    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 518        # this can be sped up considerably by doing
 519        #    (find | xargs cat) | patch
 520        # but that cna get mucked up by patches
 521        # with missing trailing newlines or the standard 
 522        # 'missing newline' flag in the patch - possibly
 523        # produced with an old/buggy diff.
 524        # slow and safe, we invoke patch once per patchfile
 525        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 526        die "Problem applying patches! $!" if $?;
 527    }
 528
 529    # apply changed binary files
 530    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 531        foreach my $mod (@modified) {
 532            chomp $mod;
 533            my $orig = $mod;
 534            $orig =~ s/\.modified$//; # lazy
 535            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 536            #print "rsync -p '$mod' '$orig'";
 537            `rsync -p $mod ./$orig`;
 538            die "Problem applying binary changes! $!" if $?;
 539        }
 540    }
 541
 542    # bring in new files
 543    `rsync --archive --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
 544
 545    # deleted files are hinted from the commitlog processing
 546
 547    `rm -fr $tmp/changeset`;
 548}
 549
 550
 551# =for reference
 552# A log entry looks like 
 553# Revision: moodle-org--moodle--1.3.3--patch-15
 554# Archive: arch-eduforge@catalyst.net.nz--2004
 555# Creator: Penny Leach <penny@catalyst.net.nz>
 556# Date: Wed May 25 14:15:34 NZST 2005
 557# Standard-date: 2005-05-25 02:15:34 GMT
 558# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 559#     lang/de/.arch-ids/block_html.php.id
 560# New-directories: lang/de/help/questionnaire
 561#     lang/de/help/questionnaire/.arch-ids
 562# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 563#    db_sears.sql db/db_sears.sql
 564# Removed-files: lang/be/docs/.arch-ids/release.html.id
 565#     lang/be/docs/.arch-ids/releaseold.html.id
 566# Modified-files: admin/cron.php admin/delete.php
 567#     admin/editor.html backup/lib.php backup/restore.php
 568# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 569# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 570# Keywords:
 571#
 572# Updating yadda tadda tadda madda
 573sub parselog {
 574    my $log = shift;
 575    #print $log;
 576
 577    my (@add, @del, @mod, @ren, @kw, $sum, $msg );
 578
 579    if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
 580        my $files = $1;
 581        @add = split(m/\s+/s, $files);
 582    }
 583       
 584    if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
 585        my $files = $1;
 586        @del = split(m/\s+/s, $files);
 587    }
 588    
 589    if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
 590        my $files = $1;
 591        @mod = split(m/\s+/s, $files);
 592    }
 593    
 594    if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
 595        my $files = $1;
 596        @ren = split(m/\s+/s, $files);
 597    }
 598
 599    $sum ='';
 600    if ($log =~ m/^Summary:(.+?)$/m ) {
 601        $sum = $1;
 602        $sum =~ s/^\s+//;
 603        $sum =~ s/\s+$//;
 604    }
 605
 606    $msg = '';
 607    if ($log =~ m/\n\n(.+)$/s) {
 608        $msg = $1;
 609        $msg =~ s/^\s+//;
 610        $msg =~ s/\s+$//;
 611    }
 612
 613
 614    # cleanup the arrays
 615    foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
 616        my @tmp = ();
 617        while (my $t = pop @$ref) {
 618            next unless length ($t);
 619            next if $t =~ m!\{arch\}/!;
 620            next if $t =~ m!\.arch-ids/!;
 621            next if $t =~ m!\.arch-inventory$!;
 622           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 623           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 624           if  ($t =~ /\\/ ){
 625               $t = `tla escape --unescaped '$t'`;
 626           }
 627            push (@tmp, shell_quote($t));
 628        }
 629        @$ref = @tmp;
 630    }
 631    
 632    #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren]; 
 633    return       ($sum, $msg, \@add, \@del, \@mod, \@ren); 
 634}
 635
 636# write/read a tag
 637sub tag {
 638    my ($tag, $commit) = @_;
 639 
 640    if ($opt_o) {
 641        $tag =~ s|/|--|g;
 642    } else {
 643        # don't use subdirs for tags yet, it could screw up other porcelains
 644        $tag =~ s|/|,|g;
 645    }
 646    
 647    if ($commit) {
 648        open(C,">","$git_dir/refs/tags/$tag")
 649            or die "Cannot create tag $tag: $!\n";
 650        print C "$commit\n"
 651            or die "Cannot write tag $tag: $!\n";
 652        close(C)
 653            or die "Cannot write tag $tag: $!\n";
 654        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 655    } else {                    # read
 656        open(C,"<","$git_dir/refs/tags/$tag")
 657            or die "Cannot read tag $tag: $!\n";
 658        $commit = <C>;
 659        chomp $commit;
 660        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 661        close(C)
 662            or die "Cannot read tag $tag: $!\n";
 663        return $commit;
 664    }
 665}
 666
 667# write/read a private tag
 668# reads fail softly if the tag isn't there
 669sub ptag {
 670    my ($tag, $commit) = @_;
 671
 672    # don't use subdirs for tags yet, it could screw up other porcelains
 673    $tag =~ s|/|,|g; 
 674    
 675    my $tag_file = "$ptag_dir/$tag";
 676    my $tag_branch_dir = dirname($tag_file);
 677    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 678
 679    if ($commit) {              # write
 680        open(C,">",$tag_file)
 681            or die "Cannot create tag $tag: $!\n";
 682        print C "$commit\n"
 683            or die "Cannot write tag $tag: $!\n";
 684        close(C)
 685            or die "Cannot write tag $tag: $!\n";
 686        $rptags{$commit} = $tag 
 687            unless $tag =~ m/--base-0$/;
 688    } else {                    # read
 689        # if the tag isn't there, return 0
 690        unless ( -s $tag_file) {
 691            return 0;
 692        }
 693        open(C,"<",$tag_file)
 694            or die "Cannot read tag $tag: $!\n";
 695        $commit = <C>;
 696        chomp $commit;
 697        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 698        close(C)
 699            or die "Cannot read tag $tag: $!\n";
 700        unless (defined $rptags{$commit}) {
 701            $rptags{$commit} = $tag;
 702        }
 703        return $commit;
 704    }
 705}
 706
 707sub find_parents {
 708    #
 709    # Identify what branches are merging into me
 710    # and whether we are fully merged
 711    # git-merge-base <headsha> <headsha> should tell
 712    # me what the base of the merge should be 
 713    #
 714    my $ps = shift;
 715
 716    my %branches; # holds an arrayref per branch
 717                  # the arrayref contains a list of
 718                  # merged patches between the base
 719                  # of the merge and the current head
 720
 721    my @parents;  # parents found for this commit
 722
 723    # simple loop to split the merges
 724    # per branch
 725    foreach my $merge (@{$ps->{merges}}) {
 726        my $branch = git_branchname($merge);
 727        unless (defined $branches{$branch} ){
 728            $branches{$branch} = [];
 729        }
 730        push @{$branches{$branch}}, $merge;
 731    }
 732
 733    #
 734    # foreach branch find a merge base and walk it to the 
 735    # head where we are, collecting the merged patchsets that
 736    # Arch has recorded. Keep that in @have
 737    # Compare that with the commits on the other branch
 738    # between merge-base and the tip of the branch (@need)
 739    # and see if we have a series of consecutive patches
 740    # starting from the merge base. The tip of the series
 741    # of consecutive patches merged is our new parent for 
 742    # that branch.
 743    #
 744    foreach my $branch (keys %branches) {
 745
 746        # check that we actually know about the branch
 747        next unless -e "$git_dir/refs/heads/$branch";
 748
 749        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 750        if ($?) { 
 751            # Don't die here, Arch supports one-way cherry-picking
 752            # between branches with no common base (or any relationship
 753            # at all beforehand)
 754            warn "Cannot find merge base for $branch and $ps->{branch}";
 755            next;
 756        }
 757        chomp $mergebase;
 758
 759        # now walk up to the mergepoint collecting what patches we have
 760        my $branchtip = git_rev_parse($ps->{branch});
 761        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 762        my %have; # collected merges this branch has
 763        foreach my $merge (@{$ps->{merges}}) {
 764            $have{$merge} = 1;
 765        }
 766        my %ancestorshave;
 767        foreach my $par (@ancestors) {
 768            $par = commitid2pset($par);
 769            if (defined $par->{merges}) {
 770                foreach my $merge (@{$par->{merges}}) {
 771                    $ancestorshave{$merge}=1;
 772                }
 773            }
 774        }
 775        # print "++++ Merges in $ps->{id} are....\n";
 776        # my @have = sort keys %have;   print Dumper(\@have);
 777
 778        # merge what we have with what ancestors have
 779        %have = (%have, %ancestorshave);
 780
 781        # see what the remote branch has - these are the merges we 
 782        # will want to have in a consecutive series from the mergebase
 783        my $otherbranchtip = git_rev_parse($branch);
 784        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 785        my @need;
 786        foreach my $needps (@needraw) {         # get the psets
 787            $needps = commitid2pset($needps);
 788            # git-rev-list will also
 789            # list commits merged in via earlier 
 790            # merges. we are only interested in commits
 791            # from the branch we're looking at
 792            if ($branch eq $needps->{branch}) {
 793                push @need, $needps->{id};
 794            }
 795        }
 796
 797        # print "++++ Merges from $branch we want are....\n";
 798        # print Dumper(\@need);
 799
 800        my $newparent;
 801        while (my $needed_commit = pop @need) {
 802            if ($have{$needed_commit}) {
 803                $newparent = $needed_commit;
 804            } else {
 805                last; # break out of the while
 806            }
 807        }
 808        if ($newparent) {
 809            push @parents, $newparent;
 810        }
 811
 812
 813    } # end foreach branch
 814
 815    # prune redundant parents
 816    my %parents;
 817    foreach my $p (@parents) {
 818        $parents{$p} = 1;
 819    }
 820    foreach my $p (@parents) {
 821        next unless exists $psets{$p}{merges};
 822        next unless ref    $psets{$p}{merges};
 823        my @merges = @{$psets{$p}{merges}};
 824        foreach my $merge (@merges) {
 825            if ($parents{$merge}) { 
 826                delete $parents{$merge};
 827            }
 828        }
 829    }
 830    @parents = keys %parents;
 831    @parents = map { " -p " . ptag($_) } @parents;
 832    return @parents;
 833}
 834
 835sub git_rev_parse {
 836    my $name = shift;
 837    my $val  = `git-rev-parse $name`;
 838    die "Error: git-rev-parse $name" if $?;
 839    chomp $val;
 840    return $val;
 841}
 842
 843# resolve a SHA1 to a known patchset
 844sub commitid2pset {
 845    my $commitid = shift;
 846    chomp $commitid;
 847    my $name = $rptags{$commitid} 
 848        || die "Cannot find reverse tag mapping for $commitid";
 849    $name =~ s|,|/|;
 850    my $ps   = $psets{$name} 
 851        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
 852    return $ps;
 853}
 854
 855# an alterative to `command` that allows input to be passed as an array
 856# to work around shell problems with weird characters in arguments
 857sub safe_pipe_capture {
 858    my @output;
 859    if (my $pid = open my $child, '-|') {
 860        @output = (<$child>);
 861        close $child or die join(' ',@_).": $! $?";
 862    } else {
 863        exec(@_) or die $?; # exec() can fail the executable can't be found
 864    }
 865    return wantarray ? @output : join('',@output);
 866}
 867
 868