git-archimport.perlon commit GIT 0.99.9l aka 1.0rc4 (423325a)
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  13
  14Imports a project from one or more Arch repositories. It will follow branches
  15and repositories within the namespaces defined by the <archive/branch>
  16parameters suppplied. If it cannot find the remote branch a merge comes from
  17it will just import it as a regular commit. If it can find it, it will mark it 
  18as a merge whenever possible.
  19
  20See man (1) git-archimport for more details.
  21
  22=head1 TODO
  23
  24 - create tag objects instead of ref tags
  25 - audit shell-escaping of filenames
  26 - hide our private tags somewhere smarter
  27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  28
  29=head1 Devel tricks
  30
  31Add print in front of the shell commands invoked via backticks. 
  32
  33=head1 Devel Notes
  34
  35There are several places where Arch and git terminology are intermixed
  36and potentially confused.
  37
  38The notion of a "branch" in git is approximately equivalent to
  39a "archive/category--branch--version" in Arch.  Also, it should be noted
  40that the "--branch" portion of "archive/category--branch--version" is really
  41optional in Arch although not many people (nor tools!) seem to know this.
  42This means that "archive/category--version" is also a valid "branch"
  43in git terms.
  44
  45We always refer to Arch names by their fully qualified variant (which
  46means the "archive" name is prefixed.
  47
  48For people unfamiliar with Arch, an "archive" is the term for "repository",
  49and can contain multiple, unrelated branches.
  50
  51=cut
  52
  53use strict;
  54use warnings;
  55use Getopt::Std;
  56use File::Spec;
  57use File::Temp qw(tempfile tempdir);
  58use File::Path qw(mkpath);
  59use File::Basename qw(basename dirname);
  60use String::ShellQuote;
  61use Time::Local;
  62use IO::Socket;
  63use IO::Pipe;
  64use POSIX qw(strftime dup2);
  65use Data::Dumper qw/ Dumper /;
  66use IPC::Open2;
  67
  68$SIG{'PIPE'}="IGNORE";
  69$ENV{'TZ'}="UTC";
  70
  71my $git_dir = $ENV{"GIT_DIR"} || ".git";
  72$ENV{"GIT_DIR"} = $git_dir;
  73my $ptag_dir = "$git_dir/archimport/tags";
  74
  75our($opt_h,$opt_v, $opt_T,$opt_t,$opt_o);
  76
  77sub usage() {
  78    print STDERR <<END;
  79Usage: ${\basename $0}     # fetch/update GIT from Arch
  80       [ -o ] [ -h ] [ -v ] [ -T ] [ -t tempdir ] 
  81       repository/arch-branch [ repository/arch-branch] ...
  82END
  83    exit(1);
  84}
  85
  86getopts("Thvt:") or usage();
  87usage if $opt_h;
  88
  89@ARGV >= 1 or usage();
  90my @arch_roots = @ARGV;
  91
  92my ($tmpdir, $tmpdirname) = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  93my $tmp = $opt_t || 1;
  94$tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  95$opt_v && print "+ Using $tmp as temporary directory\n";
  96
  97my @psets  = ();                # the collection
  98my %psets  = ();                # the collection, by name
  99
 100my %rptags = ();                # my reverse private tags
 101                                # to map a SHA1 to a commitid
 102
 103foreach my $root (@arch_roots) {
 104    my ($arepo, $abranch) = split(m!/!, $root);
 105    open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |" 
 106        or die "Problems with tla abrowse: $!";
 107    
 108    my %ps        = ();         # the current one
 109    my $mode      = '';
 110    my $lastseen  = '';
 111    
 112    while (<ABROWSE>) {
 113        chomp;
 114        
 115        # first record padded w 8 spaces
 116        if (s/^\s{8}\b//) {
 117            
 118            # store the record we just captured
 119            if (%ps) {
 120                my %temp = %ps; # break references
 121                push (@psets, \%temp);
 122                $psets{$temp{id}} = \%temp;
 123                %ps = ();
 124            }
 125            
 126            my ($id, $type) = split(m/\s{3}/, $_);
 127            $ps{id}   = $id;
 128            $ps{repo} = $arepo;
 129
 130            # deal with types
 131            if ($type =~ m/^\(simple changeset\)/) {
 132                $ps{type} = 's';
 133            } elsif ($type eq '(initial import)') {
 134                $ps{type} = 'i';
 135            } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
 136                $ps{type} = 't';
 137                $ps{tag}  = $1;
 138            } else { 
 139                warn "Unknown type $type";
 140            }
 141            $lastseen = 'id';
 142        }
 143        
 144        if (s/^\s{10}//) { 
 145            # 10 leading spaces or more 
 146            # indicate commit metadata
 147            
 148            # date & author 
 149            if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
 150                
 151                my ($date, $authoremail) = split(m/\s{2,}/, $_);
 152                $ps{date}   = $date;
 153                $ps{date}   =~ s/\bGMT$//; # strip off trailign GMT
 154                if ($ps{date} =~ m/\b\w+$/) {
 155                    warn 'Arch dates not in GMT?! - imported dates will be wrong';
 156                }
 157            
 158                $authoremail =~ m/^(.+)\s(\S+)$/;
 159                $ps{author} = $1;
 160                $ps{email}  = $2;
 161            
 162                $lastseen = 'date';
 163            
 164            } elsif ($lastseen eq 'date') {
 165                # the only hint is position
 166                # subject is after date
 167                $ps{subj} = $_;
 168                $lastseen = 'subj';
 169            
 170            } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
 171                $ps{merges} = [];
 172                $lastseen = 'merges';
 173            
 174            } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 175                push (@{$ps{merges}}, $_);
 176            } else {
 177                warn 'more metadata after merges!?';
 178            }
 179            
 180        }
 181    }
 182
 183    if (%ps) {
 184        my %temp = %ps;         # break references
 185        push (@psets, \%temp);  
 186        $psets{ $temp{id} } = \%temp;
 187        %ps = ();
 188    }    
 189    close ABROWSE;
 190}                               # end foreach $root
 191
 192## Order patches by time
 193@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 194
 195#print Dumper \@psets;
 196
 197##
 198## TODO cleanup irrelevant patches
 199##      and put an initial import
 200##      or a full tag
 201my $import = 0;
 202unless (-d $git_dir) { # initial import
 203    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 204        print "Starting import from $psets[0]{id}\n";
 205        `git-init-db`;
 206        die $! if $?;
 207        $import = 1;
 208    } else {
 209        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 210    }
 211} else {    # progressing an import
 212    # load the rptags
 213    opendir(DIR, "$git_dir/archimport/tags")
 214        || die "can't opendir: $!";
 215    while (my $file = readdir(DIR)) {
 216        # skip non-interesting-files
 217        next unless -f "$ptag_dir/$file";
 218   
 219        # convert first '--' to '/' from old git-archimport to use
 220        # as an archivename/c--b--v private tag
 221        if ($file !~ m!,!) {
 222            my $oldfile = $file;
 223            $file =~ s!--!,!;
 224            print STDERR "converting old tag $oldfile to $file\n";
 225            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 226        }
 227        my $sha = ptag($file);
 228        chomp $sha;
 229        $rptags{$sha} = $file;
 230    }
 231    closedir DIR;
 232}
 233
 234# process patchsets
 235# extract the Arch repository name (Arch "archive" in Arch-speak)
 236sub extract_reponame {
 237    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 238    return (split(/\//, $fq_cvbr))[0];
 239}
 240 
 241sub extract_versionname {
 242    my $name = shift;
 243    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 244    return $name;
 245}
 246
 247# convert a fully-qualified revision or version to a unique dirname:
 248#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 249# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 250#
 251# the git notion of a branch is closer to
 252# archive/category--branch--version than archive/category--branch, so we
 253# use this to convert to git branch names.
 254# Also, keep archive names but replace '/' with ',' since it won't require
 255# subdirectories, and is safer than swapping '--' which could confuse
 256# reverse-mapping when dealing with bastard branches that
 257# are just archive/category--version  (no --branch)
 258sub tree_dirname {
 259    my $revision = shift;
 260    my $name = extract_versionname($revision);
 261    $name =~ s#/#,#;
 262    return $name;
 263}
 264
 265# old versions of git-archimport just use the <category--branch> part:
 266sub old_style_branchname {
 267    my $id = shift;
 268    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 269    chomp $ret;
 270    return $ret;
 271}
 272
 273*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 274
 275# process patchsets
 276foreach my $ps (@psets) {
 277    $ps->{branch} = git_branchname($ps->{id});
 278
 279    #
 280    # ensure we have a clean state 
 281    # 
 282    if (`git diff-files`) {
 283        die "Unclean tree when about to process $ps->{id} " .
 284            " - did we fail to commit cleanly before?";
 285    }
 286    die $! if $?;
 287
 288    #
 289    # skip commits already in repo
 290    #
 291    if (ptag($ps->{id})) {
 292      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 293      next;
 294    }
 295
 296    print " * Starting to work on $ps->{id}\n";
 297
 298    # 
 299    # create the branch if needed
 300    #
 301    if ($ps->{type} eq 'i' && !$import) {
 302        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 303    }
 304
 305    unless ($import) { # skip for import
 306        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 307            # we know about this branch
 308            `git checkout    $ps->{branch}`;
 309        } else {
 310            # new branch! we need to verify a few things
 311            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 312            my $branchpoint = ptag($ps->{tag});
 313            die "Tagging from unknown id unsupported: $ps->{tag}" 
 314                unless $branchpoint;
 315            
 316            # find where we are supposed to branch from
 317            `git checkout -b $ps->{branch} $branchpoint`;
 318
 319            # If we trust Arch with the fact that this is just 
 320            # a tag, and it does not affect the state of the tree
 321            # then we just tag and move on
 322            tag($ps->{id}, $branchpoint);
 323            ptag($ps->{id}, $branchpoint);
 324            print " * Tagged $ps->{id} at $branchpoint\n";
 325            next;
 326        } 
 327        die $! if $?;
 328    } 
 329
 330    #
 331    # Apply the import/changeset/merge into the working tree
 332    # 
 333    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 334        apply_import($ps) or die $!;
 335        $import=0;
 336    } elsif ($ps->{type} eq 's') {
 337        apply_cset($ps);
 338    }
 339
 340    #
 341    # prepare update git's index, based on what arch knows
 342    # about the pset, resolve parents, etc
 343    #
 344    my $tree;
 345    
 346    my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`; 
 347    die "Error in cat-archive-log: $!" if $?;
 348        
 349    # parselog will git-add/rm files
 350    # and generally prepare things for the commit
 351    # NOTE: parselog will shell-quote filenames! 
 352    my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
 353    my $logmessage = "$sum\n$msg";
 354
 355
 356    # imports don't give us good info
 357    # on added files. Shame on them
 358    if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 
 359        `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
 360        `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
 361    }
 362
 363    if (@$add) {
 364        while (@$add) {
 365            my @slice = splice(@$add, 0, 100);
 366            my $slice = join(' ', @slice);          
 367            `git-update-index --add $slice`;
 368            die "Error in git-update-index --add: $!" if $?;
 369        }
 370    }
 371    if (@$del) {
 372        foreach my $file (@$del) {
 373            unlink $file or die "Problems deleting $file : $!";
 374        }
 375        while (@$del) {
 376            my @slice = splice(@$del, 0, 100);
 377            my $slice = join(' ', @slice);
 378            `git-update-index --remove $slice`;
 379            die "Error in git-update-index --remove: $!" if $?;
 380        }
 381    }
 382    if (@$ren) {                # renamed
 383        if (@$ren % 2) {
 384            die "Odd number of entries in rename!?";
 385        }
 386        ;
 387        while (@$ren) {
 388            my $from = pop @$ren;
 389            my $to   = pop @$ren;           
 390
 391            unless (-d dirname($to)) {
 392                mkpath(dirname($to)); # will die on err
 393            }
 394            #print "moving $from $to";
 395            `mv $from $to`;
 396            die "Error renaming $from $to : $!" if $?;
 397            `git-update-index --remove $from`;
 398            die "Error in git-update-index --remove: $!" if $?;
 399            `git-update-index --add $to`;
 400            die "Error in git-update-index --add: $!" if $?;
 401        }
 402
 403    }
 404    if (@$mod) {                # must be _after_ renames
 405        while (@$mod) {
 406            my @slice = splice(@$mod, 0, 100);
 407            my $slice = join(' ', @slice);
 408            `git-update-index $slice`;
 409            die "Error in git-update-index: $!" if $?;
 410        }
 411    }
 412
 413    # warn "errors when running git-update-index! $!";
 414    $tree = `git-write-tree`;
 415    die "cannot write tree $!" if $?;
 416    chomp $tree;
 417        
 418    
 419    #
 420    # Who's your daddy?
 421    #
 422    my @par;
 423    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 424        if (open HEAD, "<$git_dir/refs/heads/$ps->{branch}") {
 425            my $p = <HEAD>;
 426            close HEAD;
 427            chomp $p;
 428            push @par, '-p', $p;
 429        } else { 
 430            if ($ps->{type} eq 's') {
 431                warn "Could not find the right head for the branch $ps->{branch}";
 432            }
 433        }
 434    }
 435    
 436    if ($ps->{merges}) {
 437        push @par, find_parents($ps);
 438    }
 439    my $par = join (' ', @par);
 440
 441    #    
 442    # Commit, tag and clean state
 443    #
 444    $ENV{TZ}                  = 'GMT';
 445    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 446    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 447    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 448    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 449    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 450    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 451
 452    my ($pid, $commit_rh, $commit_wh);
 453    $commit_rh = 'commit_rh';
 454    $commit_wh = 'commit_wh';
 455    
 456    $pid = open2(*READER, *WRITER, "git-commit-tree $tree $par") 
 457        or die $!;
 458    print WRITER $logmessage;   # write
 459    close WRITER;
 460    my $commitid = <READER>;    # read
 461    chomp $commitid;
 462    close READER;
 463    waitpid $pid,0;             # close;
 464
 465    if (length $commitid != 40) {
 466        die "Something went wrong with the commit! $! $commitid";
 467    }
 468    #
 469    # Update the branch
 470    # 
 471    open  HEAD, ">$git_dir/refs/heads/$ps->{branch}";
 472    print HEAD $commitid;
 473    close HEAD;
 474    system('git-update-ref', 'HEAD', "$ps->{branch}");
 475
 476    # tag accordingly
 477    ptag($ps->{id}, $commitid); # private tag
 478    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 479        tag($ps->{id}, $commitid);
 480    }
 481    print " * Committed $ps->{id}\n";
 482    print "   + tree   $tree\n";
 483    print "   + commit $commitid\n";
 484    $opt_v && print "   + commit date is  $ps->{date} \n";
 485    $opt_v && print "   + parents:  $par \n";
 486}
 487
 488sub apply_import {
 489    my $ps = shift;
 490    my $bname = git_branchname($ps->{id});
 491
 492    `mkdir -p $tmp`;
 493
 494    `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
 495    die "Cannot get import: $!" if $?;    
 496    `rsync -v --archive --delete --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
 497    die "Cannot rsync import:$!" if $?;
 498    
 499    `rm -fr $tmp/import`;
 500    die "Cannot remove tempdir: $!" if $?;
 501    
 502
 503    return 1;
 504}
 505
 506sub apply_cset {
 507    my $ps = shift;
 508
 509    `mkdir -p $tmp`;
 510
 511    # get the changeset
 512    `tla get-changeset  -A $ps->{repo} $ps->{id} $tmp/changeset`;
 513    die "Cannot get changeset: $!" if $?;
 514    
 515    # apply patches
 516    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 517        # this can be sped up considerably by doing
 518        #    (find | xargs cat) | patch
 519        # but that cna get mucked up by patches
 520        # with missing trailing newlines or the standard 
 521        # 'missing newline' flag in the patch - possibly
 522        # produced with an old/buggy diff.
 523        # slow and safe, we invoke patch once per patchfile
 524        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 525        die "Problem applying patches! $!" if $?;
 526    }
 527
 528    # apply changed binary files
 529    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 530        foreach my $mod (@modified) {
 531            chomp $mod;
 532            my $orig = $mod;
 533            $orig =~ s/\.modified$//; # lazy
 534            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 535            #print "rsync -p '$mod' '$orig'";
 536            `rsync -p $mod ./$orig`;
 537            die "Problem applying binary changes! $!" if $?;
 538        }
 539    }
 540
 541    # bring in new files
 542    `rsync --archive --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
 543
 544    # deleted files are hinted from the commitlog processing
 545
 546    `rm -fr $tmp/changeset`;
 547}
 548
 549
 550# =for reference
 551# A log entry looks like 
 552# Revision: moodle-org--moodle--1.3.3--patch-15
 553# Archive: arch-eduforge@catalyst.net.nz--2004
 554# Creator: Penny Leach <penny@catalyst.net.nz>
 555# Date: Wed May 25 14:15:34 NZST 2005
 556# Standard-date: 2005-05-25 02:15:34 GMT
 557# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 558#     lang/de/.arch-ids/block_html.php.id
 559# New-directories: lang/de/help/questionnaire
 560#     lang/de/help/questionnaire/.arch-ids
 561# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 562#    db_sears.sql db/db_sears.sql
 563# Removed-files: lang/be/docs/.arch-ids/release.html.id
 564#     lang/be/docs/.arch-ids/releaseold.html.id
 565# Modified-files: admin/cron.php admin/delete.php
 566#     admin/editor.html backup/lib.php backup/restore.php
 567# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 568# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 569# Keywords:
 570#
 571# Updating yadda tadda tadda madda
 572sub parselog {
 573    my $log = shift;
 574    #print $log;
 575
 576    my (@add, @del, @mod, @ren, @kw, $sum, $msg );
 577
 578    if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
 579        my $files = $1;
 580        @add = split(m/\s+/s, $files);
 581    }
 582       
 583    if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
 584        my $files = $1;
 585        @del = split(m/\s+/s, $files);
 586    }
 587    
 588    if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
 589        my $files = $1;
 590        @mod = split(m/\s+/s, $files);
 591    }
 592    
 593    if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
 594        my $files = $1;
 595        @ren = split(m/\s+/s, $files);
 596    }
 597
 598    $sum ='';
 599    if ($log =~ m/^Summary:(.+?)$/m ) {
 600        $sum = $1;
 601        $sum =~ s/^\s+//;
 602        $sum =~ s/\s+$//;
 603    }
 604
 605    $msg = '';
 606    if ($log =~ m/\n\n(.+)$/s) {
 607        $msg = $1;
 608        $msg =~ s/^\s+//;
 609        $msg =~ s/\s+$//;
 610    }
 611
 612
 613    # cleanup the arrays
 614    foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
 615        my @tmp = ();
 616        while (my $t = pop @$ref) {
 617            next unless length ($t);
 618            next if $t =~ m!\{arch\}/!;
 619            next if $t =~ m!\.arch-ids/!;
 620            next if $t =~ m!\.arch-inventory$!;
 621           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 622           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 623           if  ($t =~ /\\/ ){
 624               $t = `tla escape --unescaped '$t'`;
 625           }
 626            push (@tmp, shell_quote($t));
 627        }
 628        @$ref = @tmp;
 629    }
 630    
 631    #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren]; 
 632    return       ($sum, $msg, \@add, \@del, \@mod, \@ren); 
 633}
 634
 635# write/read a tag
 636sub tag {
 637    my ($tag, $commit) = @_;
 638 
 639    if ($opt_o) {
 640        $tag =~ s|/|--|g;
 641    } else {
 642        # don't use subdirs for tags yet, it could screw up other porcelains
 643        $tag =~ s|/|,|g;
 644    }
 645    
 646    if ($commit) {
 647        open(C,">","$git_dir/refs/tags/$tag")
 648            or die "Cannot create tag $tag: $!\n";
 649        print C "$commit\n"
 650            or die "Cannot write tag $tag: $!\n";
 651        close(C)
 652            or die "Cannot write tag $tag: $!\n";
 653        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 654    } else {                    # read
 655        open(C,"<","$git_dir/refs/tags/$tag")
 656            or die "Cannot read tag $tag: $!\n";
 657        $commit = <C>;
 658        chomp $commit;
 659        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 660        close(C)
 661            or die "Cannot read tag $tag: $!\n";
 662        return $commit;
 663    }
 664}
 665
 666# write/read a private tag
 667# reads fail softly if the tag isn't there
 668sub ptag {
 669    my ($tag, $commit) = @_;
 670
 671    # don't use subdirs for tags yet, it could screw up other porcelains
 672    $tag =~ s|/|,|g; 
 673    
 674    my $tag_file = "$ptag_dir/$tag";
 675    my $tag_branch_dir = dirname($tag_file);
 676    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 677
 678    if ($commit) {              # write
 679        open(C,">",$tag_file)
 680            or die "Cannot create tag $tag: $!\n";
 681        print C "$commit\n"
 682            or die "Cannot write tag $tag: $!\n";
 683        close(C)
 684            or die "Cannot write tag $tag: $!\n";
 685        $rptags{$commit} = $tag 
 686            unless $tag =~ m/--base-0$/;
 687    } else {                    # read
 688        # if the tag isn't there, return 0
 689        unless ( -s $tag_file) {
 690            return 0;
 691        }
 692        open(C,"<",$tag_file)
 693            or die "Cannot read tag $tag: $!\n";
 694        $commit = <C>;
 695        chomp $commit;
 696        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 697        close(C)
 698            or die "Cannot read tag $tag: $!\n";
 699        unless (defined $rptags{$commit}) {
 700            $rptags{$commit} = $tag;
 701        }
 702        return $commit;
 703    }
 704}
 705
 706sub find_parents {
 707    #
 708    # Identify what branches are merging into me
 709    # and whether we are fully merged
 710    # git-merge-base <headsha> <headsha> should tell
 711    # me what the base of the merge should be 
 712    #
 713    my $ps = shift;
 714
 715    my %branches; # holds an arrayref per branch
 716                  # the arrayref contains a list of
 717                  # merged patches between the base
 718                  # of the merge and the current head
 719
 720    my @parents;  # parents found for this commit
 721
 722    # simple loop to split the merges
 723    # per branch
 724    foreach my $merge (@{$ps->{merges}}) {
 725        my $branch = git_branchname($merge);
 726        unless (defined $branches{$branch} ){
 727            $branches{$branch} = [];
 728        }
 729        push @{$branches{$branch}}, $merge;
 730    }
 731
 732    #
 733    # foreach branch find a merge base and walk it to the 
 734    # head where we are, collecting the merged patchsets that
 735    # Arch has recorded. Keep that in @have
 736    # Compare that with the commits on the other branch
 737    # between merge-base and the tip of the branch (@need)
 738    # and see if we have a series of consecutive patches
 739    # starting from the merge base. The tip of the series
 740    # of consecutive patches merged is our new parent for 
 741    # that branch.
 742    #
 743    foreach my $branch (keys %branches) {
 744
 745        # check that we actually know about the branch
 746        next unless -e "$git_dir/refs/heads/$branch";
 747
 748        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 749        if ($?) { 
 750            # Don't die here, Arch supports one-way cherry-picking
 751            # between branches with no common base (or any relationship
 752            # at all beforehand)
 753            warn "Cannot find merge base for $branch and $ps->{branch}";
 754            next;
 755        }
 756        chomp $mergebase;
 757
 758        # now walk up to the mergepoint collecting what patches we have
 759        my $branchtip = git_rev_parse($ps->{branch});
 760        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 761        my %have; # collected merges this branch has
 762        foreach my $merge (@{$ps->{merges}}) {
 763            $have{$merge} = 1;
 764        }
 765        my %ancestorshave;
 766        foreach my $par (@ancestors) {
 767            $par = commitid2pset($par);
 768            if (defined $par->{merges}) {
 769                foreach my $merge (@{$par->{merges}}) {
 770                    $ancestorshave{$merge}=1;
 771                }
 772            }
 773        }
 774        # print "++++ Merges in $ps->{id} are....\n";
 775        # my @have = sort keys %have;   print Dumper(\@have);
 776
 777        # merge what we have with what ancestors have
 778        %have = (%have, %ancestorshave);
 779
 780        # see what the remote branch has - these are the merges we 
 781        # will want to have in a consecutive series from the mergebase
 782        my $otherbranchtip = git_rev_parse($branch);
 783        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 784        my @need;
 785        foreach my $needps (@needraw) {         # get the psets
 786            $needps = commitid2pset($needps);
 787            # git-rev-list will also
 788            # list commits merged in via earlier 
 789            # merges. we are only interested in commits
 790            # from the branch we're looking at
 791            if ($branch eq $needps->{branch}) {
 792                push @need, $needps->{id};
 793            }
 794        }
 795
 796        # print "++++ Merges from $branch we want are....\n";
 797        # print Dumper(\@need);
 798
 799        my $newparent;
 800        while (my $needed_commit = pop @need) {
 801            if ($have{$needed_commit}) {
 802                $newparent = $needed_commit;
 803            } else {
 804                last; # break out of the while
 805            }
 806        }
 807        if ($newparent) {
 808            push @parents, $newparent;
 809        }
 810
 811
 812    } # end foreach branch
 813
 814    # prune redundant parents
 815    my %parents;
 816    foreach my $p (@parents) {
 817        $parents{$p} = 1;
 818    }
 819    foreach my $p (@parents) {
 820        next unless exists $psets{$p}{merges};
 821        next unless ref    $psets{$p}{merges};
 822        my @merges = @{$psets{$p}{merges}};
 823        foreach my $merge (@merges) {
 824            if ($parents{$merge}) { 
 825                delete $parents{$merge};
 826            }
 827        }
 828    }
 829    @parents = keys %parents;
 830    @parents = map { " -p " . ptag($_) } @parents;
 831    return @parents;
 832}
 833
 834sub git_rev_parse {
 835    my $name = shift;
 836    my $val  = `git-rev-parse $name`;
 837    die "Error: git-rev-parse $name" if $?;
 838    chomp $val;
 839    return $val;
 840}
 841
 842# resolve a SHA1 to a known patchset
 843sub commitid2pset {
 844    my $commitid = shift;
 845    chomp $commitid;
 846    my $name = $rptags{$commitid} 
 847        || die "Cannot find reverse tag mapping for $commitid";
 848    $name =~ s|,|/|;
 849    my $ps   = $psets{$name} 
 850        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
 851    return $ps;
 852}