git-archimport.perlon commit Disambiguate the term 'branch' in Arch vs git (22ff00f)
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  13
  14Imports a project from one or more Arch repositories. It will follow branches
  15and repositories within the namespaces defined by the <archive/branch>
  16parameters suppplied. If it cannot find the remote branch a merge comes from
  17it will just import it as a regular commit. If it can find it, it will mark it 
  18as a merge whenever possible.
  19
  20See man (1) git-archimport for more details.
  21
  22=head1 TODO
  23
  24 - create tag objects instead of ref tags
  25 - audit shell-escaping of filenames
  26 - hide our private tags somewhere smarter
  27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  28
  29=head1 Devel tricks
  30
  31Add print in front of the shell commands invoked via backticks. 
  32
  33=head1 Devel Notes
  34
  35There are several places where Arch and git terminology are intermixed
  36and potentially confused.
  37
  38The notion of a "branch" in git is approximately equivalent to
  39a "archive/category--branch--version" in Arch.  Also, it should be noted
  40that the "--branch" portion of "archive/category--branch--version" is really
  41optional in Arch although not many people (nor tools!) seem to know this.
  42This means that "archive/category--version" is also a valid "branch"
  43in git terms.
  44
  45We always refer to Arch names by their fully qualified variant (which
  46means the "archive" name is prefixed.
  47
  48For people unfamiliar with Arch, an "archive" is the term for "repository",
  49and can contain multiple, unrelated branches.
  50
  51=cut
  52
  53use strict;
  54use warnings;
  55use Getopt::Std;
  56use File::Spec;
  57use File::Temp qw(tempfile tempdir);
  58use File::Path qw(mkpath);
  59use File::Basename qw(basename dirname);
  60use String::ShellQuote;
  61use Time::Local;
  62use IO::Socket;
  63use IO::Pipe;
  64use POSIX qw(strftime dup2);
  65use Data::Dumper qw/ Dumper /;
  66use IPC::Open2;
  67
  68$SIG{'PIPE'}="IGNORE";
  69$ENV{'TZ'}="UTC";
  70
  71my $git_dir = $ENV{"GIT_DIR"} || ".git";
  72$ENV{"GIT_DIR"} = $git_dir;
  73my $ptag_dir = "$git_dir/archimport/tags";
  74
  75our($opt_h,$opt_v, $opt_T,
  76    $opt_C,$opt_t);
  77
  78sub usage() {
  79    print STDERR <<END;
  80Usage: ${\basename $0}     # fetch/update GIT from Arch
  81       [ -h ] [ -v ] [ -T ] [ -t tempdir ] 
  82       repository/arch-branch [ repository/arch-branch] ...
  83END
  84    exit(1);
  85}
  86
  87getopts("Thvt:") or usage();
  88usage if $opt_h;
  89
  90@ARGV >= 1 or usage();
  91my @arch_roots = @ARGV;
  92
  93my ($tmpdir, $tmpdirname) = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  94my $tmp = $opt_t || 1;
  95$tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  96$opt_v && print "+ Using $tmp as temporary directory\n";
  97
  98my @psets  = ();                # the collection
  99my %psets  = ();                # the collection, by name
 100
 101my %rptags = ();                # my reverse private tags
 102                                # to map a SHA1 to a commitid
 103
 104foreach my $root (@arch_roots) {
 105    my ($arepo, $abranch) = split(m!/!, $root);
 106    open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |" 
 107        or die "Problems with tla abrowse: $!";
 108    
 109    my %ps        = ();         # the current one
 110    my $mode      = '';
 111    my $lastseen  = '';
 112    
 113    while (<ABROWSE>) {
 114        chomp;
 115        
 116        # first record padded w 8 spaces
 117        if (s/^\s{8}\b//) {
 118            
 119            # store the record we just captured
 120            if (%ps) {
 121                my %temp = %ps; # break references
 122                push (@psets, \%temp);
 123                $psets{$temp{id}} = \%temp;
 124                %ps = ();
 125            }
 126            
 127            my ($id, $type) = split(m/\s{3}/, $_);
 128            $ps{id}   = $id;
 129            $ps{repo} = $arepo;
 130
 131            # deal with types
 132            if ($type =~ m/^\(simple changeset\)/) {
 133                $ps{type} = 's';
 134            } elsif ($type eq '(initial import)') {
 135                $ps{type} = 'i';
 136            } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
 137                $ps{type} = 't';
 138                $ps{tag}  = $1;
 139            } else { 
 140                warn "Unknown type $type";
 141            }
 142            $lastseen = 'id';
 143        }
 144        
 145        if (s/^\s{10}//) { 
 146            # 10 leading spaces or more 
 147            # indicate commit metadata
 148            
 149            # date & author 
 150            if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
 151                
 152                my ($date, $authoremail) = split(m/\s{2,}/, $_);
 153                $ps{date}   = $date;
 154                $ps{date}   =~ s/\bGMT$//; # strip off trailign GMT
 155                if ($ps{date} =~ m/\b\w+$/) {
 156                    warn 'Arch dates not in GMT?! - imported dates will be wrong';
 157                }
 158            
 159                $authoremail =~ m/^(.+)\s(\S+)$/;
 160                $ps{author} = $1;
 161                $ps{email}  = $2;
 162            
 163                $lastseen = 'date';
 164            
 165            } elsif ($lastseen eq 'date') {
 166                # the only hint is position
 167                # subject is after date
 168                $ps{subj} = $_;
 169                $lastseen = 'subj';
 170            
 171            } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
 172                $ps{merges} = [];
 173                $lastseen = 'merges';
 174            
 175            } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 176                push (@{$ps{merges}}, $_);
 177            } else {
 178                warn 'more metadata after merges!?';
 179            }
 180            
 181        }
 182    }
 183
 184    if (%ps) {
 185        my %temp = %ps;         # break references
 186        push (@psets, \%temp);  
 187        $psets{ $temp{id} } = \%temp;
 188        %ps = ();
 189    }    
 190    close ABROWSE;
 191}                               # end foreach $root
 192
 193## Order patches by time
 194@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 195
 196#print Dumper \@psets;
 197
 198##
 199## TODO cleanup irrelevant patches
 200##      and put an initial import
 201##      or a full tag
 202my $import = 0;
 203unless (-d $git_dir) { # initial import
 204    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 205        print "Starting import from $psets[0]{id}\n";
 206        `git-init-db`;
 207        die $! if $?;
 208        $import = 1;
 209    } else {
 210        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 211    }
 212} else {    # progressing an import
 213    # load the rptags
 214    opendir(DIR, "$git_dir/archimport/tags")
 215        || die "can't opendir: $!";
 216    while (my $file = readdir(DIR)) {
 217        # skip non-interesting-files
 218        next unless -f "$ptag_dir/$file";
 219   
 220        # convert first '--' to '/' from old git-archimport to use
 221        # as an archivename/c--b--v private tag
 222        if ($file !~ m!,!) {
 223            my $oldfile = $file;
 224            $file =~ s!--!,!;
 225            print STDERR "converting old tag $oldfile to $file\n";
 226            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 227        }
 228        my $sha = ptag($file);
 229        chomp $sha;
 230        $rptags{$sha} = $file;
 231    }
 232    closedir DIR;
 233}
 234
 235# process patchsets
 236# extract the Arch repository name (Arch "archive" in Arch-speak)
 237sub extract_reponame {
 238    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 239    return (split(/\//, $fq_cvbr))[0];
 240}
 241 
 242sub extract_versionname {
 243    my $name = shift;
 244    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 245    return $name;
 246}
 247
 248# convert a fully-qualified revision or version to a unique dirname:
 249#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 250# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 251#
 252# the git notion of a branch is closer to
 253# archive/category--branch--version than archive/category--branch, so we
 254# use this to convert to git branch names.
 255# Also, keep archive names but replace '/' with ',' since it won't require
 256# subdirectories, and is safer than swapping '--' which could confuse
 257# reverse-mapping when dealing with bastard branches that
 258# are just archive/category--version  (no --branch)
 259sub tree_dirname {
 260    my $revision = shift;
 261    my $name = extract_versionname($revision);
 262    $name =~ s#/#,#;
 263    return $name;
 264}
 265
 266*git_branchname = *tree_dirname;
 267
 268# process patchsets
 269foreach my $ps (@psets) {
 270    $ps->{branch} = git_branchname($ps->{id});
 271
 272    #
 273    # ensure we have a clean state 
 274    # 
 275    if (`git diff-files`) {
 276        die "Unclean tree when about to process $ps->{id} " .
 277            " - did we fail to commit cleanly before?";
 278    }
 279    die $! if $?;
 280
 281    #
 282    # skip commits already in repo
 283    #
 284    if (ptag($ps->{id})) {
 285      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 286      next;
 287    }
 288
 289    print " * Starting to work on $ps->{id}\n";
 290
 291    # 
 292    # create the branch if needed
 293    #
 294    if ($ps->{type} eq 'i' && !$import) {
 295        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 296    }
 297
 298    unless ($import) { # skip for import
 299        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 300            # we know about this branch
 301            `git checkout    $ps->{branch}`;
 302        } else {
 303            # new branch! we need to verify a few things
 304            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 305            my $branchpoint = ptag($ps->{tag});
 306            die "Tagging from unknown id unsupported: $ps->{tag}" 
 307                unless $branchpoint;
 308            
 309            # find where we are supposed to branch from
 310            `git checkout -b $ps->{branch} $branchpoint`;
 311
 312            # If we trust Arch with the fact that this is just 
 313            # a tag, and it does not affect the state of the tree
 314            # then we just tag and move on
 315            tag($ps->{id}, $branchpoint);
 316            ptag($ps->{id}, $branchpoint);
 317            print " * Tagged $ps->{id} at $branchpoint\n";
 318            next;
 319        } 
 320        die $! if $?;
 321    } 
 322
 323    #
 324    # Apply the import/changeset/merge into the working tree
 325    # 
 326    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 327        apply_import($ps) or die $!;
 328        $import=0;
 329    } elsif ($ps->{type} eq 's') {
 330        apply_cset($ps);
 331    }
 332
 333    #
 334    # prepare update git's index, based on what arch knows
 335    # about the pset, resolve parents, etc
 336    #
 337    my $tree;
 338    
 339    my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`; 
 340    die "Error in cat-archive-log: $!" if $?;
 341        
 342    # parselog will git-add/rm files
 343    # and generally prepare things for the commit
 344    # NOTE: parselog will shell-quote filenames! 
 345    my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
 346    my $logmessage = "$sum\n$msg";
 347
 348
 349    # imports don't give us good info
 350    # on added files. Shame on them
 351    if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 
 352        `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
 353        `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
 354    }
 355
 356    if (@$add) {
 357        while (@$add) {
 358            my @slice = splice(@$add, 0, 100);
 359            my $slice = join(' ', @slice);          
 360            `git-update-index --add $slice`;
 361            die "Error in git-update-index --add: $!" if $?;
 362        }
 363    }
 364    if (@$del) {
 365        foreach my $file (@$del) {
 366            unlink $file or die "Problems deleting $file : $!";
 367        }
 368        while (@$del) {
 369            my @slice = splice(@$del, 0, 100);
 370            my $slice = join(' ', @slice);
 371            `git-update-index --remove $slice`;
 372            die "Error in git-update-index --remove: $!" if $?;
 373        }
 374    }
 375    if (@$ren) {                # renamed
 376        if (@$ren % 2) {
 377            die "Odd number of entries in rename!?";
 378        }
 379        ;
 380        while (@$ren) {
 381            my $from = pop @$ren;
 382            my $to   = pop @$ren;           
 383
 384            unless (-d dirname($to)) {
 385                mkpath(dirname($to)); # will die on err
 386            }
 387            #print "moving $from $to";
 388            `mv $from $to`;
 389            die "Error renaming $from $to : $!" if $?;
 390            `git-update-index --remove $from`;
 391            die "Error in git-update-index --remove: $!" if $?;
 392            `git-update-index --add $to`;
 393            die "Error in git-update-index --add: $!" if $?;
 394        }
 395
 396    }
 397    if (@$mod) {                # must be _after_ renames
 398        while (@$mod) {
 399            my @slice = splice(@$mod, 0, 100);
 400            my $slice = join(' ', @slice);
 401            `git-update-index $slice`;
 402            die "Error in git-update-index: $!" if $?;
 403        }
 404    }
 405
 406    # warn "errors when running git-update-index! $!";
 407    $tree = `git-write-tree`;
 408    die "cannot write tree $!" if $?;
 409    chomp $tree;
 410        
 411    
 412    #
 413    # Who's your daddy?
 414    #
 415    my @par;
 416    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 417        if (open HEAD, "<$git_dir/refs/heads/$ps->{branch}") {
 418            my $p = <HEAD>;
 419            close HEAD;
 420            chomp $p;
 421            push @par, '-p', $p;
 422        } else { 
 423            if ($ps->{type} eq 's') {
 424                warn "Could not find the right head for the branch $ps->{branch}";
 425            }
 426        }
 427    }
 428    
 429    if ($ps->{merges}) {
 430        push @par, find_parents($ps);
 431    }
 432    my $par = join (' ', @par);
 433
 434    #    
 435    # Commit, tag and clean state
 436    #
 437    $ENV{TZ}                  = 'GMT';
 438    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 439    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 440    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 441    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 442    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 443    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 444
 445    my ($pid, $commit_rh, $commit_wh);
 446    $commit_rh = 'commit_rh';
 447    $commit_wh = 'commit_wh';
 448    
 449    $pid = open2(*READER, *WRITER, "git-commit-tree $tree $par") 
 450        or die $!;
 451    print WRITER $logmessage;   # write
 452    close WRITER;
 453    my $commitid = <READER>;    # read
 454    chomp $commitid;
 455    close READER;
 456    waitpid $pid,0;             # close;
 457
 458    if (length $commitid != 40) {
 459        die "Something went wrong with the commit! $! $commitid";
 460    }
 461    #
 462    # Update the branch
 463    # 
 464    open  HEAD, ">$git_dir/refs/heads/$ps->{branch}";
 465    print HEAD $commitid;
 466    close HEAD;
 467    system('git-update-ref', 'HEAD', "$ps->{branch}");
 468
 469    # tag accordingly
 470    ptag($ps->{id}, $commitid); # private tag
 471    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 472        tag($ps->{id}, $commitid);
 473    }
 474    print " * Committed $ps->{id}\n";
 475    print "   + tree   $tree\n";
 476    print "   + commit $commitid\n";
 477    $opt_v && print "   + commit date is  $ps->{date} \n";
 478    $opt_v && print "   + parents:  $par \n";
 479}
 480
 481sub apply_import {
 482    my $ps = shift;
 483    my $bname = git_branchname($ps->{id});
 484
 485    `mkdir -p $tmp`;
 486
 487    `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
 488    die "Cannot get import: $!" if $?;    
 489    `rsync -v --archive --delete --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
 490    die "Cannot rsync import:$!" if $?;
 491    
 492    `rm -fr $tmp/import`;
 493    die "Cannot remove tempdir: $!" if $?;
 494    
 495
 496    return 1;
 497}
 498
 499sub apply_cset {
 500    my $ps = shift;
 501
 502    `mkdir -p $tmp`;
 503
 504    # get the changeset
 505    `tla get-changeset  -A $ps->{repo} $ps->{id} $tmp/changeset`;
 506    die "Cannot get changeset: $!" if $?;
 507    
 508    # apply patches
 509    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 510        # this can be sped up considerably by doing
 511        #    (find | xargs cat) | patch
 512        # but that cna get mucked up by patches
 513        # with missing trailing newlines or the standard 
 514        # 'missing newline' flag in the patch - possibly
 515        # produced with an old/buggy diff.
 516        # slow and safe, we invoke patch once per patchfile
 517        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 518        die "Problem applying patches! $!" if $?;
 519    }
 520
 521    # apply changed binary files
 522    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 523        foreach my $mod (@modified) {
 524            chomp $mod;
 525            my $orig = $mod;
 526            $orig =~ s/\.modified$//; # lazy
 527            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 528            #print "rsync -p '$mod' '$orig'";
 529            `rsync -p $mod ./$orig`;
 530            die "Problem applying binary changes! $!" if $?;
 531        }
 532    }
 533
 534    # bring in new files
 535    `rsync --archive --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
 536
 537    # deleted files are hinted from the commitlog processing
 538
 539    `rm -fr $tmp/changeset`;
 540}
 541
 542
 543# =for reference
 544# A log entry looks like 
 545# Revision: moodle-org--moodle--1.3.3--patch-15
 546# Archive: arch-eduforge@catalyst.net.nz--2004
 547# Creator: Penny Leach <penny@catalyst.net.nz>
 548# Date: Wed May 25 14:15:34 NZST 2005
 549# Standard-date: 2005-05-25 02:15:34 GMT
 550# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 551#     lang/de/.arch-ids/block_html.php.id
 552# New-directories: lang/de/help/questionnaire
 553#     lang/de/help/questionnaire/.arch-ids
 554# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 555#    db_sears.sql db/db_sears.sql
 556# Removed-files: lang/be/docs/.arch-ids/release.html.id
 557#     lang/be/docs/.arch-ids/releaseold.html.id
 558# Modified-files: admin/cron.php admin/delete.php
 559#     admin/editor.html backup/lib.php backup/restore.php
 560# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 561# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 562# Keywords:
 563#
 564# Updating yadda tadda tadda madda
 565sub parselog {
 566    my $log = shift;
 567    #print $log;
 568
 569    my (@add, @del, @mod, @ren, @kw, $sum, $msg );
 570
 571    if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
 572        my $files = $1;
 573        @add = split(m/\s+/s, $files);
 574    }
 575       
 576    if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
 577        my $files = $1;
 578        @del = split(m/\s+/s, $files);
 579    }
 580    
 581    if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
 582        my $files = $1;
 583        @mod = split(m/\s+/s, $files);
 584    }
 585    
 586    if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
 587        my $files = $1;
 588        @ren = split(m/\s+/s, $files);
 589    }
 590
 591    $sum ='';
 592    if ($log =~ m/^Summary:(.+?)$/m ) {
 593        $sum = $1;
 594        $sum =~ s/^\s+//;
 595        $sum =~ s/\s+$//;
 596    }
 597
 598    $msg = '';
 599    if ($log =~ m/\n\n(.+)$/s) {
 600        $msg = $1;
 601        $msg =~ s/^\s+//;
 602        $msg =~ s/\s+$//;
 603    }
 604
 605
 606    # cleanup the arrays
 607    foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
 608        my @tmp = ();
 609        while (my $t = pop @$ref) {
 610            next unless length ($t);
 611            next if $t =~ m!\{arch\}/!;
 612            next if $t =~ m!\.arch-ids/!;
 613            next if $t =~ m!\.arch-inventory$!;
 614           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 615           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 616           if  ($t =~ /\\/ ){
 617               $t = `tla escape --unescaped '$t'`;
 618           }
 619            push (@tmp, shell_quote($t));
 620        }
 621        @$ref = @tmp;
 622    }
 623    
 624    #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren]; 
 625    return       ($sum, $msg, \@add, \@del, \@mod, \@ren); 
 626}
 627
 628# write/read a tag
 629sub tag {
 630    my ($tag, $commit) = @_;
 631 
 632    # don't use subdirs for tags yet, it could screw up other porcelains
 633    $tag =~ s|/|,|;
 634    
 635    if ($commit) {
 636        open(C,">","$git_dir/refs/tags/$tag")
 637            or die "Cannot create tag $tag: $!\n";
 638        print C "$commit\n"
 639            or die "Cannot write tag $tag: $!\n";
 640        close(C)
 641            or die "Cannot write tag $tag: $!\n";
 642        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 643    } else {                    # read
 644        open(C,"<","$git_dir/refs/tags/$tag")
 645            or die "Cannot read tag $tag: $!\n";
 646        $commit = <C>;
 647        chomp $commit;
 648        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 649        close(C)
 650            or die "Cannot read tag $tag: $!\n";
 651        return $commit;
 652    }
 653}
 654
 655# write/read a private tag
 656# reads fail softly if the tag isn't there
 657sub ptag {
 658    my ($tag, $commit) = @_;
 659
 660    # don't use subdirs for tags yet, it could screw up other porcelains
 661    $tag =~ s|/|,|g; 
 662    
 663    my $tag_file = "$ptag_dir/$tag";
 664    my $tag_branch_dir = dirname($tag_file);
 665    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 666
 667    if ($commit) {              # write
 668        open(C,">",$tag_file)
 669            or die "Cannot create tag $tag: $!\n";
 670        print C "$commit\n"
 671            or die "Cannot write tag $tag: $!\n";
 672        close(C)
 673            or die "Cannot write tag $tag: $!\n";
 674        $rptags{$commit} = $tag 
 675            unless $tag =~ m/--base-0$/;
 676    } else {                    # read
 677        # if the tag isn't there, return 0
 678        unless ( -s $tag_file) {
 679            return 0;
 680        }
 681        open(C,"<",$tag_file)
 682            or die "Cannot read tag $tag: $!\n";
 683        $commit = <C>;
 684        chomp $commit;
 685        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 686        close(C)
 687            or die "Cannot read tag $tag: $!\n";
 688        unless (defined $rptags{$commit}) {
 689            $rptags{$commit} = $tag;
 690        }
 691        return $commit;
 692    }
 693}
 694
 695sub find_parents {
 696    #
 697    # Identify what branches are merging into me
 698    # and whether we are fully merged
 699    # git-merge-base <headsha> <headsha> should tell
 700    # me what the base of the merge should be 
 701    #
 702    my $ps = shift;
 703
 704    my %branches; # holds an arrayref per branch
 705                  # the arrayref contains a list of
 706                  # merged patches between the base
 707                  # of the merge and the current head
 708
 709    my @parents;  # parents found for this commit
 710
 711    # simple loop to split the merges
 712    # per branch
 713    foreach my $merge (@{$ps->{merges}}) {
 714        my $branch = git_branchname($merge);
 715        unless (defined $branches{$branch} ){
 716            $branches{$branch} = [];
 717        }
 718        push @{$branches{$branch}}, $merge;
 719    }
 720
 721    #
 722    # foreach branch find a merge base and walk it to the 
 723    # head where we are, collecting the merged patchsets that
 724    # Arch has recorded. Keep that in @have
 725    # Compare that with the commits on the other branch
 726    # between merge-base and the tip of the branch (@need)
 727    # and see if we have a series of consecutive patches
 728    # starting from the merge base. The tip of the series
 729    # of consecutive patches merged is our new parent for 
 730    # that branch.
 731    #
 732    foreach my $branch (keys %branches) {
 733
 734        # check that we actually know about the branch
 735        next unless -e "$git_dir/refs/heads/$branch";
 736
 737        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 738        if ($?) { 
 739            # Don't die here, Arch supports one-way cherry-picking
 740            # between branches with no common base (or any relationship
 741            # at all beforehand)
 742            warn "Cannot find merge base for $branch and $ps->{branch}";
 743            next;
 744        }
 745        chomp $mergebase;
 746
 747        # now walk up to the mergepoint collecting what patches we have
 748        my $branchtip = git_rev_parse($ps->{branch});
 749        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 750        my %have; # collected merges this branch has
 751        foreach my $merge (@{$ps->{merges}}) {
 752            $have{$merge} = 1;
 753        }
 754        my %ancestorshave;
 755        foreach my $par (@ancestors) {
 756            $par = commitid2pset($par);
 757            if (defined $par->{merges}) {
 758                foreach my $merge (@{$par->{merges}}) {
 759                    $ancestorshave{$merge}=1;
 760                }
 761            }
 762        }
 763        # print "++++ Merges in $ps->{id} are....\n";
 764        # my @have = sort keys %have;   print Dumper(\@have);
 765
 766        # merge what we have with what ancestors have
 767        %have = (%have, %ancestorshave);
 768
 769        # see what the remote branch has - these are the merges we 
 770        # will want to have in a consecutive series from the mergebase
 771        my $otherbranchtip = git_rev_parse($branch);
 772        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 773        my @need;
 774        foreach my $needps (@needraw) {         # get the psets
 775            $needps = commitid2pset($needps);
 776            # git-rev-list will also
 777            # list commits merged in via earlier 
 778            # merges. we are only interested in commits
 779            # from the branch we're looking at
 780            if ($branch eq $needps->{branch}) {
 781                push @need, $needps->{id};
 782            }
 783        }
 784
 785        # print "++++ Merges from $branch we want are....\n";
 786        # print Dumper(\@need);
 787
 788        my $newparent;
 789        while (my $needed_commit = pop @need) {
 790            if ($have{$needed_commit}) {
 791                $newparent = $needed_commit;
 792            } else {
 793                last; # break out of the while
 794            }
 795        }
 796        if ($newparent) {
 797            push @parents, $newparent;
 798        }
 799
 800
 801    } # end foreach branch
 802
 803    # prune redundant parents
 804    my %parents;
 805    foreach my $p (@parents) {
 806        $parents{$p} = 1;
 807    }
 808    foreach my $p (@parents) {
 809        next unless exists $psets{$p}{merges};
 810        next unless ref    $psets{$p}{merges};
 811        my @merges = @{$psets{$p}{merges}};
 812        foreach my $merge (@merges) {
 813            if ($parents{$merge}) { 
 814                delete $parents{$merge};
 815            }
 816        }
 817    }
 818    @parents = keys %parents;
 819    @parents = map { " -p " . ptag($_) } @parents;
 820    return @parents;
 821}
 822
 823sub git_rev_parse {
 824    my $name = shift;
 825    my $val  = `git-rev-parse $name`;
 826    die "Error: git-rev-parse $name" if $?;
 827    chomp $val;
 828    return $val;
 829}
 830
 831# resolve a SHA1 to a known patchset
 832sub commitid2pset {
 833    my $commitid = shift;
 834    chomp $commitid;
 835    my $name = $rptags{$commitid} 
 836        || die "Cannot find reverse tag mapping for $commitid";
 837    $name =~ s|,|/|;
 838    my $ps   = $psets{$name} 
 839        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
 840    return $ps;
 841}