8676f35f0f94470384140d536729ce842e0e6c73
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  13
  14Imports a project from one or more Arch repositories. It will follow branches
  15and repositories within the namespaces defined by the <archive/branch>
  16parameters suppplied. If it cannot find the remote branch a merge comes from
  17it will just import it as a regular commit. If it can find it, it will mark it 
  18as a merge whenever possible.
  19
  20See man (1) git-archimport for more details.
  21
  22=head1 TODO
  23
  24 - create tag objects instead of ref tags
  25 - audit shell-escaping of filenames
  26 - hide our private tags somewhere smarter
  27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  28
  29=head1 Devel tricks
  30
  31Add print in front of the shell commands invoked via backticks. 
  32
  33=head1 Devel Notes
  34
  35There are several places where Arch and git terminology are intermixed
  36and potentially confused.
  37
  38The notion of a "branch" in git is approximately equivalent to
  39a "archive/category--branch--version" in Arch.  Also, it should be noted
  40that the "--branch" portion of "archive/category--branch--version" is really
  41optional in Arch although not many people (nor tools!) seem to know this.
  42This means that "archive/category--version" is also a valid "branch"
  43in git terms.
  44
  45We always refer to Arch names by their fully qualified variant (which
  46means the "archive" name is prefixed.
  47
  48For people unfamiliar with Arch, an "archive" is the term for "repository",
  49and can contain multiple, unrelated branches.
  50
  51=cut
  52
  53use strict;
  54use warnings;
  55use Getopt::Std;
  56use File::Temp qw(tempdir);
  57use File::Path qw(mkpath rmtree);
  58use File::Basename qw(basename dirname);
  59use Data::Dumper qw/ Dumper /;
  60use IPC::Open2;
  61
  62$SIG{'PIPE'}="IGNORE";
  63$ENV{'TZ'}="UTC";
  64
  65my $git_dir = $ENV{"GIT_DIR"} || ".git";
  66$ENV{"GIT_DIR"} = $git_dir;
  67my $ptag_dir = "$git_dir/archimport/tags";
  68
  69our($opt_h,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
  70
  71sub usage() {
  72    print STDERR <<END;
  73Usage: ${\basename $0}     # fetch/update GIT from Arch
  74       [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth  ] [ -t tempdir ]
  75       repository/arch-branch [ repository/arch-branch] ...
  76END
  77    exit(1);
  78}
  79
  80getopts("Thvat:D:") or usage();
  81usage if $opt_h;
  82
  83@ARGV >= 1 or usage();
  84# $arch_branches:
  85# values associated with keys:
  86#   =1 - Arch version / git 'branch' detected via abrowse on a limit
  87#   >1 - Arch version / git 'branch' of an auxilliary branch we've merged
  88my %arch_branches = map { $_ => 1 } @ARGV;
  89
  90$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
  91my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  92$opt_v && print "+ Using $tmp as temporary directory\n";
  93
  94my %reachable = ();             # Arch repositories we can access
  95my %unreachable = ();           # Arch repositories we can't access :<
  96my @psets  = ();                # the collection
  97my %psets  = ();                # the collection, by name
  98
  99my %rptags = ();                # my reverse private tags
 100                                # to map a SHA1 to a commitid
 101my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 102
 103sub do_abrowse {
 104    my $stage = shift;
 105    while (my ($limit, $level) = each %arch_branches) {
 106        next unless $level == $stage;
 107        
 108        open ABROWSE, "$TLA abrowse -fkD --merges $limit |" 
 109                                or die "Problems with tla abrowse: $!";
 110    
 111        my %ps        = ();         # the current one
 112        my $lastseen  = '';
 113    
 114        while (<ABROWSE>) {
 115            chomp;
 116            
 117            # first record padded w 8 spaces
 118            if (s/^\s{8}\b//) {
 119                my ($id, $type) = split(m/\s+/, $_, 2);
 120
 121                my %last_ps;
 122                # store the record we just captured
 123                if (%ps && !exists $psets{ $ps{id} }) {
 124                    %last_ps = %ps; # break references
 125                    push (@psets, \%last_ps);
 126                    $psets{ $last_ps{id} } = \%last_ps;
 127                }
 128                
 129                my $branch = extract_versionname($id);
 130                %ps = ( id => $id, branch => $branch );
 131                if (%last_ps && ($last_ps{branch} eq $branch)) {
 132                    $ps{parent_id} = $last_ps{id};
 133                }
 134                
 135                $arch_branches{$branch} = 1;
 136                $lastseen = 'id';
 137
 138                # deal with types (should work with baz or tla):
 139                if ($type =~ m/\(.*changeset\)/) {
 140                    $ps{type} = 's';
 141                } elsif ($type =~ /\(.*import\)/) {
 142                    $ps{type} = 'i';
 143                } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
 144                    $ps{type} = 't';
 145                    # read which revision we've tagged when we parse the log
 146                    $ps{tag}  = $1;
 147                } else { 
 148                    warn "Unknown type $type";
 149                }
 150
 151                $arch_branches{$branch} = 1;
 152                $lastseen = 'id';
 153            } elsif (s/^\s{10}//) { 
 154                # 10 leading spaces or more 
 155                # indicate commit metadata
 156                
 157                # date
 158                if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
 159                    $ps{date}   = $1;
 160                    $lastseen = 'date';
 161                } elsif ($_ eq 'merges in:') {
 162                    $ps{merges} = [];
 163                    $lastseen = 'merges';
 164                } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 165                    my $id = $_;
 166                    push (@{$ps{merges}}, $id);
 167                   
 168                    # aggressive branch finding:
 169                    if ($opt_D) {
 170                        my $branch = extract_versionname($id);
 171                        my $repo = extract_reponame($branch);
 172                        
 173                        if (archive_reachable($repo) &&
 174                                !defined $arch_branches{$branch}) {
 175                            $arch_branches{$branch} = $stage + 1;
 176                        }
 177                    }
 178                } else {
 179                    warn "more metadata after merges!?: $_\n" unless /^\s*$/;
 180                }
 181            }
 182        }
 183
 184        if (%ps && !exists $psets{ $ps{id} }) {
 185            my %temp = %ps;         # break references
 186            if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
 187                $temp{parent_id} = $psets[$#psets]{id};
 188            }
 189            push (@psets, \%temp);  
 190            $psets{ $temp{id} } = \%temp;
 191        }    
 192        
 193        close ABROWSE or die "$TLA abrowse failed on $limit\n";
 194    }
 195}                               # end foreach $root
 196
 197do_abrowse(1);
 198my $depth = 2;
 199$opt_D ||= 0;
 200while ($depth <= $opt_D) {
 201    do_abrowse($depth);
 202    $depth++;
 203}
 204
 205## Order patches by time
 206# FIXME see if we can find a more optimal way to do this by graphing
 207# the ancestry data and walking it, that way we won't have to rely on
 208# client-supplied dates
 209@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 210
 211#print Dumper \@psets;
 212
 213##
 214## TODO cleanup irrelevant patches
 215##      and put an initial import
 216##      or a full tag
 217my $import = 0;
 218unless (-d $git_dir) { # initial import
 219    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 220        print "Starting import from $psets[0]{id}\n";
 221        `git-init-db`;
 222        die $! if $?;
 223        $import = 1;
 224    } else {
 225        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 226    }
 227} else {    # progressing an import
 228    # load the rptags
 229    opendir(DIR, $ptag_dir)
 230        || die "can't opendir: $!";
 231    while (my $file = readdir(DIR)) {
 232        # skip non-interesting-files
 233        next unless -f "$ptag_dir/$file";
 234   
 235        # convert first '--' to '/' from old git-archimport to use
 236        # as an archivename/c--b--v private tag
 237        if ($file !~ m!,!) {
 238            my $oldfile = $file;
 239            $file =~ s!--!,!;
 240            print STDERR "converting old tag $oldfile to $file\n";
 241            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 242        }
 243        my $sha = ptag($file);
 244        chomp $sha;
 245        $rptags{$sha} = $file;
 246    }
 247    closedir DIR;
 248}
 249
 250# process patchsets
 251# extract the Arch repository name (Arch "archive" in Arch-speak)
 252sub extract_reponame {
 253    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 254    return (split(/\//, $fq_cvbr))[0];
 255}
 256 
 257sub extract_versionname {
 258    my $name = shift;
 259    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 260    return $name;
 261}
 262
 263# convert a fully-qualified revision or version to a unique dirname:
 264#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 265# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 266#
 267# the git notion of a branch is closer to
 268# archive/category--branch--version than archive/category--branch, so we
 269# use this to convert to git branch names.
 270# Also, keep archive names but replace '/' with ',' since it won't require
 271# subdirectories, and is safer than swapping '--' which could confuse
 272# reverse-mapping when dealing with bastard branches that
 273# are just archive/category--version  (no --branch)
 274sub tree_dirname {
 275    my $revision = shift;
 276    my $name = extract_versionname($revision);
 277    $name =~ s#/#,#;
 278    return $name;
 279}
 280
 281# old versions of git-archimport just use the <category--branch> part:
 282sub old_style_branchname {
 283    my $id = shift;
 284    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 285    chomp $ret;
 286    return $ret;
 287}
 288
 289*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 290
 291# process patchsets
 292foreach my $ps (@psets) {
 293    $ps->{branch} = git_branchname($ps->{id});
 294
 295    #
 296    # ensure we have a clean state 
 297    # 
 298    if (`git-diff-files`) {
 299        die "Unclean tree when about to process $ps->{id} " .
 300            " - did we fail to commit cleanly before?";
 301    }
 302    die $! if $?;
 303
 304    #
 305    # skip commits already in repo
 306    #
 307    if (ptag($ps->{id})) {
 308      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 309      next;
 310    }
 311
 312    print " * Starting to work on $ps->{id}\n";
 313
 314    # 
 315    # create the branch if needed
 316    #
 317    if ($ps->{type} eq 'i' && !$import) {
 318        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 319    }
 320
 321    unless ($import) { # skip for import
 322        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 323            # we know about this branch
 324            system('git-checkout',$ps->{branch});
 325        } else {
 326            # new branch! we need to verify a few things
 327            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 328            my $branchpoint = ptag($ps->{tag});
 329            die "Tagging from unknown id unsupported: $ps->{tag}" 
 330                unless $branchpoint;
 331            
 332            # find where we are supposed to branch from
 333            system('git-checkout','-b',$ps->{branch},$branchpoint);
 334
 335            # If we trust Arch with the fact that this is just 
 336            # a tag, and it does not affect the state of the tree
 337            # then we just tag and move on
 338            tag($ps->{id}, $branchpoint);
 339            ptag($ps->{id}, $branchpoint);
 340            print " * Tagged $ps->{id} at $branchpoint\n";
 341            next;
 342        } 
 343        die $! if $?;
 344    } 
 345
 346    #
 347    # Apply the import/changeset/merge into the working tree
 348    # 
 349    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 350        apply_import($ps) or die $!;
 351        $import=0;
 352    } elsif ($ps->{type} eq 's') {
 353        apply_cset($ps);
 354    }
 355
 356    #
 357    # prepare update git's index, based on what arch knows
 358    # about the pset, resolve parents, etc
 359    #
 360    my $tree;
 361    
 362    my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 
 363    die "Error in cat-archive-log: $!" if $?;
 364        
 365    parselog($ps,\@commitlog);
 366
 367    # imports don't give us good info
 368    # on added files. Shame on them
 369    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 370        system('git-ls-files --others -z | '.
 371                'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 372        system('git-ls-files --deleted -z | '.
 373                'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 374    }
 375
 376    # TODO: handle removed_directories and renamed_directories:
 377   
 378    if (my $add = $ps->{new_files}) {
 379        while (@$add) {
 380            my @slice = splice(@$add, 0, 100);
 381            system('git-update-index','--add','--',@slice) == 0 or
 382                            die "Error in git-update-index --add: $! $?\n";
 383        }
 384    }
 385   
 386    if (my $del = $ps->{removed_files}) {
 387        unlink @$del;
 388        while (@$del) {
 389            my @slice = splice(@$del, 0, 100);
 390            system('git-update-index','--remove','--',@slice) == 0 or
 391                            die "Error in git-update-index --remove: $! $?\n";
 392        }
 393    }
 394
 395    if (my $ren = $ps->{renamed_files}) {                # renamed
 396        if (@$ren % 2) {
 397            die "Odd number of entries in rename!?";
 398        }
 399        
 400        while (@$ren) {
 401            my $from = shift @$ren;
 402            my $to   = shift @$ren;           
 403
 404            unless (-d dirname($to)) {
 405                mkpath(dirname($to)); # will die on err
 406            }
 407            print "moving $from $to";
 408            rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
 409            system('git-update-index','--remove','--',$from) == 0 or
 410                            die "Error in git-update-index --remove: $! $?\n";
 411            system('git-update-index','--add','--',$to) == 0 or
 412                            die "Error in git-update-index --add: $! $?\n";
 413        }
 414
 415    }
 416
 417    if (my $mod = $ps->{modified_files}) {
 418        while (@$mod) {
 419            my @slice = splice(@$mod, 0, 100);
 420            system('git-update-index','--',@slice) == 0 or
 421                            die "Error in git-update-index: $! $?\n";
 422        }
 423    }
 424    
 425    # warn "errors when running git-update-index! $!";
 426    $tree = `git-write-tree`;
 427    die "cannot write tree $!" if $?;
 428    chomp $tree;
 429    
 430    #
 431    # Who's your daddy?
 432    #
 433    my @par;
 434    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 435        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
 436            my $p = <HEAD>;
 437            close HEAD;
 438            chomp $p;
 439            push @par, '-p', $p;
 440        } else { 
 441            if ($ps->{type} eq 's') {
 442                warn "Could not find the right head for the branch $ps->{branch}";
 443            }
 444        }
 445    }
 446    
 447    if ($ps->{merges}) {
 448        push @par, find_parents($ps);
 449    }
 450
 451    #    
 452    # Commit, tag and clean state
 453    #
 454    $ENV{TZ}                  = 'GMT';
 455    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 456    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 457    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 458    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 459    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 460    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 461
 462    my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 
 463        or die $!;
 464    print WRITER $ps->{summary},"\n";
 465    print WRITER $ps->{message},"\n";
 466    
 467    # make it easy to backtrack and figure out which Arch revision this was:
 468    print WRITER 'git-archimport-id: ',$ps->{id},"\n";
 469    
 470    close WRITER;
 471    my $commitid = <READER>;    # read
 472    chomp $commitid;
 473    close READER;
 474    waitpid $pid,0;             # close;
 475
 476    if (length $commitid != 40) {
 477        die "Something went wrong with the commit! $! $commitid";
 478    }
 479    #
 480    # Update the branch
 481    # 
 482    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
 483    print HEAD $commitid;
 484    close HEAD;
 485    system('git-update-ref', 'HEAD', "$ps->{branch}");
 486
 487    # tag accordingly
 488    ptag($ps->{id}, $commitid); # private tag
 489    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 490        tag($ps->{id}, $commitid);
 491    }
 492    print " * Committed $ps->{id}\n";
 493    print "   + tree   $tree\n";
 494    print "   + commit $commitid\n";
 495    $opt_v && print "   + commit date is  $ps->{date} \n";
 496    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
 497}
 498
 499sub apply_import {
 500    my $ps = shift;
 501    my $bname = git_branchname($ps->{id});
 502
 503    mkpath($tmp);
 504
 505    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
 506    die "Cannot get import: $!" if $?;    
 507    system('rsync','-aI','--delete', '--exclude',$git_dir,
 508                '--exclude','.arch-ids','--exclude','{arch}',
 509                "$tmp/import/", './');
 510    die "Cannot rsync import:$!" if $?;
 511    
 512    rmtree("$tmp/import");
 513    die "Cannot remove tempdir: $!" if $?;
 514    
 515
 516    return 1;
 517}
 518
 519sub apply_cset {
 520    my $ps = shift;
 521
 522    mkpath($tmp);
 523
 524    # get the changeset
 525    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
 526    die "Cannot get changeset: $!" if $?;
 527    
 528    # apply patches
 529    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 530        # this can be sped up considerably by doing
 531        #    (find | xargs cat) | patch
 532        # but that cna get mucked up by patches
 533        # with missing trailing newlines or the standard 
 534        # 'missing newline' flag in the patch - possibly
 535        # produced with an old/buggy diff.
 536        # slow and safe, we invoke patch once per patchfile
 537        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 538        die "Problem applying patches! $!" if $?;
 539    }
 540
 541    # apply changed binary files
 542    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 543        foreach my $mod (@modified) {
 544            chomp $mod;
 545            my $orig = $mod;
 546            $orig =~ s/\.modified$//; # lazy
 547            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 548            #print "rsync -p '$mod' '$orig'";
 549            system('rsync','-p',$mod,"./$orig");
 550            die "Problem applying binary changes! $!" if $?;
 551        }
 552    }
 553
 554    # bring in new files
 555    system('rsync','-aI','--exclude',$git_dir,
 556                '--exclude','.arch-ids',
 557                '--exclude', '{arch}',
 558                "$tmp/changeset/new-files-archive/",'./');
 559
 560    # deleted files are hinted from the commitlog processing
 561
 562    rmtree("$tmp/changeset");
 563}
 564
 565
 566# =for reference
 567# notes: *-files/-directories keys cannot have spaces, they're always
 568# pika-escaped.  Everything after the first newline
 569# A log entry looks like:
 570# Revision: moodle-org--moodle--1.3.3--patch-15
 571# Archive: arch-eduforge@catalyst.net.nz--2004
 572# Creator: Penny Leach <penny@catalyst.net.nz>
 573# Date: Wed May 25 14:15:34 NZST 2005
 574# Standard-date: 2005-05-25 02:15:34 GMT
 575# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 576#     lang/de/.arch-ids/block_html.php.id
 577# New-directories: lang/de/help/questionnaire
 578#     lang/de/help/questionnaire/.arch-ids
 579# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 580#    db_sears.sql db/db_sears.sql
 581# Removed-files: lang/be/docs/.arch-ids/release.html.id
 582#     lang/be/docs/.arch-ids/releaseold.html.id
 583# Modified-files: admin/cron.php admin/delete.php
 584#     admin/editor.html backup/lib.php backup/restore.php
 585# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 586# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 587#   summary can be multiline with a leading space just like the above fields
 588# Keywords:
 589#
 590# Updating yadda tadda tadda madda
 591sub parselog {
 592    my ($ps, $log) = @_;
 593    my $key = undef;
 594
 595    # headers we want that contain filenames:
 596    my %want_headers = (
 597        new_files => 1,
 598        modified_files => 1,
 599        renamed_files => 1,
 600        renamed_directories => 1,
 601        removed_files => 1,
 602        removed_directories => 1,
 603    );
 604    
 605    chomp (@$log);
 606    while ($_ = shift @$log) {
 607        if (/^Continuation-of:\s*(.*)/) {
 608            $ps->{tag} = $1;
 609            $key = undef;
 610        } elsif (/^Summary:\s*(.*)$/ ) {
 611            # summary can be multiline as long as it has a leading space
 612            $ps->{summary} = [ $1 ];
 613            $key = 'summary';
 614        } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
 615            $ps->{author} = $1;
 616            $ps->{email} = $2;
 617            $key = undef;
 618        # any *-files or *-directories can be read here:
 619        } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
 620            my $val = $2;
 621            $key = lc $1;
 622            $key =~ tr/-/_/; # too lazy to quote :P
 623            if ($want_headers{$key}) {
 624                push @{$ps->{$key}}, split(/\s+/, $val);
 625            } else {
 626                $key = undef;
 627            }
 628        } elsif (/^$/) {
 629            last; # remainder of @$log that didn't get shifted off is message
 630        } elsif ($key) {
 631            if (/^\s+(.*)$/) {
 632                if ($key eq 'summary') {
 633                    push @{$ps->{$key}}, $1;
 634                } else { # files/directories:
 635                    push @{$ps->{$key}}, split(/\s+/, $1);
 636                }
 637            } else {
 638                $key = undef;
 639            }
 640        }
 641    }
 642   
 643    # post-processing:
 644    $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
 645    $ps->{message} = join("\n",@$log);
 646    
 647    # skip Arch control files, unescape pika-escaped files
 648    foreach my $k (keys %want_headers) {
 649        next unless (defined $ps->{$k});
 650        my @tmp;
 651        foreach my $t (@{$ps->{$k}}) {
 652           next unless length ($t);
 653           next if $t =~ m!\{arch\}/!;
 654           next if $t =~ m!\.arch-ids/!;
 655           # should we skip this?
 656           next if $t =~ m!\.arch-inventory$!;
 657           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 658           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 659           if ($t =~ /\\/ ){
 660               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
 661           }
 662           push @tmp, $t;
 663        }
 664        $ps->{$k} = \@tmp if scalar @tmp;
 665    }
 666}
 667
 668# write/read a tag
 669sub tag {
 670    my ($tag, $commit) = @_;
 671 
 672    if ($opt_o) {
 673        $tag =~ s|/|--|g;
 674    } else {
 675        # don't use subdirs for tags yet, it could screw up other porcelains
 676        $tag =~ s|/|,|g;
 677    }
 678    
 679    if ($commit) {
 680        open(C,">","$git_dir/refs/tags/$tag")
 681            or die "Cannot create tag $tag: $!\n";
 682        print C "$commit\n"
 683            or die "Cannot write tag $tag: $!\n";
 684        close(C)
 685            or die "Cannot write tag $tag: $!\n";
 686        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 687    } else {                    # read
 688        open(C,"<","$git_dir/refs/tags/$tag")
 689            or die "Cannot read tag $tag: $!\n";
 690        $commit = <C>;
 691        chomp $commit;
 692        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 693        close(C)
 694            or die "Cannot read tag $tag: $!\n";
 695        return $commit;
 696    }
 697}
 698
 699# write/read a private tag
 700# reads fail softly if the tag isn't there
 701sub ptag {
 702    my ($tag, $commit) = @_;
 703
 704    # don't use subdirs for tags yet, it could screw up other porcelains
 705    $tag =~ s|/|,|g; 
 706    
 707    my $tag_file = "$ptag_dir/$tag";
 708    my $tag_branch_dir = dirname($tag_file);
 709    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 710
 711    if ($commit) {              # write
 712        open(C,">",$tag_file)
 713            or die "Cannot create tag $tag: $!\n";
 714        print C "$commit\n"
 715            or die "Cannot write tag $tag: $!\n";
 716        close(C)
 717            or die "Cannot write tag $tag: $!\n";
 718        $rptags{$commit} = $tag 
 719            unless $tag =~ m/--base-0$/;
 720    } else {                    # read
 721        # if the tag isn't there, return 0
 722        unless ( -s $tag_file) {
 723            return 0;
 724        }
 725        open(C,"<",$tag_file)
 726            or die "Cannot read tag $tag: $!\n";
 727        $commit = <C>;
 728        chomp $commit;
 729        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 730        close(C)
 731            or die "Cannot read tag $tag: $!\n";
 732        unless (defined $rptags{$commit}) {
 733            $rptags{$commit} = $tag;
 734        }
 735        return $commit;
 736    }
 737}
 738
 739sub find_parents {
 740    #
 741    # Identify what branches are merging into me
 742    # and whether we are fully merged
 743    # git-merge-base <headsha> <headsha> should tell
 744    # me what the base of the merge should be 
 745    #
 746    my $ps = shift;
 747
 748    my %branches; # holds an arrayref per branch
 749                  # the arrayref contains a list of
 750                  # merged patches between the base
 751                  # of the merge and the current head
 752
 753    my @parents;  # parents found for this commit
 754
 755    # simple loop to split the merges
 756    # per branch
 757    foreach my $merge (@{$ps->{merges}}) {
 758        my $branch = git_branchname($merge);
 759        unless (defined $branches{$branch} ){
 760            $branches{$branch} = [];
 761        }
 762        push @{$branches{$branch}}, $merge;
 763    }
 764
 765    #
 766    # foreach branch find a merge base and walk it to the 
 767    # head where we are, collecting the merged patchsets that
 768    # Arch has recorded. Keep that in @have
 769    # Compare that with the commits on the other branch
 770    # between merge-base and the tip of the branch (@need)
 771    # and see if we have a series of consecutive patches
 772    # starting from the merge base. The tip of the series
 773    # of consecutive patches merged is our new parent for 
 774    # that branch.
 775    #
 776    foreach my $branch (keys %branches) {
 777
 778        # check that we actually know about the branch
 779        next unless -e "$git_dir/refs/heads/$branch";
 780
 781        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 782        if ($?) { 
 783            # Don't die here, Arch supports one-way cherry-picking
 784            # between branches with no common base (or any relationship
 785            # at all beforehand)
 786            warn "Cannot find merge base for $branch and $ps->{branch}";
 787            next;
 788        }
 789        chomp $mergebase;
 790
 791        # now walk up to the mergepoint collecting what patches we have
 792        my $branchtip = git_rev_parse($ps->{branch});
 793        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 794        my %have; # collected merges this branch has
 795        foreach my $merge (@{$ps->{merges}}) {
 796            $have{$merge} = 1;
 797        }
 798        my %ancestorshave;
 799        foreach my $par (@ancestors) {
 800            $par = commitid2pset($par);
 801            if (defined $par->{merges}) {
 802                foreach my $merge (@{$par->{merges}}) {
 803                    $ancestorshave{$merge}=1;
 804                }
 805            }
 806        }
 807        # print "++++ Merges in $ps->{id} are....\n";
 808        # my @have = sort keys %have;   print Dumper(\@have);
 809
 810        # merge what we have with what ancestors have
 811        %have = (%have, %ancestorshave);
 812
 813        # see what the remote branch has - these are the merges we 
 814        # will want to have in a consecutive series from the mergebase
 815        my $otherbranchtip = git_rev_parse($branch);
 816        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 817        my @need;
 818        foreach my $needps (@needraw) {         # get the psets
 819            $needps = commitid2pset($needps);
 820            # git-rev-list will also
 821            # list commits merged in via earlier 
 822            # merges. we are only interested in commits
 823            # from the branch we're looking at
 824            if ($branch eq $needps->{branch}) {
 825                push @need, $needps->{id};
 826            }
 827        }
 828
 829        # print "++++ Merges from $branch we want are....\n";
 830        # print Dumper(\@need);
 831
 832        my $newparent;
 833        while (my $needed_commit = pop @need) {
 834            if ($have{$needed_commit}) {
 835                $newparent = $needed_commit;
 836            } else {
 837                last; # break out of the while
 838            }
 839        }
 840        if ($newparent) {
 841            push @parents, $newparent;
 842        }
 843
 844
 845    } # end foreach branch
 846
 847    # prune redundant parents
 848    my %parents;
 849    foreach my $p (@parents) {
 850        $parents{$p} = 1;
 851    }
 852    foreach my $p (@parents) {
 853        next unless exists $psets{$p}{merges};
 854        next unless ref    $psets{$p}{merges};
 855        my @merges = @{$psets{$p}{merges}};
 856        foreach my $merge (@merges) {
 857            if ($parents{$merge}) { 
 858                delete $parents{$merge};
 859            }
 860        }
 861    }
 862
 863    @parents = ();
 864    foreach (keys %parents) {
 865        push @parents, '-p', ptag($_);
 866    }
 867    return @parents;
 868}
 869
 870sub git_rev_parse {
 871    my $name = shift;
 872    my $val  = `git-rev-parse $name`;
 873    die "Error: git-rev-parse $name" if $?;
 874    chomp $val;
 875    return $val;
 876}
 877
 878# resolve a SHA1 to a known patchset
 879sub commitid2pset {
 880    my $commitid = shift;
 881    chomp $commitid;
 882    my $name = $rptags{$commitid} 
 883        || die "Cannot find reverse tag mapping for $commitid";
 884    $name =~ s|,|/|;
 885    my $ps   = $psets{$name} 
 886        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
 887    return $ps;
 888}
 889
 890
 891# an alterative to `command` that allows input to be passed as an array
 892# to work around shell problems with weird characters in arguments
 893sub safe_pipe_capture {
 894    my @output;
 895    if (my $pid = open my $child, '-|') {
 896        @output = (<$child>);
 897        close $child or die join(' ',@_).": $! $?";
 898    } else {
 899        exec(@_) or die $?; # exec() can fail the executable can't be found
 900    }
 901    return wantarray ? @output : join('',@output);
 902}
 903
 904# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
 905sub arch_tree_id {
 906    my $dir = shift;
 907    chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
 908    return $ret;
 909}
 910
 911sub archive_reachable {
 912    my $archive = shift;
 913    return 1 if $reachable{$archive};
 914    return 0 if $unreachable{$archive};
 915    
 916    if (system "$TLA whereis-archive $archive >/dev/null") {
 917        if ($opt_a && (system($TLA,'register-archive',
 918                      "http://mirrors.sourcecontrol.net/$archive") == 0)) {
 919            $reachable{$archive} = 1;
 920            return 1;
 921        }
 922        print STDERR "Archive is unreachable: $archive\n";
 923        $unreachable{$archive} = 1;
 924        return 0;
 925    } else {
 926        $reachable{$archive} = 1;
 927        return 1;
 928    }
 929}
 930