1cf126181ba008ab11d9fb23afccb8623cb00b48
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  13
  14Imports a project from one or more Arch repositories. It will follow branches
  15and repositories within the namespaces defined by the <archive/branch>
  16parameters suppplied. If it cannot find the remote branch a merge comes from
  17it will just import it as a regular commit. If it can find it, it will mark it 
  18as a merge whenever possible.
  19
  20See man (1) git-archimport for more details.
  21
  22=head1 TODO
  23
  24 - create tag objects instead of ref tags
  25 - audit shell-escaping of filenames
  26 - hide our private tags somewhere smarter
  27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  28 - sort and apply patches by graphing ancestry relations instead of just
  29   relying in dates supplied in the changeset itself.
  30   tla ancestry-graph -m could be helpful here...
  31
  32=head1 Devel tricks
  33
  34Add print in front of the shell commands invoked via backticks. 
  35
  36=head1 Devel Notes
  37
  38There are several places where Arch and git terminology are intermixed
  39and potentially confused.
  40
  41The notion of a "branch" in git is approximately equivalent to
  42a "archive/category--branch--version" in Arch.  Also, it should be noted
  43that the "--branch" portion of "archive/category--branch--version" is really
  44optional in Arch although not many people (nor tools!) seem to know this.
  45This means that "archive/category--version" is also a valid "branch"
  46in git terms.
  47
  48We always refer to Arch names by their fully qualified variant (which
  49means the "archive" name is prefixed.
  50
  51For people unfamiliar with Arch, an "archive" is the term for "repository",
  52and can contain multiple, unrelated branches.
  53
  54=cut
  55
  56use strict;
  57use warnings;
  58use Getopt::Std;
  59use File::Temp qw(tempdir);
  60use File::Path qw(mkpath rmtree);
  61use File::Basename qw(basename dirname);
  62use Data::Dumper qw/ Dumper /;
  63use IPC::Open2;
  64
  65$SIG{'PIPE'}="IGNORE";
  66$ENV{'TZ'}="UTC";
  67
  68my $git_dir = $ENV{"GIT_DIR"} || ".git";
  69$ENV{"GIT_DIR"} = $git_dir;
  70my $ptag_dir = "$git_dir/archimport/tags";
  71
  72our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
  73
  74sub usage() {
  75    print STDERR <<END;
  76Usage: ${\basename $0}     # fetch/update GIT from Arch
  77       [ -f ] [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth  ] [ -t tempdir ]
  78       repository/arch-branch [ repository/arch-branch] ...
  79END
  80    exit(1);
  81}
  82
  83getopts("fThvat:D:") or usage();
  84usage if $opt_h;
  85
  86@ARGV >= 1 or usage();
  87# $arch_branches:
  88# values associated with keys:
  89#   =1 - Arch version / git 'branch' detected via abrowse on a limit
  90#   >1 - Arch version / git 'branch' of an auxilliary branch we've merged
  91my %arch_branches = map { $_ => 1 } @ARGV;
  92
  93$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
  94my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  95$opt_v && print "+ Using $tmp as temporary directory\n";
  96
  97my %reachable = ();             # Arch repositories we can access
  98my %unreachable = ();           # Arch repositories we can't access :<
  99my @psets  = ();                # the collection
 100my %psets  = ();                # the collection, by name
 101my %stats  = (                  # Track which strategy we used to import:
 102        get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
 103        simple_changeset => 0, import_or_tag => 0
 104);
 105
 106my %rptags = ();                # my reverse private tags
 107                                # to map a SHA1 to a commitid
 108my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 109
 110sub do_abrowse {
 111    my $stage = shift;
 112    while (my ($limit, $level) = each %arch_branches) {
 113        next unless $level == $stage;
 114        
 115        open ABROWSE, "$TLA abrowse -fkD --merges $limit |" 
 116                                or die "Problems with tla abrowse: $!";
 117    
 118        my %ps        = ();         # the current one
 119        my $lastseen  = '';
 120    
 121        while (<ABROWSE>) {
 122            chomp;
 123            
 124            # first record padded w 8 spaces
 125            if (s/^\s{8}\b//) {
 126                my ($id, $type) = split(m/\s+/, $_, 2);
 127
 128                my %last_ps;
 129                # store the record we just captured
 130                if (%ps && !exists $psets{ $ps{id} }) {
 131                    %last_ps = %ps; # break references
 132                    push (@psets, \%last_ps);
 133                    $psets{ $last_ps{id} } = \%last_ps;
 134                }
 135                
 136                my $branch = extract_versionname($id);
 137                %ps = ( id => $id, branch => $branch );
 138                if (%last_ps && ($last_ps{branch} eq $branch)) {
 139                    $ps{parent_id} = $last_ps{id};
 140                }
 141                
 142                $arch_branches{$branch} = 1;
 143                $lastseen = 'id';
 144
 145                # deal with types (should work with baz or tla):
 146                if ($type =~ m/\(.*changeset\)/) {
 147                    $ps{type} = 's';
 148                } elsif ($type =~ /\(.*import\)/) {
 149                    $ps{type} = 'i';
 150                } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
 151                    $ps{type} = 't';
 152                    # read which revision we've tagged when we parse the log
 153                    $ps{tag}  = $1;
 154                } else { 
 155                    warn "Unknown type $type";
 156                }
 157
 158                $arch_branches{$branch} = 1;
 159                $lastseen = 'id';
 160            } elsif (s/^\s{10}//) { 
 161                # 10 leading spaces or more 
 162                # indicate commit metadata
 163                
 164                # date
 165                if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
 166                    $ps{date}   = $1;
 167                    $lastseen = 'date';
 168                } elsif ($_ eq 'merges in:') {
 169                    $ps{merges} = [];
 170                    $lastseen = 'merges';
 171                } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 172                    my $id = $_;
 173                    push (@{$ps{merges}}, $id);
 174                   
 175                    # aggressive branch finding:
 176                    if ($opt_D) {
 177                        my $branch = extract_versionname($id);
 178                        my $repo = extract_reponame($branch);
 179                        
 180                        if (archive_reachable($repo) &&
 181                                !defined $arch_branches{$branch}) {
 182                            $arch_branches{$branch} = $stage + 1;
 183                        }
 184                    }
 185                } else {
 186                    warn "more metadata after merges!?: $_\n" unless /^\s*$/;
 187                }
 188            }
 189        }
 190
 191        if (%ps && !exists $psets{ $ps{id} }) {
 192            my %temp = %ps;         # break references
 193            if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
 194                $temp{parent_id} = $psets[$#psets]{id};
 195            }
 196            push (@psets, \%temp);  
 197            $psets{ $temp{id} } = \%temp;
 198        }    
 199        
 200        close ABROWSE or die "$TLA abrowse failed on $limit\n";
 201    }
 202}                               # end foreach $root
 203
 204do_abrowse(1);
 205my $depth = 2;
 206$opt_D ||= 0;
 207while ($depth <= $opt_D) {
 208    do_abrowse($depth);
 209    $depth++;
 210}
 211
 212## Order patches by time
 213# FIXME see if we can find a more optimal way to do this by graphing
 214# the ancestry data and walking it, that way we won't have to rely on
 215# client-supplied dates
 216@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 217
 218#print Dumper \@psets;
 219
 220##
 221## TODO cleanup irrelevant patches
 222##      and put an initial import
 223##      or a full tag
 224my $import = 0;
 225unless (-d $git_dir) { # initial import
 226    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 227        print "Starting import from $psets[0]{id}\n";
 228        `git-init-db`;
 229        die $! if $?;
 230        $import = 1;
 231    } else {
 232        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 233    }
 234} else {    # progressing an import
 235    # load the rptags
 236    opendir(DIR, $ptag_dir)
 237        || die "can't opendir: $!";
 238    while (my $file = readdir(DIR)) {
 239        # skip non-interesting-files
 240        next unless -f "$ptag_dir/$file";
 241   
 242        # convert first '--' to '/' from old git-archimport to use
 243        # as an archivename/c--b--v private tag
 244        if ($file !~ m!,!) {
 245            my $oldfile = $file;
 246            $file =~ s!--!,!;
 247            print STDERR "converting old tag $oldfile to $file\n";
 248            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 249        }
 250        my $sha = ptag($file);
 251        chomp $sha;
 252        $rptags{$sha} = $file;
 253    }
 254    closedir DIR;
 255}
 256
 257# process patchsets
 258# extract the Arch repository name (Arch "archive" in Arch-speak)
 259sub extract_reponame {
 260    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 261    return (split(/\//, $fq_cvbr))[0];
 262}
 263 
 264sub extract_versionname {
 265    my $name = shift;
 266    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 267    return $name;
 268}
 269
 270# convert a fully-qualified revision or version to a unique dirname:
 271#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 272# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 273#
 274# the git notion of a branch is closer to
 275# archive/category--branch--version than archive/category--branch, so we
 276# use this to convert to git branch names.
 277# Also, keep archive names but replace '/' with ',' since it won't require
 278# subdirectories, and is safer than swapping '--' which could confuse
 279# reverse-mapping when dealing with bastard branches that
 280# are just archive/category--version  (no --branch)
 281sub tree_dirname {
 282    my $revision = shift;
 283    my $name = extract_versionname($revision);
 284    $name =~ s#/#,#;
 285    return $name;
 286}
 287
 288# old versions of git-archimport just use the <category--branch> part:
 289sub old_style_branchname {
 290    my $id = shift;
 291    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 292    chomp $ret;
 293    return $ret;
 294}
 295
 296*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 297
 298sub process_patchset_accurate {
 299    my $ps = shift;
 300    
 301    # switch to that branch if we're not already in that branch:
 302    if (-e "$git_dir/refs/heads/$ps->{branch}") {
 303       system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
 304
 305       # remove any old stuff that got leftover:
 306       my $rm = safe_pipe_capture('git-ls-files','--others','-z');
 307       rmtree(split(/\0/,$rm)) if $rm;
 308    }
 309    
 310    # Apply the import/changeset/merge into the working tree
 311    my $dir = sync_to_ps($ps);
 312    # read the new log entry:
 313    my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
 314    die "Error in cat-log: $!" if $?;
 315    chomp @commitlog;
 316
 317    # grab variables we want from the log, new fields get added to $ps:
 318    # (author, date, email, summary, message body ...)
 319    parselog($ps, \@commitlog);
 320
 321    if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
 322        # this should work when importing continuations 
 323        if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
 324            
 325            # find where we are supposed to branch from
 326            system('git-checkout','-f','-b',$ps->{branch},
 327                            $branchpoint) == 0 or die "$! $?\n";
 328            
 329            # remove any old stuff that got leftover:
 330            my $rm = safe_pipe_capture('git-ls-files','--others','-z');
 331            rmtree(split(/\0/,$rm)) if $rm;
 332
 333            # If we trust Arch with the fact that this is just 
 334            # a tag, and it does not affect the state of the tree
 335            # then we just tag and move on
 336            tag($ps->{id}, $branchpoint);
 337            ptag($ps->{id}, $branchpoint);
 338            print " * Tagged $ps->{id} at $branchpoint\n";
 339            return 0;
 340        } else {
 341            warn "Tagging from unknown id unsupported\n" if $ps->{tag};
 342        }
 343        # allow multiple bases/imports here since Arch supports cherry-picks
 344        # from unrelated trees
 345    } 
 346    
 347    # update the index with all the changes we got
 348    system('git-ls-files --others -z | '.
 349            'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 350    system('git-ls-files --deleted -z | '.
 351            'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 352    system('git-ls-files -z | '.
 353             'git-update-index -z --stdin') == 0 or die "$! $?\n";
 354    return 1;
 355}
 356
 357# the native changeset processing strategy.  This is very fast, but
 358# does not handle permissions or any renames involving directories
 359sub process_patchset_fast {
 360    my $ps = shift;
 361    # 
 362    # create the branch if needed
 363    #
 364    if ($ps->{type} eq 'i' && !$import) {
 365        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 366    }
 367
 368    unless ($import) { # skip for import
 369        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 370            # we know about this branch
 371            system('git-checkout',$ps->{branch});
 372        } else {
 373            # new branch! we need to verify a few things
 374            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 375            my $branchpoint = ptag($ps->{tag});
 376            die "Tagging from unknown id unsupported: $ps->{tag}" 
 377                unless $branchpoint;
 378            
 379            # find where we are supposed to branch from
 380            system('git-checkout','-b',$ps->{branch},$branchpoint);
 381
 382            # If we trust Arch with the fact that this is just 
 383            # a tag, and it does not affect the state of the tree
 384            # then we just tag and move on
 385            tag($ps->{id}, $branchpoint);
 386            ptag($ps->{id}, $branchpoint);
 387            print " * Tagged $ps->{id} at $branchpoint\n";
 388            return 0;
 389        } 
 390        die $! if $?;
 391    } 
 392
 393    #
 394    # Apply the import/changeset/merge into the working tree
 395    # 
 396    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 397        apply_import($ps) or die $!;
 398        $stats{import_or_tag}++;
 399        $import=0;
 400    } elsif ($ps->{type} eq 's') {
 401        apply_cset($ps);
 402        $stats{simple_changeset}++;
 403    }
 404
 405    #
 406    # prepare update git's index, based on what arch knows
 407    # about the pset, resolve parents, etc
 408    #
 409    
 410    my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 
 411    die "Error in cat-archive-log: $!" if $?;
 412        
 413    parselog($ps,\@commitlog);
 414
 415    # imports don't give us good info
 416    # on added files. Shame on them
 417    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 418        system('git-ls-files --others -z | '.
 419                'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 420        system('git-ls-files --deleted -z | '.
 421                'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 422    }
 423
 424    # TODO: handle removed_directories and renamed_directories:
 425   
 426    if (my $add = $ps->{new_files}) {
 427        while (@$add) {
 428            my @slice = splice(@$add, 0, 100);
 429            system('git-update-index','--add','--',@slice) == 0 or
 430                            die "Error in git-update-index --add: $! $?\n";
 431        }
 432    }
 433   
 434    if (my $del = $ps->{removed_files}) {
 435        unlink @$del;
 436        while (@$del) {
 437            my @slice = splice(@$del, 0, 100);
 438            system('git-update-index','--remove','--',@slice) == 0 or
 439                            die "Error in git-update-index --remove: $! $?\n";
 440        }
 441    }
 442
 443    if (my $ren = $ps->{renamed_files}) {                # renamed
 444        if (@$ren % 2) {
 445            die "Odd number of entries in rename!?";
 446        }
 447        
 448        while (@$ren) {
 449            my $from = shift @$ren;
 450            my $to   = shift @$ren;           
 451
 452            unless (-d dirname($to)) {
 453                mkpath(dirname($to)); # will die on err
 454            }
 455            # print "moving $from $to";
 456            rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
 457            system('git-update-index','--remove','--',$from) == 0 or
 458                            die "Error in git-update-index --remove: $! $?\n";
 459            system('git-update-index','--add','--',$to) == 0 or
 460                            die "Error in git-update-index --add: $! $?\n";
 461        }
 462    }
 463
 464    if (my $mod = $ps->{modified_files}) {
 465        while (@$mod) {
 466            my @slice = splice(@$mod, 0, 100);
 467            system('git-update-index','--',@slice) == 0 or
 468                            die "Error in git-update-index: $! $?\n";
 469        }
 470    }
 471    return 1; # we successfully applied the changeset
 472}
 473
 474if ($opt_f) {
 475    print "Will import patchsets using the fast strategy\n",
 476            "Renamed directories and permission changes will be missed\n";
 477    *process_patchset = *process_patchset_fast;
 478} else {
 479    print "Using the default (accurate) import strategy.\n",
 480            "Things may be a bit slow\n";
 481    *process_patchset = *process_patchset_accurate;
 482}
 483    
 484foreach my $ps (@psets) {
 485    # process patchsets
 486    $ps->{branch} = git_branchname($ps->{id});
 487
 488    #
 489    # ensure we have a clean state 
 490    # 
 491    if (my $dirty = `git-diff-files`) {
 492        die "Unclean tree when about to process $ps->{id} " .
 493            " - did we fail to commit cleanly before?\n$dirty";
 494    }
 495    die $! if $?;
 496    
 497    #
 498    # skip commits already in repo
 499    #
 500    if (ptag($ps->{id})) {
 501      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 502      return 0;
 503    }
 504
 505    print " * Starting to work on $ps->{id}\n";
 506
 507    process_patchset($ps) or next;
 508
 509    # warn "errors when running git-update-index! $!";
 510    my $tree = `git-write-tree`;
 511    die "cannot write tree $!" if $?;
 512    chomp $tree;
 513    
 514    #
 515    # Who's your daddy?
 516    #
 517    my @par;
 518    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 519        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
 520            my $p = <HEAD>;
 521            close HEAD;
 522            chomp $p;
 523            push @par, '-p', $p;
 524        } else { 
 525            if ($ps->{type} eq 's') {
 526                warn "Could not find the right head for the branch $ps->{branch}";
 527            }
 528        }
 529    }
 530    
 531    if ($ps->{merges}) {
 532        push @par, find_parents($ps);
 533    }
 534
 535    #    
 536    # Commit, tag and clean state
 537    #
 538    $ENV{TZ}                  = 'GMT';
 539    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 540    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 541    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 542    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 543    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 544    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 545
 546    my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 
 547        or die $!;
 548    print WRITER $ps->{summary},"\n";
 549    print WRITER $ps->{message},"\n";
 550    
 551    # make it easy to backtrack and figure out which Arch revision this was:
 552    print WRITER 'git-archimport-id: ',$ps->{id},"\n";
 553    
 554    close WRITER;
 555    my $commitid = <READER>;    # read
 556    chomp $commitid;
 557    close READER;
 558    waitpid $pid,0;             # close;
 559
 560    if (length $commitid != 40) {
 561        die "Something went wrong with the commit! $! $commitid";
 562    }
 563    #
 564    # Update the branch
 565    # 
 566    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
 567    print HEAD $commitid;
 568    close HEAD;
 569    system('git-update-ref', 'HEAD', "$ps->{branch}");
 570
 571    # tag accordingly
 572    ptag($ps->{id}, $commitid); # private tag
 573    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 574        tag($ps->{id}, $commitid);
 575    }
 576    print " * Committed $ps->{id}\n";
 577    print "   + tree   $tree\n";
 578    print "   + commit $commitid\n";
 579    $opt_v && print "   + commit date is  $ps->{date} \n";
 580    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
 581    if (my $dirty = `git-diff-files`) {
 582        die "22 Unclean tree when about to process $ps->{id} " .
 583            " - did we fail to commit cleanly before?\n$dirty";
 584    }
 585}
 586
 587if ($opt_v) {
 588    foreach (sort keys %stats) {
 589        print" $_: $stats{$_}\n";
 590    }
 591}
 592exit 0;
 593
 594# used by the accurate strategy:
 595sub sync_to_ps {
 596    my $ps = shift;
 597    my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
 598    
 599    $opt_v && print "sync_to_ps($ps->{id}) method: ";
 600
 601    if (-d $tree_dir) {
 602        if ($ps->{type} eq 't') {
 603            $opt_v && print "get (tag)\n";
 604            # looks like a tag-only or (worse,) a mixed tags/changeset branch,
 605            # can't rely on replay to work correctly on these
 606            rmtree($tree_dir);
 607            safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
 608            $stats{get_tag}++;
 609        } else {
 610                my $tree_id = arch_tree_id($tree_dir);
 611                if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
 612                    # the common case (hopefully)
 613                    $opt_v && print "replay\n";
 614                    safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
 615                    $stats{replay}++;
 616                } else {
 617                    # getting one tree is usually faster than getting two trees
 618                    # and applying the delta ...
 619                    rmtree($tree_dir);
 620                    $opt_v && print "apply-delta\n";
 621                    safe_pipe_capture($TLA,'get','--no-pristine',
 622                                        $ps->{id},$tree_dir);
 623                    $stats{get_delta}++;
 624                }
 625        }
 626    } else {
 627        # new branch work
 628        $opt_v && print "get (new tree)\n";
 629        safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
 630        $stats{get_new}++;
 631    }
 632   
 633    # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
 634    system('rsync','-aI','--delete','--exclude',$git_dir,
 635#               '--exclude','.arch-inventory',
 636                '--exclude','.arch-ids','--exclude','{arch}',
 637                '--exclude','+*','--exclude',',*',
 638                "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
 639    return $tree_dir;
 640}
 641
 642sub apply_import {
 643    my $ps = shift;
 644    my $bname = git_branchname($ps->{id});
 645
 646    mkpath($tmp);
 647
 648    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
 649    die "Cannot get import: $!" if $?;    
 650    system('rsync','-aI','--delete', '--exclude',$git_dir,
 651                '--exclude','.arch-ids','--exclude','{arch}',
 652                "$tmp/import/", './');
 653    die "Cannot rsync import:$!" if $?;
 654    
 655    rmtree("$tmp/import");
 656    die "Cannot remove tempdir: $!" if $?;
 657    
 658
 659    return 1;
 660}
 661
 662sub apply_cset {
 663    my $ps = shift;
 664
 665    mkpath($tmp);
 666
 667    # get the changeset
 668    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
 669    die "Cannot get changeset: $!" if $?;
 670    
 671    # apply patches
 672    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 673        # this can be sped up considerably by doing
 674        #    (find | xargs cat) | patch
 675        # but that cna get mucked up by patches
 676        # with missing trailing newlines or the standard 
 677        # 'missing newline' flag in the patch - possibly
 678        # produced with an old/buggy diff.
 679        # slow and safe, we invoke patch once per patchfile
 680        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 681        die "Problem applying patches! $!" if $?;
 682    }
 683
 684    # apply changed binary files
 685    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 686        foreach my $mod (@modified) {
 687            chomp $mod;
 688            my $orig = $mod;
 689            $orig =~ s/\.modified$//; # lazy
 690            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 691            #print "rsync -p '$mod' '$orig'";
 692            system('rsync','-p',$mod,"./$orig");
 693            die "Problem applying binary changes! $!" if $?;
 694        }
 695    }
 696
 697    # bring in new files
 698    system('rsync','-aI','--exclude',$git_dir,
 699                '--exclude','.arch-ids',
 700                '--exclude', '{arch}',
 701                "$tmp/changeset/new-files-archive/",'./');
 702
 703    # deleted files are hinted from the commitlog processing
 704
 705    rmtree("$tmp/changeset");
 706}
 707
 708
 709# =for reference
 710# notes: *-files/-directories keys cannot have spaces, they're always
 711# pika-escaped.  Everything after the first newline
 712# A log entry looks like:
 713# Revision: moodle-org--moodle--1.3.3--patch-15
 714# Archive: arch-eduforge@catalyst.net.nz--2004
 715# Creator: Penny Leach <penny@catalyst.net.nz>
 716# Date: Wed May 25 14:15:34 NZST 2005
 717# Standard-date: 2005-05-25 02:15:34 GMT
 718# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 719#     lang/de/.arch-ids/block_html.php.id
 720# New-directories: lang/de/help/questionnaire
 721#     lang/de/help/questionnaire/.arch-ids
 722# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 723#    db_sears.sql db/db_sears.sql
 724# Removed-files: lang/be/docs/.arch-ids/release.html.id
 725#     lang/be/docs/.arch-ids/releaseold.html.id
 726# Modified-files: admin/cron.php admin/delete.php
 727#     admin/editor.html backup/lib.php backup/restore.php
 728# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 729# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 730#   summary can be multiline with a leading space just like the above fields
 731# Keywords:
 732#
 733# Updating yadda tadda tadda madda
 734sub parselog {
 735    my ($ps, $log) = @_;
 736    my $key = undef;
 737
 738    # headers we want that contain filenames:
 739    my %want_headers = (
 740        new_files => 1,
 741        modified_files => 1,
 742        renamed_files => 1,
 743        renamed_directories => 1,
 744        removed_files => 1,
 745        removed_directories => 1,
 746    );
 747    
 748    chomp (@$log);
 749    while ($_ = shift @$log) {
 750        if (/^Continuation-of:\s*(.*)/) {
 751            $ps->{tag} = $1;
 752            $key = undef;
 753        } elsif (/^Summary:\s*(.*)$/ ) {
 754            # summary can be multiline as long as it has a leading space
 755            $ps->{summary} = [ $1 ];
 756            $key = 'summary';
 757        } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
 758            $ps->{author} = $1;
 759            $ps->{email} = $2;
 760            $key = undef;
 761        # any *-files or *-directories can be read here:
 762        } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
 763            my $val = $2;
 764            $key = lc $1;
 765            $key =~ tr/-/_/; # too lazy to quote :P
 766            if ($want_headers{$key}) {
 767                push @{$ps->{$key}}, split(/\s+/, $val);
 768            } else {
 769                $key = undef;
 770            }
 771        } elsif (/^$/) {
 772            last; # remainder of @$log that didn't get shifted off is message
 773        } elsif ($key) {
 774            if (/^\s+(.*)$/) {
 775                if ($key eq 'summary') {
 776                    push @{$ps->{$key}}, $1;
 777                } else { # files/directories:
 778                    push @{$ps->{$key}}, split(/\s+/, $1);
 779                }
 780            } else {
 781                $key = undef;
 782            }
 783        }
 784    }
 785   
 786    # post-processing:
 787    $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
 788    $ps->{message} = join("\n",@$log);
 789    
 790    # skip Arch control files, unescape pika-escaped files
 791    foreach my $k (keys %want_headers) {
 792        next unless (defined $ps->{$k});
 793        my @tmp;
 794        foreach my $t (@{$ps->{$k}}) {
 795           next unless length ($t);
 796           next if $t =~ m!\{arch\}/!;
 797           next if $t =~ m!\.arch-ids/!;
 798           # should we skip this?
 799           next if $t =~ m!\.arch-inventory$!;
 800           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 801           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 802           if ($t =~ /\\/ ){
 803               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
 804           }
 805           push @tmp, $t;
 806        }
 807        $ps->{$k} = \@tmp if scalar @tmp;
 808    }
 809}
 810
 811# write/read a tag
 812sub tag {
 813    my ($tag, $commit) = @_;
 814 
 815    if ($opt_o) {
 816        $tag =~ s|/|--|g;
 817    } else {
 818        # don't use subdirs for tags yet, it could screw up other porcelains
 819        $tag =~ s|/|,|g;
 820    }
 821    
 822    if ($commit) {
 823        open(C,">","$git_dir/refs/tags/$tag")
 824            or die "Cannot create tag $tag: $!\n";
 825        print C "$commit\n"
 826            or die "Cannot write tag $tag: $!\n";
 827        close(C)
 828            or die "Cannot write tag $tag: $!\n";
 829        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 830    } else {                    # read
 831        open(C,"<","$git_dir/refs/tags/$tag")
 832            or die "Cannot read tag $tag: $!\n";
 833        $commit = <C>;
 834        chomp $commit;
 835        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 836        close(C)
 837            or die "Cannot read tag $tag: $!\n";
 838        return $commit;
 839    }
 840}
 841
 842# write/read a private tag
 843# reads fail softly if the tag isn't there
 844sub ptag {
 845    my ($tag, $commit) = @_;
 846
 847    # don't use subdirs for tags yet, it could screw up other porcelains
 848    $tag =~ s|/|,|g; 
 849    
 850    my $tag_file = "$ptag_dir/$tag";
 851    my $tag_branch_dir = dirname($tag_file);
 852    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 853
 854    if ($commit) {              # write
 855        open(C,">",$tag_file)
 856            or die "Cannot create tag $tag: $!\n";
 857        print C "$commit\n"
 858            or die "Cannot write tag $tag: $!\n";
 859        close(C)
 860            or die "Cannot write tag $tag: $!\n";
 861        $rptags{$commit} = $tag 
 862            unless $tag =~ m/--base-0$/;
 863    } else {                    # read
 864        # if the tag isn't there, return 0
 865        unless ( -s $tag_file) {
 866            return 0;
 867        }
 868        open(C,"<",$tag_file)
 869            or die "Cannot read tag $tag: $!\n";
 870        $commit = <C>;
 871        chomp $commit;
 872        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 873        close(C)
 874            or die "Cannot read tag $tag: $!\n";
 875        unless (defined $rptags{$commit}) {
 876            $rptags{$commit} = $tag;
 877        }
 878        return $commit;
 879    }
 880}
 881
 882sub find_parents {
 883    #
 884    # Identify what branches are merging into me
 885    # and whether we are fully merged
 886    # git-merge-base <headsha> <headsha> should tell
 887    # me what the base of the merge should be 
 888    #
 889    my $ps = shift;
 890
 891    my %branches; # holds an arrayref per branch
 892                  # the arrayref contains a list of
 893                  # merged patches between the base
 894                  # of the merge and the current head
 895
 896    my @parents;  # parents found for this commit
 897
 898    # simple loop to split the merges
 899    # per branch
 900    foreach my $merge (@{$ps->{merges}}) {
 901        my $branch = git_branchname($merge);
 902        unless (defined $branches{$branch} ){
 903            $branches{$branch} = [];
 904        }
 905        push @{$branches{$branch}}, $merge;
 906    }
 907
 908    #
 909    # foreach branch find a merge base and walk it to the 
 910    # head where we are, collecting the merged patchsets that
 911    # Arch has recorded. Keep that in @have
 912    # Compare that with the commits on the other branch
 913    # between merge-base and the tip of the branch (@need)
 914    # and see if we have a series of consecutive patches
 915    # starting from the merge base. The tip of the series
 916    # of consecutive patches merged is our new parent for 
 917    # that branch.
 918    #
 919    foreach my $branch (keys %branches) {
 920
 921        # check that we actually know about the branch
 922        next unless -e "$git_dir/refs/heads/$branch";
 923
 924        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 925        if ($?) { 
 926            # Don't die here, Arch supports one-way cherry-picking
 927            # between branches with no common base (or any relationship
 928            # at all beforehand)
 929            warn "Cannot find merge base for $branch and $ps->{branch}";
 930            next;
 931        }
 932        chomp $mergebase;
 933
 934        # now walk up to the mergepoint collecting what patches we have
 935        my $branchtip = git_rev_parse($ps->{branch});
 936        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 937        my %have; # collected merges this branch has
 938        foreach my $merge (@{$ps->{merges}}) {
 939            $have{$merge} = 1;
 940        }
 941        my %ancestorshave;
 942        foreach my $par (@ancestors) {
 943            $par = commitid2pset($par);
 944            if (defined $par->{merges}) {
 945                foreach my $merge (@{$par->{merges}}) {
 946                    $ancestorshave{$merge}=1;
 947                }
 948            }
 949        }
 950        # print "++++ Merges in $ps->{id} are....\n";
 951        # my @have = sort keys %have;   print Dumper(\@have);
 952
 953        # merge what we have with what ancestors have
 954        %have = (%have, %ancestorshave);
 955
 956        # see what the remote branch has - these are the merges we 
 957        # will want to have in a consecutive series from the mergebase
 958        my $otherbranchtip = git_rev_parse($branch);
 959        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 960        my @need;
 961        foreach my $needps (@needraw) {         # get the psets
 962            $needps = commitid2pset($needps);
 963            # git-rev-list will also
 964            # list commits merged in via earlier 
 965            # merges. we are only interested in commits
 966            # from the branch we're looking at
 967            if ($branch eq $needps->{branch}) {
 968                push @need, $needps->{id};
 969            }
 970        }
 971
 972        # print "++++ Merges from $branch we want are....\n";
 973        # print Dumper(\@need);
 974
 975        my $newparent;
 976        while (my $needed_commit = pop @need) {
 977            if ($have{$needed_commit}) {
 978                $newparent = $needed_commit;
 979            } else {
 980                last; # break out of the while
 981            }
 982        }
 983        if ($newparent) {
 984            push @parents, $newparent;
 985        }
 986
 987
 988    } # end foreach branch
 989
 990    # prune redundant parents
 991    my %parents;
 992    foreach my $p (@parents) {
 993        $parents{$p} = 1;
 994    }
 995    foreach my $p (@parents) {
 996        next unless exists $psets{$p}{merges};
 997        next unless ref    $psets{$p}{merges};
 998        my @merges = @{$psets{$p}{merges}};
 999        foreach my $merge (@merges) {
1000            if ($parents{$merge}) { 
1001                delete $parents{$merge};
1002            }
1003        }
1004    }
1005
1006    @parents = ();
1007    foreach (keys %parents) {
1008        push @parents, '-p', ptag($_);
1009    }
1010    return @parents;
1011}
1012
1013sub git_rev_parse {
1014    my $name = shift;
1015    my $val  = `git-rev-parse $name`;
1016    die "Error: git-rev-parse $name" if $?;
1017    chomp $val;
1018    return $val;
1019}
1020
1021# resolve a SHA1 to a known patchset
1022sub commitid2pset {
1023    my $commitid = shift;
1024    chomp $commitid;
1025    my $name = $rptags{$commitid} 
1026        || die "Cannot find reverse tag mapping for $commitid";
1027    $name =~ s|,|/|;
1028    my $ps   = $psets{$name} 
1029        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1030    return $ps;
1031}
1032
1033
1034# an alterative to `command` that allows input to be passed as an array
1035# to work around shell problems with weird characters in arguments
1036sub safe_pipe_capture {
1037    my @output;
1038    if (my $pid = open my $child, '-|') {
1039        @output = (<$child>);
1040        close $child or die join(' ',@_).": $! $?";
1041    } else {
1042        exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1043    }
1044    return wantarray ? @output : join('',@output);
1045}
1046
1047# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1048sub arch_tree_id {
1049    my $dir = shift;
1050    chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1051    return $ret;
1052}
1053
1054sub archive_reachable {
1055    my $archive = shift;
1056    return 1 if $reachable{$archive};
1057    return 0 if $unreachable{$archive};
1058    
1059    if (system "$TLA whereis-archive $archive >/dev/null") {
1060        if ($opt_a && (system($TLA,'register-archive',
1061                      "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1062            $reachable{$archive} = 1;
1063            return 1;
1064        }
1065        print STDERR "Archive is unreachable: $archive\n";
1066        $unreachable{$archive} = 1;
1067        return 0;
1068    } else {
1069        $reachable{$archive} = 1;
1070        return 1;
1071    }
1072}
1073