git-archimport.perlon commit Add howto about separating topics. (5b766ea)
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] 
  13        [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  14
  15Imports a project from one or more Arch repositories. It will follow branches
  16and repositories within the namespaces defined by the <archive/branch>
  17parameters suppplied. If it cannot find the remote branch a merge comes from
  18it will just import it as a regular commit. If it can find it, it will mark it 
  19as a merge whenever possible.
  20
  21See man (1) git-archimport for more details.
  22
  23=head1 TODO
  24
  25 - create tag objects instead of ref tags
  26 - audit shell-escaping of filenames
  27 - hide our private tags somewhere smarter
  28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  29 - sort and apply patches by graphing ancestry relations instead of just
  30   relying in dates supplied in the changeset itself.
  31   tla ancestry-graph -m could be helpful here...
  32
  33=head1 Devel tricks
  34
  35Add print in front of the shell commands invoked via backticks. 
  36
  37=head1 Devel Notes
  38
  39There are several places where Arch and git terminology are intermixed
  40and potentially confused.
  41
  42The notion of a "branch" in git is approximately equivalent to
  43a "archive/category--branch--version" in Arch.  Also, it should be noted
  44that the "--branch" portion of "archive/category--branch--version" is really
  45optional in Arch although not many people (nor tools!) seem to know this.
  46This means that "archive/category--version" is also a valid "branch"
  47in git terms.
  48
  49We always refer to Arch names by their fully qualified variant (which
  50means the "archive" name is prefixed.
  51
  52For people unfamiliar with Arch, an "archive" is the term for "repository",
  53and can contain multiple, unrelated branches.
  54
  55=cut
  56
  57use strict;
  58use warnings;
  59use Getopt::Std;
  60use File::Temp qw(tempdir);
  61use File::Path qw(mkpath rmtree);
  62use File::Basename qw(basename dirname);
  63use Data::Dumper qw/ Dumper /;
  64use IPC::Open2;
  65
  66$SIG{'PIPE'}="IGNORE";
  67$ENV{'TZ'}="UTC";
  68
  69my $git_dir = $ENV{"GIT_DIR"} || ".git";
  70$ENV{"GIT_DIR"} = $git_dir;
  71my $ptag_dir = "$git_dir/archimport/tags";
  72
  73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
  74
  75sub usage() {
  76    print STDERR <<END;
  77Usage: ${\basename $0}     # fetch/update GIT from Arch
  78       [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
  79       repository/arch-branch [ repository/arch-branch] ...
  80END
  81    exit(1);
  82}
  83
  84getopts("fThvat:D:") or usage();
  85usage if $opt_h;
  86
  87@ARGV >= 1 or usage();
  88# $arch_branches:
  89# values associated with keys:
  90#   =1 - Arch version / git 'branch' detected via abrowse on a limit
  91#   >1 - Arch version / git 'branch' of an auxilliary branch we've merged
  92my %arch_branches = map { $_ => 1 } @ARGV;
  93
  94$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
  95my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  96$opt_v && print "+ Using $tmp as temporary directory\n";
  97
  98my %reachable = ();             # Arch repositories we can access
  99my %unreachable = ();           # Arch repositories we can't access :<
 100my @psets  = ();                # the collection
 101my %psets  = ();                # the collection, by name
 102my %stats  = (                  # Track which strategy we used to import:
 103        get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
 104        simple_changeset => 0, import_or_tag => 0
 105);
 106
 107my %rptags = ();                # my reverse private tags
 108                                # to map a SHA1 to a commitid
 109my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 110
 111sub do_abrowse {
 112    my $stage = shift;
 113    while (my ($limit, $level) = each %arch_branches) {
 114        next unless $level == $stage;
 115        
 116        open ABROWSE, "$TLA abrowse -fkD --merges $limit |" 
 117                                or die "Problems with tla abrowse: $!";
 118    
 119        my %ps        = ();         # the current one
 120        my $lastseen  = '';
 121    
 122        while (<ABROWSE>) {
 123            chomp;
 124            
 125            # first record padded w 8 spaces
 126            if (s/^\s{8}\b//) {
 127                my ($id, $type) = split(m/\s+/, $_, 2);
 128
 129                my %last_ps;
 130                # store the record we just captured
 131                if (%ps && !exists $psets{ $ps{id} }) {
 132                    %last_ps = %ps; # break references
 133                    push (@psets, \%last_ps);
 134                    $psets{ $last_ps{id} } = \%last_ps;
 135                }
 136                
 137                my $branch = extract_versionname($id);
 138                %ps = ( id => $id, branch => $branch );
 139                if (%last_ps && ($last_ps{branch} eq $branch)) {
 140                    $ps{parent_id} = $last_ps{id};
 141                }
 142                
 143                $arch_branches{$branch} = 1;
 144                $lastseen = 'id';
 145
 146                # deal with types (should work with baz or tla):
 147                if ($type =~ m/\(.*changeset\)/) {
 148                    $ps{type} = 's';
 149                } elsif ($type =~ /\(.*import\)/) {
 150                    $ps{type} = 'i';
 151                } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
 152                    $ps{type} = 't';
 153                    # read which revision we've tagged when we parse the log
 154                    $ps{tag}  = $1;
 155                } else { 
 156                    warn "Unknown type $type";
 157                }
 158
 159                $arch_branches{$branch} = 1;
 160                $lastseen = 'id';
 161            } elsif (s/^\s{10}//) { 
 162                # 10 leading spaces or more 
 163                # indicate commit metadata
 164                
 165                # date
 166                if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
 167                    $ps{date}   = $1;
 168                    $lastseen = 'date';
 169                } elsif ($_ eq 'merges in:') {
 170                    $ps{merges} = [];
 171                    $lastseen = 'merges';
 172                } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 173                    my $id = $_;
 174                    push (@{$ps{merges}}, $id);
 175                   
 176                    # aggressive branch finding:
 177                    if ($opt_D) {
 178                        my $branch = extract_versionname($id);
 179                        my $repo = extract_reponame($branch);
 180                        
 181                        if (archive_reachable($repo) &&
 182                                !defined $arch_branches{$branch}) {
 183                            $arch_branches{$branch} = $stage + 1;
 184                        }
 185                    }
 186                } else {
 187                    warn "more metadata after merges!?: $_\n" unless /^\s*$/;
 188                }
 189            }
 190        }
 191
 192        if (%ps && !exists $psets{ $ps{id} }) {
 193            my %temp = %ps;         # break references
 194            if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
 195                $temp{parent_id} = $psets[$#psets]{id};
 196            }
 197            push (@psets, \%temp);  
 198            $psets{ $temp{id} } = \%temp;
 199        }    
 200        
 201        close ABROWSE or die "$TLA abrowse failed on $limit\n";
 202    }
 203}                               # end foreach $root
 204
 205do_abrowse(1);
 206my $depth = 2;
 207$opt_D ||= 0;
 208while ($depth <= $opt_D) {
 209    do_abrowse($depth);
 210    $depth++;
 211}
 212
 213## Order patches by time
 214# FIXME see if we can find a more optimal way to do this by graphing
 215# the ancestry data and walking it, that way we won't have to rely on
 216# client-supplied dates
 217@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 218
 219#print Dumper \@psets;
 220
 221##
 222## TODO cleanup irrelevant patches
 223##      and put an initial import
 224##      or a full tag
 225my $import = 0;
 226unless (-d $git_dir) { # initial import
 227    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 228        print "Starting import from $psets[0]{id}\n";
 229        `git-init-db`;
 230        die $! if $?;
 231        $import = 1;
 232    } else {
 233        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 234    }
 235} else {    # progressing an import
 236    # load the rptags
 237    opendir(DIR, $ptag_dir)
 238        || die "can't opendir: $!";
 239    while (my $file = readdir(DIR)) {
 240        # skip non-interesting-files
 241        next unless -f "$ptag_dir/$file";
 242   
 243        # convert first '--' to '/' from old git-archimport to use
 244        # as an archivename/c--b--v private tag
 245        if ($file !~ m!,!) {
 246            my $oldfile = $file;
 247            $file =~ s!--!,!;
 248            print STDERR "converting old tag $oldfile to $file\n";
 249            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 250        }
 251        my $sha = ptag($file);
 252        chomp $sha;
 253        $rptags{$sha} = $file;
 254    }
 255    closedir DIR;
 256}
 257
 258# process patchsets
 259# extract the Arch repository name (Arch "archive" in Arch-speak)
 260sub extract_reponame {
 261    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 262    return (split(/\//, $fq_cvbr))[0];
 263}
 264 
 265sub extract_versionname {
 266    my $name = shift;
 267    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 268    return $name;
 269}
 270
 271# convert a fully-qualified revision or version to a unique dirname:
 272#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 273# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 274#
 275# the git notion of a branch is closer to
 276# archive/category--branch--version than archive/category--branch, so we
 277# use this to convert to git branch names.
 278# Also, keep archive names but replace '/' with ',' since it won't require
 279# subdirectories, and is safer than swapping '--' which could confuse
 280# reverse-mapping when dealing with bastard branches that
 281# are just archive/category--version  (no --branch)
 282sub tree_dirname {
 283    my $revision = shift;
 284    my $name = extract_versionname($revision);
 285    $name =~ s#/#,#;
 286    return $name;
 287}
 288
 289# old versions of git-archimport just use the <category--branch> part:
 290sub old_style_branchname {
 291    my $id = shift;
 292    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 293    chomp $ret;
 294    return $ret;
 295}
 296
 297*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 298
 299sub process_patchset_accurate {
 300    my $ps = shift;
 301    
 302    # switch to that branch if we're not already in that branch:
 303    if (-e "$git_dir/refs/heads/$ps->{branch}") {
 304       system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
 305
 306       # remove any old stuff that got leftover:
 307       my $rm = safe_pipe_capture('git-ls-files','--others','-z');
 308       rmtree(split(/\0/,$rm)) if $rm;
 309    }
 310    
 311    # Apply the import/changeset/merge into the working tree
 312    my $dir = sync_to_ps($ps);
 313    # read the new log entry:
 314    my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
 315    die "Error in cat-log: $!" if $?;
 316    chomp @commitlog;
 317
 318    # grab variables we want from the log, new fields get added to $ps:
 319    # (author, date, email, summary, message body ...)
 320    parselog($ps, \@commitlog);
 321
 322    if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
 323        # this should work when importing continuations 
 324        if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
 325            
 326            # find where we are supposed to branch from
 327            system('git-checkout','-f','-b',$ps->{branch},
 328                            $branchpoint) == 0 or die "$! $?\n";
 329            
 330            # remove any old stuff that got leftover:
 331            my $rm = safe_pipe_capture('git-ls-files','--others','-z');
 332            rmtree(split(/\0/,$rm)) if $rm;
 333
 334            # If we trust Arch with the fact that this is just 
 335            # a tag, and it does not affect the state of the tree
 336            # then we just tag and move on
 337            tag($ps->{id}, $branchpoint);
 338            ptag($ps->{id}, $branchpoint);
 339            print " * Tagged $ps->{id} at $branchpoint\n";
 340            return 0;
 341        } else {
 342            warn "Tagging from unknown id unsupported\n" if $ps->{tag};
 343        }
 344        # allow multiple bases/imports here since Arch supports cherry-picks
 345        # from unrelated trees
 346    } 
 347    
 348    # update the index with all the changes we got
 349    system('git-ls-files --others -z | '.
 350            'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 351    system('git-ls-files --deleted -z | '.
 352            'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 353    system('git-ls-files -z | '.
 354             'git-update-index -z --stdin') == 0 or die "$! $?\n";
 355    return 1;
 356}
 357
 358# the native changeset processing strategy.  This is very fast, but
 359# does not handle permissions or any renames involving directories
 360sub process_patchset_fast {
 361    my $ps = shift;
 362    # 
 363    # create the branch if needed
 364    #
 365    if ($ps->{type} eq 'i' && !$import) {
 366        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 367    }
 368
 369    unless ($import) { # skip for import
 370        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 371            # we know about this branch
 372            system('git-checkout',$ps->{branch});
 373        } else {
 374            # new branch! we need to verify a few things
 375            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 376            my $branchpoint = ptag($ps->{tag});
 377            die "Tagging from unknown id unsupported: $ps->{tag}" 
 378                unless $branchpoint;
 379            
 380            # find where we are supposed to branch from
 381            system('git-checkout','-b',$ps->{branch},$branchpoint);
 382
 383            # If we trust Arch with the fact that this is just 
 384            # a tag, and it does not affect the state of the tree
 385            # then we just tag and move on
 386            tag($ps->{id}, $branchpoint);
 387            ptag($ps->{id}, $branchpoint);
 388            print " * Tagged $ps->{id} at $branchpoint\n";
 389            return 0;
 390        } 
 391        die $! if $?;
 392    } 
 393
 394    #
 395    # Apply the import/changeset/merge into the working tree
 396    # 
 397    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 398        apply_import($ps) or die $!;
 399        $stats{import_or_tag}++;
 400        $import=0;
 401    } elsif ($ps->{type} eq 's') {
 402        apply_cset($ps);
 403        $stats{simple_changeset}++;
 404    }
 405
 406    #
 407    # prepare update git's index, based on what arch knows
 408    # about the pset, resolve parents, etc
 409    #
 410    
 411    my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 
 412    die "Error in cat-archive-log: $!" if $?;
 413        
 414    parselog($ps,\@commitlog);
 415
 416    # imports don't give us good info
 417    # on added files. Shame on them
 418    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 419        system('git-ls-files --others -z | '.
 420                'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 421        system('git-ls-files --deleted -z | '.
 422                'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 423    }
 424
 425    # TODO: handle removed_directories and renamed_directories:
 426   
 427    if (my $add = $ps->{new_files}) {
 428        while (@$add) {
 429            my @slice = splice(@$add, 0, 100);
 430            system('git-update-index','--add','--',@slice) == 0 or
 431                            die "Error in git-update-index --add: $! $?\n";
 432        }
 433    }
 434   
 435    if (my $del = $ps->{removed_files}) {
 436        unlink @$del;
 437        while (@$del) {
 438            my @slice = splice(@$del, 0, 100);
 439            system('git-update-index','--remove','--',@slice) == 0 or
 440                            die "Error in git-update-index --remove: $! $?\n";
 441        }
 442    }
 443
 444    if (my $ren = $ps->{renamed_files}) {                # renamed
 445        if (@$ren % 2) {
 446            die "Odd number of entries in rename!?";
 447        }
 448        
 449        while (@$ren) {
 450            my $from = shift @$ren;
 451            my $to   = shift @$ren;           
 452
 453            unless (-d dirname($to)) {
 454                mkpath(dirname($to)); # will die on err
 455            }
 456            # print "moving $from $to";
 457            rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
 458            system('git-update-index','--remove','--',$from) == 0 or
 459                            die "Error in git-update-index --remove: $! $?\n";
 460            system('git-update-index','--add','--',$to) == 0 or
 461                            die "Error in git-update-index --add: $! $?\n";
 462        }
 463    }
 464
 465    if (my $mod = $ps->{modified_files}) {
 466        while (@$mod) {
 467            my @slice = splice(@$mod, 0, 100);
 468            system('git-update-index','--',@slice) == 0 or
 469                            die "Error in git-update-index: $! $?\n";
 470        }
 471    }
 472    return 1; # we successfully applied the changeset
 473}
 474
 475if ($opt_f) {
 476    print "Will import patchsets using the fast strategy\n",
 477            "Renamed directories and permission changes will be missed\n";
 478    *process_patchset = *process_patchset_fast;
 479} else {
 480    print "Using the default (accurate) import strategy.\n",
 481            "Things may be a bit slow\n";
 482    *process_patchset = *process_patchset_accurate;
 483}
 484    
 485foreach my $ps (@psets) {
 486    # process patchsets
 487    $ps->{branch} = git_branchname($ps->{id});
 488
 489    #
 490    # ensure we have a clean state 
 491    # 
 492    if (my $dirty = `git-diff-files`) {
 493        die "Unclean tree when about to process $ps->{id} " .
 494            " - did we fail to commit cleanly before?\n$dirty";
 495    }
 496    die $! if $?;
 497    
 498    #
 499    # skip commits already in repo
 500    #
 501    if (ptag($ps->{id})) {
 502      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 503      next;
 504    }
 505
 506    print " * Starting to work on $ps->{id}\n";
 507
 508    process_patchset($ps) or next;
 509
 510    # warn "errors when running git-update-index! $!";
 511    my $tree = `git-write-tree`;
 512    die "cannot write tree $!" if $?;
 513    chomp $tree;
 514    
 515    #
 516    # Who's your daddy?
 517    #
 518    my @par;
 519    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 520        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
 521            my $p = <HEAD>;
 522            close HEAD;
 523            chomp $p;
 524            push @par, '-p', $p;
 525        } else { 
 526            if ($ps->{type} eq 's') {
 527                warn "Could not find the right head for the branch $ps->{branch}";
 528            }
 529        }
 530    }
 531    
 532    if ($ps->{merges}) {
 533        push @par, find_parents($ps);
 534    }
 535
 536    #    
 537    # Commit, tag and clean state
 538    #
 539    $ENV{TZ}                  = 'GMT';
 540    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 541    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 542    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 543    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 544    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 545    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 546
 547    my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 
 548        or die $!;
 549    print WRITER $ps->{summary},"\n";
 550    print WRITER $ps->{message},"\n";
 551    
 552    # make it easy to backtrack and figure out which Arch revision this was:
 553    print WRITER 'git-archimport-id: ',$ps->{id},"\n";
 554    
 555    close WRITER;
 556    my $commitid = <READER>;    # read
 557    chomp $commitid;
 558    close READER;
 559    waitpid $pid,0;             # close;
 560
 561    if (length $commitid != 40) {
 562        die "Something went wrong with the commit! $! $commitid";
 563    }
 564    #
 565    # Update the branch
 566    # 
 567    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
 568    print HEAD $commitid;
 569    close HEAD;
 570    system('git-update-ref', 'HEAD', "$ps->{branch}");
 571
 572    # tag accordingly
 573    ptag($ps->{id}, $commitid); # private tag
 574    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 575        tag($ps->{id}, $commitid);
 576    }
 577    print " * Committed $ps->{id}\n";
 578    print "   + tree   $tree\n";
 579    print "   + commit $commitid\n";
 580    $opt_v && print "   + commit date is  $ps->{date} \n";
 581    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
 582}
 583
 584if ($opt_v) {
 585    foreach (sort keys %stats) {
 586        print" $_: $stats{$_}\n";
 587    }
 588}
 589exit 0;
 590
 591# used by the accurate strategy:
 592sub sync_to_ps {
 593    my $ps = shift;
 594    my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
 595    
 596    $opt_v && print "sync_to_ps($ps->{id}) method: ";
 597
 598    if (-d $tree_dir) {
 599        if ($ps->{type} eq 't') {
 600            $opt_v && print "get (tag)\n";
 601            # looks like a tag-only or (worse,) a mixed tags/changeset branch,
 602            # can't rely on replay to work correctly on these
 603            rmtree($tree_dir);
 604            safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
 605            $stats{get_tag}++;
 606        } else {
 607                my $tree_id = arch_tree_id($tree_dir);
 608                if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
 609                    # the common case (hopefully)
 610                    $opt_v && print "replay\n";
 611                    safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
 612                    $stats{replay}++;
 613                } else {
 614                    # getting one tree is usually faster than getting two trees
 615                    # and applying the delta ...
 616                    rmtree($tree_dir);
 617                    $opt_v && print "apply-delta\n";
 618                    safe_pipe_capture($TLA,'get','--no-pristine',
 619                                        $ps->{id},$tree_dir);
 620                    $stats{get_delta}++;
 621                }
 622        }
 623    } else {
 624        # new branch work
 625        $opt_v && print "get (new tree)\n";
 626        safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
 627        $stats{get_new}++;
 628    }
 629   
 630    # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
 631    system('rsync','-aI','--delete','--exclude',$git_dir,
 632#               '--exclude','.arch-inventory',
 633                '--exclude','.arch-ids','--exclude','{arch}',
 634                '--exclude','+*','--exclude',',*',
 635                "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
 636    return $tree_dir;
 637}
 638
 639sub apply_import {
 640    my $ps = shift;
 641    my $bname = git_branchname($ps->{id});
 642
 643    mkpath($tmp);
 644
 645    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
 646    die "Cannot get import: $!" if $?;    
 647    system('rsync','-aI','--delete', '--exclude',$git_dir,
 648                '--exclude','.arch-ids','--exclude','{arch}',
 649                "$tmp/import/", './');
 650    die "Cannot rsync import:$!" if $?;
 651    
 652    rmtree("$tmp/import");
 653    die "Cannot remove tempdir: $!" if $?;
 654    
 655
 656    return 1;
 657}
 658
 659sub apply_cset {
 660    my $ps = shift;
 661
 662    mkpath($tmp);
 663
 664    # get the changeset
 665    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
 666    die "Cannot get changeset: $!" if $?;
 667    
 668    # apply patches
 669    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 670        # this can be sped up considerably by doing
 671        #    (find | xargs cat) | patch
 672        # but that cna get mucked up by patches
 673        # with missing trailing newlines or the standard 
 674        # 'missing newline' flag in the patch - possibly
 675        # produced with an old/buggy diff.
 676        # slow and safe, we invoke patch once per patchfile
 677        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 678        die "Problem applying patches! $!" if $?;
 679    }
 680
 681    # apply changed binary files
 682    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 683        foreach my $mod (@modified) {
 684            chomp $mod;
 685            my $orig = $mod;
 686            $orig =~ s/\.modified$//; # lazy
 687            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 688            #print "rsync -p '$mod' '$orig'";
 689            system('rsync','-p',$mod,"./$orig");
 690            die "Problem applying binary changes! $!" if $?;
 691        }
 692    }
 693
 694    # bring in new files
 695    system('rsync','-aI','--exclude',$git_dir,
 696                '--exclude','.arch-ids',
 697                '--exclude', '{arch}',
 698                "$tmp/changeset/new-files-archive/",'./');
 699
 700    # deleted files are hinted from the commitlog processing
 701
 702    rmtree("$tmp/changeset");
 703}
 704
 705
 706# =for reference
 707# notes: *-files/-directories keys cannot have spaces, they're always
 708# pika-escaped.  Everything after the first newline
 709# A log entry looks like:
 710# Revision: moodle-org--moodle--1.3.3--patch-15
 711# Archive: arch-eduforge@catalyst.net.nz--2004
 712# Creator: Penny Leach <penny@catalyst.net.nz>
 713# Date: Wed May 25 14:15:34 NZST 2005
 714# Standard-date: 2005-05-25 02:15:34 GMT
 715# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 716#     lang/de/.arch-ids/block_html.php.id
 717# New-directories: lang/de/help/questionnaire
 718#     lang/de/help/questionnaire/.arch-ids
 719# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 720#    db_sears.sql db/db_sears.sql
 721# Removed-files: lang/be/docs/.arch-ids/release.html.id
 722#     lang/be/docs/.arch-ids/releaseold.html.id
 723# Modified-files: admin/cron.php admin/delete.php
 724#     admin/editor.html backup/lib.php backup/restore.php
 725# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 726# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 727#   summary can be multiline with a leading space just like the above fields
 728# Keywords:
 729#
 730# Updating yadda tadda tadda madda
 731sub parselog {
 732    my ($ps, $log) = @_;
 733    my $key = undef;
 734
 735    # headers we want that contain filenames:
 736    my %want_headers = (
 737        new_files => 1,
 738        modified_files => 1,
 739        renamed_files => 1,
 740        renamed_directories => 1,
 741        removed_files => 1,
 742        removed_directories => 1,
 743    );
 744    
 745    chomp (@$log);
 746    while ($_ = shift @$log) {
 747        if (/^Continuation-of:\s*(.*)/) {
 748            $ps->{tag} = $1;
 749            $key = undef;
 750        } elsif (/^Summary:\s*(.*)$/ ) {
 751            # summary can be multiline as long as it has a leading space
 752            $ps->{summary} = [ $1 ];
 753            $key = 'summary';
 754        } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
 755            $ps->{author} = $1;
 756            $ps->{email} = $2;
 757            $key = undef;
 758        # any *-files or *-directories can be read here:
 759        } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
 760            my $val = $2;
 761            $key = lc $1;
 762            $key =~ tr/-/_/; # too lazy to quote :P
 763            if ($want_headers{$key}) {
 764                push @{$ps->{$key}}, split(/\s+/, $val);
 765            } else {
 766                $key = undef;
 767            }
 768        } elsif (/^$/) {
 769            last; # remainder of @$log that didn't get shifted off is message
 770        } elsif ($key) {
 771            if (/^\s+(.*)$/) {
 772                if ($key eq 'summary') {
 773                    push @{$ps->{$key}}, $1;
 774                } else { # files/directories:
 775                    push @{$ps->{$key}}, split(/\s+/, $1);
 776                }
 777            } else {
 778                $key = undef;
 779            }
 780        }
 781    }
 782   
 783    # post-processing:
 784    $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
 785    $ps->{message} = join("\n",@$log);
 786    
 787    # skip Arch control files, unescape pika-escaped files
 788    foreach my $k (keys %want_headers) {
 789        next unless (defined $ps->{$k});
 790        my @tmp = ();
 791        foreach my $t (@{$ps->{$k}}) {
 792           next unless length ($t);
 793           next if $t =~ m!\{arch\}/!;
 794           next if $t =~ m!\.arch-ids/!;
 795           # should we skip this?
 796           next if $t =~ m!\.arch-inventory$!;
 797           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 798           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 799           if ($t =~ /\\/ ){
 800               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
 801           }
 802           push @tmp, $t;
 803        }
 804        $ps->{$k} = \@tmp;
 805    }
 806}
 807
 808# write/read a tag
 809sub tag {
 810    my ($tag, $commit) = @_;
 811 
 812    if ($opt_o) {
 813        $tag =~ s|/|--|g;
 814    } else {
 815        # don't use subdirs for tags yet, it could screw up other porcelains
 816        $tag =~ s|/|,|g;
 817    }
 818    
 819    if ($commit) {
 820        open(C,">","$git_dir/refs/tags/$tag")
 821            or die "Cannot create tag $tag: $!\n";
 822        print C "$commit\n"
 823            or die "Cannot write tag $tag: $!\n";
 824        close(C)
 825            or die "Cannot write tag $tag: $!\n";
 826        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 827    } else {                    # read
 828        open(C,"<","$git_dir/refs/tags/$tag")
 829            or die "Cannot read tag $tag: $!\n";
 830        $commit = <C>;
 831        chomp $commit;
 832        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 833        close(C)
 834            or die "Cannot read tag $tag: $!\n";
 835        return $commit;
 836    }
 837}
 838
 839# write/read a private tag
 840# reads fail softly if the tag isn't there
 841sub ptag {
 842    my ($tag, $commit) = @_;
 843
 844    # don't use subdirs for tags yet, it could screw up other porcelains
 845    $tag =~ s|/|,|g; 
 846    
 847    my $tag_file = "$ptag_dir/$tag";
 848    my $tag_branch_dir = dirname($tag_file);
 849    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 850
 851    if ($commit) {              # write
 852        open(C,">",$tag_file)
 853            or die "Cannot create tag $tag: $!\n";
 854        print C "$commit\n"
 855            or die "Cannot write tag $tag: $!\n";
 856        close(C)
 857            or die "Cannot write tag $tag: $!\n";
 858        $rptags{$commit} = $tag 
 859            unless $tag =~ m/--base-0$/;
 860    } else {                    # read
 861        # if the tag isn't there, return 0
 862        unless ( -s $tag_file) {
 863            return 0;
 864        }
 865        open(C,"<",$tag_file)
 866            or die "Cannot read tag $tag: $!\n";
 867        $commit = <C>;
 868        chomp $commit;
 869        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 870        close(C)
 871            or die "Cannot read tag $tag: $!\n";
 872        unless (defined $rptags{$commit}) {
 873            $rptags{$commit} = $tag;
 874        }
 875        return $commit;
 876    }
 877}
 878
 879sub find_parents {
 880    #
 881    # Identify what branches are merging into me
 882    # and whether we are fully merged
 883    # git-merge-base <headsha> <headsha> should tell
 884    # me what the base of the merge should be 
 885    #
 886    my $ps = shift;
 887
 888    my %branches; # holds an arrayref per branch
 889                  # the arrayref contains a list of
 890                  # merged patches between the base
 891                  # of the merge and the current head
 892
 893    my @parents;  # parents found for this commit
 894
 895    # simple loop to split the merges
 896    # per branch
 897    foreach my $merge (@{$ps->{merges}}) {
 898        my $branch = git_branchname($merge);
 899        unless (defined $branches{$branch} ){
 900            $branches{$branch} = [];
 901        }
 902        push @{$branches{$branch}}, $merge;
 903    }
 904
 905    #
 906    # foreach branch find a merge base and walk it to the 
 907    # head where we are, collecting the merged patchsets that
 908    # Arch has recorded. Keep that in @have
 909    # Compare that with the commits on the other branch
 910    # between merge-base and the tip of the branch (@need)
 911    # and see if we have a series of consecutive patches
 912    # starting from the merge base. The tip of the series
 913    # of consecutive patches merged is our new parent for 
 914    # that branch.
 915    #
 916    foreach my $branch (keys %branches) {
 917
 918        # check that we actually know about the branch
 919        next unless -e "$git_dir/refs/heads/$branch";
 920
 921        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 922        if ($?) { 
 923            # Don't die here, Arch supports one-way cherry-picking
 924            # between branches with no common base (or any relationship
 925            # at all beforehand)
 926            warn "Cannot find merge base for $branch and $ps->{branch}";
 927            next;
 928        }
 929        chomp $mergebase;
 930
 931        # now walk up to the mergepoint collecting what patches we have
 932        my $branchtip = git_rev_parse($ps->{branch});
 933        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 934        my %have; # collected merges this branch has
 935        foreach my $merge (@{$ps->{merges}}) {
 936            $have{$merge} = 1;
 937        }
 938        my %ancestorshave;
 939        foreach my $par (@ancestors) {
 940            $par = commitid2pset($par);
 941            if (defined $par->{merges}) {
 942                foreach my $merge (@{$par->{merges}}) {
 943                    $ancestorshave{$merge}=1;
 944                }
 945            }
 946        }
 947        # print "++++ Merges in $ps->{id} are....\n";
 948        # my @have = sort keys %have;   print Dumper(\@have);
 949
 950        # merge what we have with what ancestors have
 951        %have = (%have, %ancestorshave);
 952
 953        # see what the remote branch has - these are the merges we 
 954        # will want to have in a consecutive series from the mergebase
 955        my $otherbranchtip = git_rev_parse($branch);
 956        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 957        my @need;
 958        foreach my $needps (@needraw) {         # get the psets
 959            $needps = commitid2pset($needps);
 960            # git-rev-list will also
 961            # list commits merged in via earlier 
 962            # merges. we are only interested in commits
 963            # from the branch we're looking at
 964            if ($branch eq $needps->{branch}) {
 965                push @need, $needps->{id};
 966            }
 967        }
 968
 969        # print "++++ Merges from $branch we want are....\n";
 970        # print Dumper(\@need);
 971
 972        my $newparent;
 973        while (my $needed_commit = pop @need) {
 974            if ($have{$needed_commit}) {
 975                $newparent = $needed_commit;
 976            } else {
 977                last; # break out of the while
 978            }
 979        }
 980        if ($newparent) {
 981            push @parents, $newparent;
 982        }
 983
 984
 985    } # end foreach branch
 986
 987    # prune redundant parents
 988    my %parents;
 989    foreach my $p (@parents) {
 990        $parents{$p} = 1;
 991    }
 992    foreach my $p (@parents) {
 993        next unless exists $psets{$p}{merges};
 994        next unless ref    $psets{$p}{merges};
 995        my @merges = @{$psets{$p}{merges}};
 996        foreach my $merge (@merges) {
 997            if ($parents{$merge}) { 
 998                delete $parents{$merge};
 999            }
1000        }
1001    }
1002
1003    @parents = ();
1004    foreach (keys %parents) {
1005        push @parents, '-p', ptag($_);
1006    }
1007    return @parents;
1008}
1009
1010sub git_rev_parse {
1011    my $name = shift;
1012    my $val  = `git-rev-parse $name`;
1013    die "Error: git-rev-parse $name" if $?;
1014    chomp $val;
1015    return $val;
1016}
1017
1018# resolve a SHA1 to a known patchset
1019sub commitid2pset {
1020    my $commitid = shift;
1021    chomp $commitid;
1022    my $name = $rptags{$commitid} 
1023        || die "Cannot find reverse tag mapping for $commitid";
1024    $name =~ s|,|/|;
1025    my $ps   = $psets{$name} 
1026        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1027    return $ps;
1028}
1029
1030
1031# an alterative to `command` that allows input to be passed as an array
1032# to work around shell problems with weird characters in arguments
1033sub safe_pipe_capture {
1034    my @output;
1035    if (my $pid = open my $child, '-|') {
1036        @output = (<$child>);
1037        close $child or die join(' ',@_).": $! $?";
1038    } else {
1039        exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1040    }
1041    return wantarray ? @output : join('',@output);
1042}
1043
1044# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1045sub arch_tree_id {
1046    my $dir = shift;
1047    chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1048    return $ret;
1049}
1050
1051sub archive_reachable {
1052    my $archive = shift;
1053    return 1 if $reachable{$archive};
1054    return 0 if $unreachable{$archive};
1055    
1056    if (system "$TLA whereis-archive $archive >/dev/null") {
1057        if ($opt_a && (system($TLA,'register-archive',
1058                      "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1059            $reachable{$archive} = 1;
1060            return 1;
1061        }
1062        print STDERR "Archive is unreachable: $archive\n";
1063        $unreachable{$archive} = 1;
1064        return 0;
1065    } else {
1066        $reachable{$archive} = 1;
1067        return 1;
1068    }
1069}
1070