git-archimport.perlon commit true built-in diff: run everything in-core. (cebff98)
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] 
  13        [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  14
  15Imports a project from one or more Arch repositories. It will follow branches
  16and repositories within the namespaces defined by the <archive/branch>
  17parameters suppplied. If it cannot find the remote branch a merge comes from
  18it will just import it as a regular commit. If it can find it, it will mark it 
  19as a merge whenever possible.
  20
  21See man (1) git-archimport for more details.
  22
  23=head1 TODO
  24
  25 - create tag objects instead of ref tags
  26 - audit shell-escaping of filenames
  27 - hide our private tags somewhere smarter
  28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  29 - sort and apply patches by graphing ancestry relations instead of just
  30   relying in dates supplied in the changeset itself.
  31   tla ancestry-graph -m could be helpful here...
  32
  33=head1 Devel tricks
  34
  35Add print in front of the shell commands invoked via backticks. 
  36
  37=head1 Devel Notes
  38
  39There are several places where Arch and git terminology are intermixed
  40and potentially confused.
  41
  42The notion of a "branch" in git is approximately equivalent to
  43a "archive/category--branch--version" in Arch.  Also, it should be noted
  44that the "--branch" portion of "archive/category--branch--version" is really
  45optional in Arch although not many people (nor tools!) seem to know this.
  46This means that "archive/category--version" is also a valid "branch"
  47in git terms.
  48
  49We always refer to Arch names by their fully qualified variant (which
  50means the "archive" name is prefixed.
  51
  52For people unfamiliar with Arch, an "archive" is the term for "repository",
  53and can contain multiple, unrelated branches.
  54
  55=cut
  56
  57use strict;
  58use warnings;
  59use Getopt::Std;
  60use File::Temp qw(tempdir);
  61use File::Path qw(mkpath rmtree);
  62use File::Basename qw(basename dirname);
  63use Data::Dumper qw/ Dumper /;
  64use IPC::Open2;
  65
  66$SIG{'PIPE'}="IGNORE";
  67$ENV{'TZ'}="UTC";
  68
  69my $git_dir = $ENV{"GIT_DIR"} || ".git";
  70$ENV{"GIT_DIR"} = $git_dir;
  71my $ptag_dir = "$git_dir/archimport/tags";
  72
  73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
  74
  75sub usage() {
  76    print STDERR <<END;
  77Usage: ${\basename $0}     # fetch/update GIT from Arch
  78       [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
  79       repository/arch-branch [ repository/arch-branch] ...
  80END
  81    exit(1);
  82}
  83
  84getopts("fThvat:D:") or usage();
  85usage if $opt_h;
  86
  87@ARGV >= 1 or usage();
  88# $arch_branches:
  89# values associated with keys:
  90#   =1 - Arch version / git 'branch' detected via abrowse on a limit
  91#   >1 - Arch version / git 'branch' of an auxilliary branch we've merged
  92my %arch_branches = map { $_ => 1 } @ARGV;
  93
  94$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
  95my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  96$opt_v && print "+ Using $tmp as temporary directory\n";
  97
  98my %reachable = ();             # Arch repositories we can access
  99my %unreachable = ();           # Arch repositories we can't access :<
 100my @psets  = ();                # the collection
 101my %psets  = ();                # the collection, by name
 102my %stats  = (                  # Track which strategy we used to import:
 103        get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
 104        simple_changeset => 0, import_or_tag => 0
 105);
 106
 107my %rptags = ();                # my reverse private tags
 108                                # to map a SHA1 to a commitid
 109my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 110
 111sub do_abrowse {
 112    my $stage = shift;
 113    while (my ($limit, $level) = each %arch_branches) {
 114        next unless $level == $stage;
 115        
 116        open ABROWSE, "$TLA abrowse -fkD --merges $limit |" 
 117                                or die "Problems with tla abrowse: $!";
 118    
 119        my %ps        = ();         # the current one
 120        my $lastseen  = '';
 121    
 122        while (<ABROWSE>) {
 123            chomp;
 124            
 125            # first record padded w 8 spaces
 126            if (s/^\s{8}\b//) {
 127                my ($id, $type) = split(m/\s+/, $_, 2);
 128
 129                my %last_ps;
 130                # store the record we just captured
 131                if (%ps && !exists $psets{ $ps{id} }) {
 132                    %last_ps = %ps; # break references
 133                    push (@psets, \%last_ps);
 134                    $psets{ $last_ps{id} } = \%last_ps;
 135                }
 136                
 137                my $branch = extract_versionname($id);
 138                %ps = ( id => $id, branch => $branch );
 139                if (%last_ps && ($last_ps{branch} eq $branch)) {
 140                    $ps{parent_id} = $last_ps{id};
 141                }
 142                
 143                $arch_branches{$branch} = 1;
 144                $lastseen = 'id';
 145
 146                # deal with types (should work with baz or tla):
 147                if ($type =~ m/\(.*changeset\)/) {
 148                    $ps{type} = 's';
 149                } elsif ($type =~ /\(.*import\)/) {
 150                    $ps{type} = 'i';
 151                } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
 152                    $ps{type} = 't';
 153                    # read which revision we've tagged when we parse the log
 154                    $ps{tag}  = $1;
 155                } else { 
 156                    warn "Unknown type $type";
 157                }
 158
 159                $arch_branches{$branch} = 1;
 160                $lastseen = 'id';
 161            } elsif (s/^\s{10}//) { 
 162                # 10 leading spaces or more 
 163                # indicate commit metadata
 164                
 165                # date
 166                if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
 167                    $ps{date}   = $1;
 168                    $lastseen = 'date';
 169                } elsif ($_ eq 'merges in:') {
 170                    $ps{merges} = [];
 171                    $lastseen = 'merges';
 172                } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 173                    my $id = $_;
 174                    push (@{$ps{merges}}, $id);
 175                   
 176                    # aggressive branch finding:
 177                    if ($opt_D) {
 178                        my $branch = extract_versionname($id);
 179                        my $repo = extract_reponame($branch);
 180                        
 181                        if (archive_reachable($repo) &&
 182                                !defined $arch_branches{$branch}) {
 183                            $arch_branches{$branch} = $stage + 1;
 184                        }
 185                    }
 186                } else {
 187                    warn "more metadata after merges!?: $_\n" unless /^\s*$/;
 188                }
 189            }
 190        }
 191
 192        if (%ps && !exists $psets{ $ps{id} }) {
 193            my %temp = %ps;         # break references
 194            if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
 195                $temp{parent_id} = $psets[$#psets]{id};
 196            }
 197            push (@psets, \%temp);  
 198            $psets{ $temp{id} } = \%temp;
 199        }    
 200        
 201        close ABROWSE or die "$TLA abrowse failed on $limit\n";
 202    }
 203}                               # end foreach $root
 204
 205do_abrowse(1);
 206my $depth = 2;
 207$opt_D ||= 0;
 208while ($depth <= $opt_D) {
 209    do_abrowse($depth);
 210    $depth++;
 211}
 212
 213## Order patches by time
 214# FIXME see if we can find a more optimal way to do this by graphing
 215# the ancestry data and walking it, that way we won't have to rely on
 216# client-supplied dates
 217@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 218
 219#print Dumper \@psets;
 220
 221##
 222## TODO cleanup irrelevant patches
 223##      and put an initial import
 224##      or a full tag
 225my $import = 0;
 226unless (-d $git_dir) { # initial import
 227    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 228        print "Starting import from $psets[0]{id}\n";
 229        `git-init-db`;
 230        die $! if $?;
 231        $import = 1;
 232    } else {
 233        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 234    }
 235} else {    # progressing an import
 236    # load the rptags
 237    opendir(DIR, $ptag_dir)
 238        || die "can't opendir: $!";
 239    while (my $file = readdir(DIR)) {
 240        # skip non-interesting-files
 241        next unless -f "$ptag_dir/$file";
 242   
 243        # convert first '--' to '/' from old git-archimport to use
 244        # as an archivename/c--b--v private tag
 245        if ($file !~ m!,!) {
 246            my $oldfile = $file;
 247            $file =~ s!--!,!;
 248            print STDERR "converting old tag $oldfile to $file\n";
 249            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 250        }
 251        my $sha = ptag($file);
 252        chomp $sha;
 253        $rptags{$sha} = $file;
 254    }
 255    closedir DIR;
 256}
 257
 258# process patchsets
 259# extract the Arch repository name (Arch "archive" in Arch-speak)
 260sub extract_reponame {
 261    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 262    return (split(/\//, $fq_cvbr))[0];
 263}
 264 
 265sub extract_versionname {
 266    my $name = shift;
 267    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 268    return $name;
 269}
 270
 271# convert a fully-qualified revision or version to a unique dirname:
 272#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 273# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 274#
 275# the git notion of a branch is closer to
 276# archive/category--branch--version than archive/category--branch, so we
 277# use this to convert to git branch names.
 278# Also, keep archive names but replace '/' with ',' since it won't require
 279# subdirectories, and is safer than swapping '--' which could confuse
 280# reverse-mapping when dealing with bastard branches that
 281# are just archive/category--version  (no --branch)
 282sub tree_dirname {
 283    my $revision = shift;
 284    my $name = extract_versionname($revision);
 285    $name =~ s#/#,#;
 286    return $name;
 287}
 288
 289# old versions of git-archimport just use the <category--branch> part:
 290sub old_style_branchname {
 291    my $id = shift;
 292    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 293    chomp $ret;
 294    return $ret;
 295}
 296
 297*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 298
 299sub process_patchset_accurate {
 300    my $ps = shift;
 301    
 302    # switch to that branch if we're not already in that branch:
 303    if (-e "$git_dir/refs/heads/$ps->{branch}") {
 304       system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
 305
 306       # remove any old stuff that got leftover:
 307       my $rm = safe_pipe_capture('git-ls-files','--others','-z');
 308       rmtree(split(/\0/,$rm)) if $rm;
 309    }
 310    
 311    # Apply the import/changeset/merge into the working tree
 312    my $dir = sync_to_ps($ps);
 313    # read the new log entry:
 314    my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
 315    die "Error in cat-log: $!" if $?;
 316    chomp @commitlog;
 317
 318    # grab variables we want from the log, new fields get added to $ps:
 319    # (author, date, email, summary, message body ...)
 320    parselog($ps, \@commitlog);
 321
 322    if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
 323        # this should work when importing continuations 
 324        if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
 325            
 326            # find where we are supposed to branch from
 327            system('git-checkout','-f','-b',$ps->{branch},
 328                            $branchpoint) == 0 or die "$! $?\n";
 329            
 330            # remove any old stuff that got leftover:
 331            my $rm = safe_pipe_capture('git-ls-files','--others','-z');
 332            rmtree(split(/\0/,$rm)) if $rm;
 333
 334            # If we trust Arch with the fact that this is just 
 335            # a tag, and it does not affect the state of the tree
 336            # then we just tag and move on
 337            tag($ps->{id}, $branchpoint);
 338            ptag($ps->{id}, $branchpoint);
 339            print " * Tagged $ps->{id} at $branchpoint\n";
 340            return 0;
 341        } else {
 342            warn "Tagging from unknown id unsupported\n" if $ps->{tag};
 343        }
 344        # allow multiple bases/imports here since Arch supports cherry-picks
 345        # from unrelated trees
 346    } 
 347    
 348    # update the index with all the changes we got
 349    system('git-diff-files --name-only -z | '.
 350            'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 351    system('git-ls-files --others -z | '.
 352            'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 353    return 1;
 354}
 355
 356# the native changeset processing strategy.  This is very fast, but
 357# does not handle permissions or any renames involving directories
 358sub process_patchset_fast {
 359    my $ps = shift;
 360    # 
 361    # create the branch if needed
 362    #
 363    if ($ps->{type} eq 'i' && !$import) {
 364        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 365    }
 366
 367    unless ($import) { # skip for import
 368        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 369            # we know about this branch
 370            system('git-checkout',$ps->{branch});
 371        } else {
 372            # new branch! we need to verify a few things
 373            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 374            my $branchpoint = ptag($ps->{tag});
 375            die "Tagging from unknown id unsupported: $ps->{tag}" 
 376                unless $branchpoint;
 377            
 378            # find where we are supposed to branch from
 379            system('git-checkout','-b',$ps->{branch},$branchpoint);
 380
 381            # If we trust Arch with the fact that this is just 
 382            # a tag, and it does not affect the state of the tree
 383            # then we just tag and move on
 384            tag($ps->{id}, $branchpoint);
 385            ptag($ps->{id}, $branchpoint);
 386            print " * Tagged $ps->{id} at $branchpoint\n";
 387            return 0;
 388        } 
 389        die $! if $?;
 390    } 
 391
 392    #
 393    # Apply the import/changeset/merge into the working tree
 394    # 
 395    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 396        apply_import($ps) or die $!;
 397        $stats{import_or_tag}++;
 398        $import=0;
 399    } elsif ($ps->{type} eq 's') {
 400        apply_cset($ps);
 401        $stats{simple_changeset}++;
 402    }
 403
 404    #
 405    # prepare update git's index, based on what arch knows
 406    # about the pset, resolve parents, etc
 407    #
 408    
 409    my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 
 410    die "Error in cat-archive-log: $!" if $?;
 411        
 412    parselog($ps,\@commitlog);
 413
 414    # imports don't give us good info
 415    # on added files. Shame on them
 416    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 417        system('git-ls-files --deleted -z | '.
 418                'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 419        system('git-ls-files --others -z | '.
 420                'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 421    }
 422
 423    # TODO: handle removed_directories and renamed_directories:
 424
 425    if (my $del = $ps->{removed_files}) {
 426        unlink @$del;
 427        while (@$del) {
 428            my @slice = splice(@$del, 0, 100);
 429            system('git-update-index','--remove','--',@slice) == 0 or
 430                            die "Error in git-update-index --remove: $! $?\n";
 431        }
 432    }
 433
 434    if (my $ren = $ps->{renamed_files}) {                # renamed
 435        if (@$ren % 2) {
 436            die "Odd number of entries in rename!?";
 437        }
 438        
 439        while (@$ren) {
 440            my $from = shift @$ren;
 441            my $to   = shift @$ren;           
 442
 443            unless (-d dirname($to)) {
 444                mkpath(dirname($to)); # will die on err
 445            }
 446            # print "moving $from $to";
 447            rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
 448            system('git-update-index','--remove','--',$from) == 0 or
 449                            die "Error in git-update-index --remove: $! $?\n";
 450            system('git-update-index','--add','--',$to) == 0 or
 451                            die "Error in git-update-index --add: $! $?\n";
 452        }
 453    }
 454
 455    if (my $add = $ps->{new_files}) {
 456        while (@$add) {
 457            my @slice = splice(@$add, 0, 100);
 458            system('git-update-index','--add','--',@slice) == 0 or
 459                            die "Error in git-update-index --add: $! $?\n";
 460        }
 461    }
 462
 463    if (my $mod = $ps->{modified_files}) {
 464        while (@$mod) {
 465            my @slice = splice(@$mod, 0, 100);
 466            system('git-update-index','--',@slice) == 0 or
 467                            die "Error in git-update-index: $! $?\n";
 468        }
 469    }
 470    return 1; # we successfully applied the changeset
 471}
 472
 473if ($opt_f) {
 474    print "Will import patchsets using the fast strategy\n",
 475            "Renamed directories and permission changes will be missed\n";
 476    *process_patchset = *process_patchset_fast;
 477} else {
 478    print "Using the default (accurate) import strategy.\n",
 479            "Things may be a bit slow\n";
 480    *process_patchset = *process_patchset_accurate;
 481}
 482    
 483foreach my $ps (@psets) {
 484    # process patchsets
 485    $ps->{branch} = git_branchname($ps->{id});
 486
 487    #
 488    # ensure we have a clean state 
 489    # 
 490    if (my $dirty = `git-diff-files`) {
 491        die "Unclean tree when about to process $ps->{id} " .
 492            " - did we fail to commit cleanly before?\n$dirty";
 493    }
 494    die $! if $?;
 495    
 496    #
 497    # skip commits already in repo
 498    #
 499    if (ptag($ps->{id})) {
 500      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 501      next;
 502    }
 503
 504    print " * Starting to work on $ps->{id}\n";
 505
 506    process_patchset($ps) or next;
 507
 508    # warn "errors when running git-update-index! $!";
 509    my $tree = `git-write-tree`;
 510    die "cannot write tree $!" if $?;
 511    chomp $tree;
 512    
 513    #
 514    # Who's your daddy?
 515    #
 516    my @par;
 517    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 518        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
 519            my $p = <HEAD>;
 520            close HEAD;
 521            chomp $p;
 522            push @par, '-p', $p;
 523        } else { 
 524            if ($ps->{type} eq 's') {
 525                warn "Could not find the right head for the branch $ps->{branch}";
 526            }
 527        }
 528    }
 529    
 530    if ($ps->{merges}) {
 531        push @par, find_parents($ps);
 532    }
 533
 534    #    
 535    # Commit, tag and clean state
 536    #
 537    $ENV{TZ}                  = 'GMT';
 538    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 539    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 540    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 541    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 542    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 543    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 544
 545    my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 
 546        or die $!;
 547    print WRITER $ps->{summary},"\n";
 548    print WRITER $ps->{message},"\n";
 549    
 550    # make it easy to backtrack and figure out which Arch revision this was:
 551    print WRITER 'git-archimport-id: ',$ps->{id},"\n";
 552    
 553    close WRITER;
 554    my $commitid = <READER>;    # read
 555    chomp $commitid;
 556    close READER;
 557    waitpid $pid,0;             # close;
 558
 559    if (length $commitid != 40) {
 560        die "Something went wrong with the commit! $! $commitid";
 561    }
 562    #
 563    # Update the branch
 564    # 
 565    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
 566    print HEAD $commitid;
 567    close HEAD;
 568    system('git-update-ref', 'HEAD', "$ps->{branch}");
 569
 570    # tag accordingly
 571    ptag($ps->{id}, $commitid); # private tag
 572    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 573        tag($ps->{id}, $commitid);
 574    }
 575    print " * Committed $ps->{id}\n";
 576    print "   + tree   $tree\n";
 577    print "   + commit $commitid\n";
 578    $opt_v && print "   + commit date is  $ps->{date} \n";
 579    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
 580}
 581
 582if ($opt_v) {
 583    foreach (sort keys %stats) {
 584        print" $_: $stats{$_}\n";
 585    }
 586}
 587exit 0;
 588
 589# used by the accurate strategy:
 590sub sync_to_ps {
 591    my $ps = shift;
 592    my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
 593    
 594    $opt_v && print "sync_to_ps($ps->{id}) method: ";
 595
 596    if (-d $tree_dir) {
 597        if ($ps->{type} eq 't') {
 598            $opt_v && print "get (tag)\n";
 599            # looks like a tag-only or (worse,) a mixed tags/changeset branch,
 600            # can't rely on replay to work correctly on these
 601            rmtree($tree_dir);
 602            safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
 603            $stats{get_tag}++;
 604        } else {
 605                my $tree_id = arch_tree_id($tree_dir);
 606                if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
 607                    # the common case (hopefully)
 608                    $opt_v && print "replay\n";
 609                    safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
 610                    $stats{replay}++;
 611                } else {
 612                    # getting one tree is usually faster than getting two trees
 613                    # and applying the delta ...
 614                    rmtree($tree_dir);
 615                    $opt_v && print "apply-delta\n";
 616                    safe_pipe_capture($TLA,'get','--no-pristine',
 617                                        $ps->{id},$tree_dir);
 618                    $stats{get_delta}++;
 619                }
 620        }
 621    } else {
 622        # new branch work
 623        $opt_v && print "get (new tree)\n";
 624        safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
 625        $stats{get_new}++;
 626    }
 627   
 628    # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
 629    system('rsync','-aI','--delete','--exclude',$git_dir,
 630#               '--exclude','.arch-inventory',
 631                '--exclude','.arch-ids','--exclude','{arch}',
 632                '--exclude','+*','--exclude',',*',
 633                "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
 634    return $tree_dir;
 635}
 636
 637sub apply_import {
 638    my $ps = shift;
 639    my $bname = git_branchname($ps->{id});
 640
 641    mkpath($tmp);
 642
 643    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
 644    die "Cannot get import: $!" if $?;    
 645    system('rsync','-aI','--delete', '--exclude',$git_dir,
 646                '--exclude','.arch-ids','--exclude','{arch}',
 647                "$tmp/import/", './');
 648    die "Cannot rsync import:$!" if $?;
 649    
 650    rmtree("$tmp/import");
 651    die "Cannot remove tempdir: $!" if $?;
 652    
 653
 654    return 1;
 655}
 656
 657sub apply_cset {
 658    my $ps = shift;
 659
 660    mkpath($tmp);
 661
 662    # get the changeset
 663    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
 664    die "Cannot get changeset: $!" if $?;
 665    
 666    # apply patches
 667    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 668        # this can be sped up considerably by doing
 669        #    (find | xargs cat) | patch
 670        # but that cna get mucked up by patches
 671        # with missing trailing newlines or the standard 
 672        # 'missing newline' flag in the patch - possibly
 673        # produced with an old/buggy diff.
 674        # slow and safe, we invoke patch once per patchfile
 675        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 676        die "Problem applying patches! $!" if $?;
 677    }
 678
 679    # apply changed binary files
 680    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 681        foreach my $mod (@modified) {
 682            chomp $mod;
 683            my $orig = $mod;
 684            $orig =~ s/\.modified$//; # lazy
 685            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 686            #print "rsync -p '$mod' '$orig'";
 687            system('rsync','-p',$mod,"./$orig");
 688            die "Problem applying binary changes! $!" if $?;
 689        }
 690    }
 691
 692    # bring in new files
 693    system('rsync','-aI','--exclude',$git_dir,
 694                '--exclude','.arch-ids',
 695                '--exclude', '{arch}',
 696                "$tmp/changeset/new-files-archive/",'./');
 697
 698    # deleted files are hinted from the commitlog processing
 699
 700    rmtree("$tmp/changeset");
 701}
 702
 703
 704# =for reference
 705# notes: *-files/-directories keys cannot have spaces, they're always
 706# pika-escaped.  Everything after the first newline
 707# A log entry looks like:
 708# Revision: moodle-org--moodle--1.3.3--patch-15
 709# Archive: arch-eduforge@catalyst.net.nz--2004
 710# Creator: Penny Leach <penny@catalyst.net.nz>
 711# Date: Wed May 25 14:15:34 NZST 2005
 712# Standard-date: 2005-05-25 02:15:34 GMT
 713# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 714#     lang/de/.arch-ids/block_html.php.id
 715# New-directories: lang/de/help/questionnaire
 716#     lang/de/help/questionnaire/.arch-ids
 717# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 718#    db_sears.sql db/db_sears.sql
 719# Removed-files: lang/be/docs/.arch-ids/release.html.id
 720#     lang/be/docs/.arch-ids/releaseold.html.id
 721# Modified-files: admin/cron.php admin/delete.php
 722#     admin/editor.html backup/lib.php backup/restore.php
 723# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 724# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 725#   summary can be multiline with a leading space just like the above fields
 726# Keywords:
 727#
 728# Updating yadda tadda tadda madda
 729sub parselog {
 730    my ($ps, $log) = @_;
 731    my $key = undef;
 732
 733    # headers we want that contain filenames:
 734    my %want_headers = (
 735        new_files => 1,
 736        modified_files => 1,
 737        renamed_files => 1,
 738        renamed_directories => 1,
 739        removed_files => 1,
 740        removed_directories => 1,
 741    );
 742    
 743    chomp (@$log);
 744    while ($_ = shift @$log) {
 745        if (/^Continuation-of:\s*(.*)/) {
 746            $ps->{tag} = $1;
 747            $key = undef;
 748        } elsif (/^Summary:\s*(.*)$/ ) {
 749            # summary can be multiline as long as it has a leading space
 750            $ps->{summary} = [ $1 ];
 751            $key = 'summary';
 752        } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
 753            $ps->{author} = $1;
 754            $ps->{email} = $2;
 755            $key = undef;
 756        # any *-files or *-directories can be read here:
 757        } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
 758            my $val = $2;
 759            $key = lc $1;
 760            $key =~ tr/-/_/; # too lazy to quote :P
 761            if ($want_headers{$key}) {
 762                push @{$ps->{$key}}, split(/\s+/, $val);
 763            } else {
 764                $key = undef;
 765            }
 766        } elsif (/^$/) {
 767            last; # remainder of @$log that didn't get shifted off is message
 768        } elsif ($key) {
 769            if (/^\s+(.*)$/) {
 770                if ($key eq 'summary') {
 771                    push @{$ps->{$key}}, $1;
 772                } else { # files/directories:
 773                    push @{$ps->{$key}}, split(/\s+/, $1);
 774                }
 775            } else {
 776                $key = undef;
 777            }
 778        }
 779    }
 780   
 781    # post-processing:
 782    $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
 783    $ps->{message} = join("\n",@$log);
 784    
 785    # skip Arch control files, unescape pika-escaped files
 786    foreach my $k (keys %want_headers) {
 787        next unless (defined $ps->{$k});
 788        my @tmp = ();
 789        foreach my $t (@{$ps->{$k}}) {
 790           next unless length ($t);
 791           next if $t =~ m!\{arch\}/!;
 792           next if $t =~ m!\.arch-ids/!;
 793           # should we skip this?
 794           next if $t =~ m!\.arch-inventory$!;
 795           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 796           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 797           if ($t =~ /\\/ ){
 798               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
 799           }
 800           push @tmp, $t;
 801        }
 802        $ps->{$k} = \@tmp;
 803    }
 804}
 805
 806# write/read a tag
 807sub tag {
 808    my ($tag, $commit) = @_;
 809 
 810    if ($opt_o) {
 811        $tag =~ s|/|--|g;
 812    } else {
 813        # don't use subdirs for tags yet, it could screw up other porcelains
 814        $tag =~ s|/|,|g;
 815    }
 816    
 817    if ($commit) {
 818        open(C,">","$git_dir/refs/tags/$tag")
 819            or die "Cannot create tag $tag: $!\n";
 820        print C "$commit\n"
 821            or die "Cannot write tag $tag: $!\n";
 822        close(C)
 823            or die "Cannot write tag $tag: $!\n";
 824        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 825    } else {                    # read
 826        open(C,"<","$git_dir/refs/tags/$tag")
 827            or die "Cannot read tag $tag: $!\n";
 828        $commit = <C>;
 829        chomp $commit;
 830        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 831        close(C)
 832            or die "Cannot read tag $tag: $!\n";
 833        return $commit;
 834    }
 835}
 836
 837# write/read a private tag
 838# reads fail softly if the tag isn't there
 839sub ptag {
 840    my ($tag, $commit) = @_;
 841
 842    # don't use subdirs for tags yet, it could screw up other porcelains
 843    $tag =~ s|/|,|g; 
 844    
 845    my $tag_file = "$ptag_dir/$tag";
 846    my $tag_branch_dir = dirname($tag_file);
 847    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 848
 849    if ($commit) {              # write
 850        open(C,">",$tag_file)
 851            or die "Cannot create tag $tag: $!\n";
 852        print C "$commit\n"
 853            or die "Cannot write tag $tag: $!\n";
 854        close(C)
 855            or die "Cannot write tag $tag: $!\n";
 856        $rptags{$commit} = $tag 
 857            unless $tag =~ m/--base-0$/;
 858    } else {                    # read
 859        # if the tag isn't there, return 0
 860        unless ( -s $tag_file) {
 861            return 0;
 862        }
 863        open(C,"<",$tag_file)
 864            or die "Cannot read tag $tag: $!\n";
 865        $commit = <C>;
 866        chomp $commit;
 867        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 868        close(C)
 869            or die "Cannot read tag $tag: $!\n";
 870        unless (defined $rptags{$commit}) {
 871            $rptags{$commit} = $tag;
 872        }
 873        return $commit;
 874    }
 875}
 876
 877sub find_parents {
 878    #
 879    # Identify what branches are merging into me
 880    # and whether we are fully merged
 881    # git-merge-base <headsha> <headsha> should tell
 882    # me what the base of the merge should be 
 883    #
 884    my $ps = shift;
 885
 886    my %branches; # holds an arrayref per branch
 887                  # the arrayref contains a list of
 888                  # merged patches between the base
 889                  # of the merge and the current head
 890
 891    my @parents;  # parents found for this commit
 892
 893    # simple loop to split the merges
 894    # per branch
 895    foreach my $merge (@{$ps->{merges}}) {
 896        my $branch = git_branchname($merge);
 897        unless (defined $branches{$branch} ){
 898            $branches{$branch} = [];
 899        }
 900        push @{$branches{$branch}}, $merge;
 901    }
 902
 903    #
 904    # foreach branch find a merge base and walk it to the 
 905    # head where we are, collecting the merged patchsets that
 906    # Arch has recorded. Keep that in @have
 907    # Compare that with the commits on the other branch
 908    # between merge-base and the tip of the branch (@need)
 909    # and see if we have a series of consecutive patches
 910    # starting from the merge base. The tip of the series
 911    # of consecutive patches merged is our new parent for 
 912    # that branch.
 913    #
 914    foreach my $branch (keys %branches) {
 915
 916        # check that we actually know about the branch
 917        next unless -e "$git_dir/refs/heads/$branch";
 918
 919        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 920        if ($?) { 
 921            # Don't die here, Arch supports one-way cherry-picking
 922            # between branches with no common base (or any relationship
 923            # at all beforehand)
 924            warn "Cannot find merge base for $branch and $ps->{branch}";
 925            next;
 926        }
 927        chomp $mergebase;
 928
 929        # now walk up to the mergepoint collecting what patches we have
 930        my $branchtip = git_rev_parse($ps->{branch});
 931        my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
 932        my %have; # collected merges this branch has
 933        foreach my $merge (@{$ps->{merges}}) {
 934            $have{$merge} = 1;
 935        }
 936        my %ancestorshave;
 937        foreach my $par (@ancestors) {
 938            $par = commitid2pset($par);
 939            if (defined $par->{merges}) {
 940                foreach my $merge (@{$par->{merges}}) {
 941                    $ancestorshave{$merge}=1;
 942                }
 943            }
 944        }
 945        # print "++++ Merges in $ps->{id} are....\n";
 946        # my @have = sort keys %have;   print Dumper(\@have);
 947
 948        # merge what we have with what ancestors have
 949        %have = (%have, %ancestorshave);
 950
 951        # see what the remote branch has - these are the merges we 
 952        # will want to have in a consecutive series from the mergebase
 953        my $otherbranchtip = git_rev_parse($branch);
 954        my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
 955        my @need;
 956        foreach my $needps (@needraw) {         # get the psets
 957            $needps = commitid2pset($needps);
 958            # git-rev-list will also
 959            # list commits merged in via earlier 
 960            # merges. we are only interested in commits
 961            # from the branch we're looking at
 962            if ($branch eq $needps->{branch}) {
 963                push @need, $needps->{id};
 964            }
 965        }
 966
 967        # print "++++ Merges from $branch we want are....\n";
 968        # print Dumper(\@need);
 969
 970        my $newparent;
 971        while (my $needed_commit = pop @need) {
 972            if ($have{$needed_commit}) {
 973                $newparent = $needed_commit;
 974            } else {
 975                last; # break out of the while
 976            }
 977        }
 978        if ($newparent) {
 979            push @parents, $newparent;
 980        }
 981
 982
 983    } # end foreach branch
 984
 985    # prune redundant parents
 986    my %parents;
 987    foreach my $p (@parents) {
 988        $parents{$p} = 1;
 989    }
 990    foreach my $p (@parents) {
 991        next unless exists $psets{$p}{merges};
 992        next unless ref    $psets{$p}{merges};
 993        my @merges = @{$psets{$p}{merges}};
 994        foreach my $merge (@merges) {
 995            if ($parents{$merge}) { 
 996                delete $parents{$merge};
 997            }
 998        }
 999    }
1000
1001    @parents = ();
1002    foreach (keys %parents) {
1003        push @parents, '-p', ptag($_);
1004    }
1005    return @parents;
1006}
1007
1008sub git_rev_parse {
1009    my $name = shift;
1010    my $val  = `git-rev-parse $name`;
1011    die "Error: git-rev-parse $name" if $?;
1012    chomp $val;
1013    return $val;
1014}
1015
1016# resolve a SHA1 to a known patchset
1017sub commitid2pset {
1018    my $commitid = shift;
1019    chomp $commitid;
1020    my $name = $rptags{$commitid} 
1021        || die "Cannot find reverse tag mapping for $commitid";
1022    $name =~ s|,|/|;
1023    my $ps   = $psets{$name} 
1024        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1025    return $ps;
1026}
1027
1028
1029# an alterative to `command` that allows input to be passed as an array
1030# to work around shell problems with weird characters in arguments
1031sub safe_pipe_capture {
1032    my @output;
1033    if (my $pid = open my $child, '-|') {
1034        @output = (<$child>);
1035        close $child or die join(' ',@_).": $! $?";
1036    } else {
1037        exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1038    }
1039    return wantarray ? @output : join('',@output);
1040}
1041
1042# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1043sub arch_tree_id {
1044    my $dir = shift;
1045    chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1046    return $ret;
1047}
1048
1049sub archive_reachable {
1050    my $archive = shift;
1051    return 1 if $reachable{$archive};
1052    return 0 if $unreachable{$archive};
1053    
1054    if (system "$TLA whereis-archive $archive >/dev/null") {
1055        if ($opt_a && (system($TLA,'register-archive',
1056                      "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1057            $reachable{$archive} = 1;
1058            return 1;
1059        }
1060        print STDERR "Archive is unreachable: $archive\n";
1061        $unreachable{$archive} = 1;
1062        return 0;
1063    } else {
1064        $reachable{$archive} = 1;
1065        return 1;
1066    }
1067}
1068