b21077206a93139fd8652d5174817e18c89ba22e
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse,
   7# fetch the changesets and apply them.
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
  13        [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  14
  15Imports a project from one or more Arch repositories. It will follow branches
  16and repositories within the namespaces defined by the <archive/branch>
  17parameters supplied. If it cannot find the remote branch a merge comes from
  18it will just import it as a regular commit. If it can find it, it will mark it
  19as a merge whenever possible.
  20
  21See man (1) git-archimport for more details.
  22
  23=head1 TODO
  24
  25 - create tag objects instead of ref tags
  26 - audit shell-escaping of filenames
  27 - hide our private tags somewhere smarter
  28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
  29 - sort and apply patches by graphing ancestry relations instead of just
  30   relying in dates supplied in the changeset itself.
  31   tla ancestry-graph -m could be helpful here...
  32
  33=head1 Devel tricks
  34
  35Add print in front of the shell commands invoked via backticks.
  36
  37=head1 Devel Notes
  38
  39There are several places where Arch and git terminology are intermixed
  40and potentially confused.
  41
  42The notion of a "branch" in git is approximately equivalent to
  43a "archive/category--branch--version" in Arch.  Also, it should be noted
  44that the "--branch" portion of "archive/category--branch--version" is really
  45optional in Arch although not many people (nor tools!) seem to know this.
  46This means that "archive/category--version" is also a valid "branch"
  47in git terms.
  48
  49We always refer to Arch names by their fully qualified variant (which
  50means the "archive" name is prefixed.
  51
  52For people unfamiliar with Arch, an "archive" is the term for "repository",
  53and can contain multiple, unrelated branches.
  54
  55=cut
  56
  57use strict;
  58use warnings;
  59use Getopt::Std;
  60use File::Temp qw(tempdir);
  61use File::Path qw(mkpath rmtree);
  62use File::Basename qw(basename dirname);
  63use Data::Dumper qw/ Dumper /;
  64use IPC::Open2;
  65
  66$SIG{'PIPE'}="IGNORE";
  67$ENV{'TZ'}="UTC";
  68
  69my $git_dir = $ENV{"GIT_DIR"} || ".git";
  70$ENV{"GIT_DIR"} = $git_dir;
  71my $ptag_dir = "$git_dir/archimport/tags";
  72
  73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
  74
  75sub usage() {
  76    print STDERR <<END;
  77Usage: ${\basename $0}     # fetch/update GIT from Arch
  78       [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
  79       repository/arch-branch [ repository/arch-branch] ...
  80END
  81    exit(1);
  82}
  83
  84getopts("fThvat:D:") or usage();
  85usage if $opt_h;
  86
  87@ARGV >= 1 or usage();
  88# $arch_branches:
  89# values associated with keys:
  90#   =1 - Arch version / git 'branch' detected via abrowse on a limit
  91#   >1 - Arch version / git 'branch' of an auxiliary branch we've merged
  92my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;
  93
  94# $branch_name_map:
  95# maps arch branches to git branch names
  96my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
  97
  98$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
  99my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
 100$opt_v && print "+ Using $tmp as temporary directory\n";
 101
 102unless (-d $git_dir) { # initial import needs empty directory
 103    opendir DIR, '.' or die "Unable to open current directory: $!\n";
 104    while (my $entry = readdir DIR) {
 105        $entry =~ /^\.\.?$/ or
 106            die "Initial import needs an empty current working directory.\n"
 107    }
 108    closedir DIR
 109}
 110
 111my $default_archive;            # default Arch archive
 112my %reachable = ();             # Arch repositories we can access
 113my %unreachable = ();           # Arch repositories we can't access :<
 114my @psets  = ();                # the collection
 115my %psets  = ();                # the collection, by name
 116my %stats  = (                  # Track which strategy we used to import:
 117        get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
 118        simple_changeset => 0, import_or_tag => 0
 119);
 120
 121my %rptags = ();                # my reverse private tags
 122                                # to map a SHA1 to a commitid
 123my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 124
 125sub do_abrowse {
 126    my $stage = shift;
 127    while (my ($limit, $level) = each %arch_branches) {
 128        next unless $level == $stage;
 129
 130        open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
 131                                or die "Problems with tla abrowse: $!";
 132
 133        my %ps        = ();         # the current one
 134        my $lastseen  = '';
 135
 136        while (<ABROWSE>) {
 137            chomp;
 138
 139            # first record padded w 8 spaces
 140            if (s/^\s{8}\b//) {
 141                my ($id, $type) = split(m/\s+/, $_, 2);
 142
 143                my %last_ps;
 144                # store the record we just captured
 145                if (%ps && !exists $psets{ $ps{id} }) {
 146                    %last_ps = %ps; # break references
 147                    push (@psets, \%last_ps);
 148                    $psets{ $last_ps{id} } = \%last_ps;
 149                }
 150
 151                my $branch = extract_versionname($id);
 152                %ps = ( id => $id, branch => $branch );
 153                if (%last_ps && ($last_ps{branch} eq $branch)) {
 154                    $ps{parent_id} = $last_ps{id};
 155                }
 156
 157                $arch_branches{$branch} = 1;
 158                $lastseen = 'id';
 159
 160                # deal with types (should work with baz or tla):
 161                if ($type =~ m/\(.*changeset\)/) {
 162                    $ps{type} = 's';
 163                } elsif ($type =~ /\(.*import\)/) {
 164                    $ps{type} = 'i';
 165                } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
 166                    $ps{type} = 't';
 167                    # read which revision we've tagged when we parse the log
 168                    $ps{tag}  = $1;
 169                } else {
 170                    warn "Unknown type $type";
 171                }
 172
 173                $arch_branches{$branch} = 1;
 174                $lastseen = 'id';
 175            } elsif (s/^\s{10}//) {
 176                # 10 leading spaces or more
 177                # indicate commit metadata
 178
 179                # date
 180                if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
 181                    $ps{date}   = $1;
 182                    $lastseen = 'date';
 183                } elsif ($_ eq 'merges in:') {
 184                    $ps{merges} = [];
 185                    $lastseen = 'merges';
 186                } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 187                    my $id = $_;
 188                    push (@{$ps{merges}}, $id);
 189
 190                    # aggressive branch finding:
 191                    if ($opt_D) {
 192                        my $branch = extract_versionname($id);
 193                        my $repo = extract_reponame($branch);
 194
 195                        if (archive_reachable($repo) &&
 196                                !defined $arch_branches{$branch}) {
 197                            $arch_branches{$branch} = $stage + 1;
 198                        }
 199                    }
 200                } else {
 201                    warn "more metadata after merges!?: $_\n" unless /^\s*$/;
 202                }
 203            }
 204        }
 205
 206        if (%ps && !exists $psets{ $ps{id} }) {
 207            my %temp = %ps;         # break references
 208            if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
 209                $temp{parent_id} = $psets[$#psets]{id};
 210            }
 211            push (@psets, \%temp);
 212            $psets{ $temp{id} } = \%temp;
 213        }
 214
 215        close ABROWSE or die "$TLA abrowse failed on $limit\n";
 216    }
 217}                               # end foreach $root
 218
 219do_abrowse(1);
 220my $depth = 2;
 221$opt_D ||= 0;
 222while ($depth <= $opt_D) {
 223    do_abrowse($depth);
 224    $depth++;
 225}
 226
 227## Order patches by time
 228# FIXME see if we can find a more optimal way to do this by graphing
 229# the ancestry data and walking it, that way we won't have to rely on
 230# client-supplied dates
 231@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 232
 233#print Dumper \@psets;
 234
 235##
 236## TODO cleanup irrelevant patches
 237##      and put an initial import
 238##      or a full tag
 239my $import = 0;
 240unless (-d $git_dir) { # initial import
 241    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 242        print "Starting import from $psets[0]{id}\n";
 243        `git-init`;
 244        die $! if $?;
 245        $import = 1;
 246    } else {
 247        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 248    }
 249} else {    # progressing an import
 250    # load the rptags
 251    opendir(DIR, $ptag_dir)
 252        || die "can't opendir: $!";
 253    while (my $file = readdir(DIR)) {
 254        # skip non-interesting-files
 255        next unless -f "$ptag_dir/$file";
 256
 257        # convert first '--' to '/' from old git-archimport to use
 258        # as an archivename/c--b--v private tag
 259        if ($file !~ m!,!) {
 260            my $oldfile = $file;
 261            $file =~ s!--!,!;
 262            print STDERR "converting old tag $oldfile to $file\n";
 263            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 264        }
 265        my $sha = ptag($file);
 266        chomp $sha;
 267        $rptags{$sha} = $file;
 268    }
 269    closedir DIR;
 270}
 271
 272# process patchsets
 273# extract the Arch repository name (Arch "archive" in Arch-speak)
 274sub extract_reponame {
 275    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 276    return (split(/\//, $fq_cvbr))[0];
 277}
 278
 279sub extract_versionname {
 280    my $name = shift;
 281    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 282    return $name;
 283}
 284
 285# convert a fully-qualified revision or version to a unique dirname:
 286#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
 287# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 288#
 289# the git notion of a branch is closer to
 290# archive/category--branch--version than archive/category--branch, so we
 291# use this to convert to git branch names.
 292# Also, keep archive names but replace '/' with ',' since it won't require
 293# subdirectories, and is safer than swapping '--' which could confuse
 294# reverse-mapping when dealing with bastard branches that
 295# are just archive/category--version  (no --branch)
 296sub tree_dirname {
 297    my $revision = shift;
 298    my $name = extract_versionname($revision);
 299    $name =~ s#/#,#;
 300    return $name;
 301}
 302
 303# old versions of git-archimport just use the <category--branch> part:
 304sub old_style_branchname {
 305    my $id = shift;
 306    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 307    chomp $ret;
 308    return $ret;
 309}
 310
 311*git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 312
 313# retrieve default archive, since $branch_name_map keys might not include it
 314sub get_default_archive {
 315    if (!defined $default_archive) {
 316        $default_archive = safe_pipe_capture($TLA,'my-default-archive');
 317        chomp $default_archive;
 318    }
 319    return $default_archive;
 320}
 321
 322sub git_branchname {
 323    my $revision = shift;
 324    my $name = extract_versionname($revision);
 325
 326    if (exists $branch_name_map{$name}) {
 327        return $branch_name_map{$name};
 328
 329    } elsif ($name =~ m#^([^/]*)/(.*)$#
 330             && $1 eq get_default_archive()
 331             && exists $branch_name_map{$2}) {
 332        # the names given in the command-line lacked the archive.
 333        return $branch_name_map{$2};
 334
 335    } else {
 336        return git_default_branchname($revision);
 337    }
 338}
 339
 340sub process_patchset_accurate {
 341    my $ps = shift;
 342
 343    # switch to that branch if we're not already in that branch:
 344    if (-e "$git_dir/refs/heads/$ps->{branch}") {
 345       system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
 346
 347       # remove any old stuff that got leftover:
 348       my $rm = safe_pipe_capture('git-ls-files','--others','-z');
 349       rmtree(split(/\0/,$rm)) if $rm;
 350    }
 351
 352    # Apply the import/changeset/merge into the working tree
 353    my $dir = sync_to_ps($ps);
 354    # read the new log entry:
 355    my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
 356    die "Error in cat-log: $!" if $?;
 357    chomp @commitlog;
 358
 359    # grab variables we want from the log, new fields get added to $ps:
 360    # (author, date, email, summary, message body ...)
 361    parselog($ps, \@commitlog);
 362
 363    if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
 364        # this should work when importing continuations
 365        if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
 366
 367            # find where we are supposed to branch from
 368            if (! -e "$git_dir/refs/heads/$ps->{branch}") {
 369                system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
 370
 371                # We trust Arch with the fact that this is just a tag,
 372                # and it does not affect the state of the tree, so
 373                # we just tag and move on.  If the user really wants us
 374                # to consolidate more branches into one, don't tag because
 375                # the tag name would be already taken.
 376                tag($ps->{id}, $branchpoint);
 377                ptag($ps->{id}, $branchpoint);
 378                print " * Tagged $ps->{id} at $branchpoint\n";
 379            }
 380            system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
 381
 382            # remove any old stuff that got leftover:
 383            my $rm = safe_pipe_capture('git-ls-files','--others','-z');
 384            rmtree(split(/\0/,$rm)) if $rm;
 385            return 0;
 386        } else {
 387            warn "Tagging from unknown id unsupported\n" if $ps->{tag};
 388        }
 389        # allow multiple bases/imports here since Arch supports cherry-picks
 390        # from unrelated trees
 391    }
 392
 393    # update the index with all the changes we got
 394    system('git-diff-files --name-only -z | '.
 395            'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 396    system('git-ls-files --others -z | '.
 397            'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 398    return 1;
 399}
 400
 401# the native changeset processing strategy.  This is very fast, but
 402# does not handle permissions or any renames involving directories
 403sub process_patchset_fast {
 404    my $ps = shift;
 405    #
 406    # create the branch if needed
 407    #
 408    if ($ps->{type} eq 'i' && !$import) {
 409        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 410    }
 411
 412    unless ($import) { # skip for import
 413        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 414            # we know about this branch
 415            system('git-checkout',$ps->{branch});
 416        } else {
 417            # new branch! we need to verify a few things
 418            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 419            my $branchpoint = ptag($ps->{tag});
 420            die "Tagging from unknown id unsupported: $ps->{tag}"
 421                unless $branchpoint;
 422
 423            # find where we are supposed to branch from
 424            if (! -e "$git_dir/refs/heads/$ps->{branch}") {
 425                system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
 426
 427                # We trust Arch with the fact that this is just a tag,
 428                # and it does not affect the state of the tree, so
 429                # we just tag and move on.  If the user really wants us
 430                # to consolidate more branches into one, don't tag because
 431                # the tag name would be already taken.
 432                tag($ps->{id}, $branchpoint);
 433                ptag($ps->{id}, $branchpoint);
 434                print " * Tagged $ps->{id} at $branchpoint\n";
 435            }
 436            system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
 437            return 0;
 438        }
 439        die $! if $?;
 440    }
 441
 442    #
 443    # Apply the import/changeset/merge into the working tree
 444    #
 445    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 446        apply_import($ps) or die $!;
 447        $stats{import_or_tag}++;
 448        $import=0;
 449    } elsif ($ps->{type} eq 's') {
 450        apply_cset($ps);
 451        $stats{simple_changeset}++;
 452    }
 453
 454    #
 455    # prepare update git's index, based on what arch knows
 456    # about the pset, resolve parents, etc
 457    #
 458
 459    my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
 460    die "Error in cat-archive-log: $!" if $?;
 461
 462    parselog($ps,\@commitlog);
 463
 464    # imports don't give us good info
 465    # on added files. Shame on them
 466    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 467        system('git-ls-files --deleted -z | '.
 468                'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
 469        system('git-ls-files --others -z | '.
 470                'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
 471    }
 472
 473    # TODO: handle removed_directories and renamed_directories:
 474
 475    if (my $del = $ps->{removed_files}) {
 476        unlink @$del;
 477        while (@$del) {
 478            my @slice = splice(@$del, 0, 100);
 479            system('git-update-index','--remove','--',@slice) == 0 or
 480                            die "Error in git-update-index --remove: $! $?\n";
 481        }
 482    }
 483
 484    if (my $ren = $ps->{renamed_files}) {                # renamed
 485        if (@$ren % 2) {
 486            die "Odd number of entries in rename!?";
 487        }
 488
 489        while (@$ren) {
 490            my $from = shift @$ren;
 491            my $to   = shift @$ren;
 492
 493            unless (-d dirname($to)) {
 494                mkpath(dirname($to)); # will die on err
 495            }
 496            # print "moving $from $to";
 497            rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
 498            system('git-update-index','--remove','--',$from) == 0 or
 499                            die "Error in git-update-index --remove: $! $?\n";
 500            system('git-update-index','--add','--',$to) == 0 or
 501                            die "Error in git-update-index --add: $! $?\n";
 502        }
 503    }
 504
 505    if (my $add = $ps->{new_files}) {
 506        while (@$add) {
 507            my @slice = splice(@$add, 0, 100);
 508            system('git-update-index','--add','--',@slice) == 0 or
 509                            die "Error in git-update-index --add: $! $?\n";
 510        }
 511    }
 512
 513    if (my $mod = $ps->{modified_files}) {
 514        while (@$mod) {
 515            my @slice = splice(@$mod, 0, 100);
 516            system('git-update-index','--',@slice) == 0 or
 517                            die "Error in git-update-index: $! $?\n";
 518        }
 519    }
 520    return 1; # we successfully applied the changeset
 521}
 522
 523if ($opt_f) {
 524    print "Will import patchsets using the fast strategy\n",
 525            "Renamed directories and permission changes will be missed\n";
 526    *process_patchset = *process_patchset_fast;
 527} else {
 528    print "Using the default (accurate) import strategy.\n",
 529            "Things may be a bit slow\n";
 530    *process_patchset = *process_patchset_accurate;
 531}
 532
 533foreach my $ps (@psets) {
 534    # process patchsets
 535    $ps->{branch} = git_branchname($ps->{id});
 536
 537    #
 538    # ensure we have a clean state
 539    #
 540    if (my $dirty = `git-diff-files`) {
 541        die "Unclean tree when about to process $ps->{id} " .
 542            " - did we fail to commit cleanly before?\n$dirty";
 543    }
 544    die $! if $?;
 545
 546    #
 547    # skip commits already in repo
 548    #
 549    if (ptag($ps->{id})) {
 550      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 551      next;
 552    }
 553
 554    print " * Starting to work on $ps->{id}\n";
 555
 556    process_patchset($ps) or next;
 557
 558    # warn "errors when running git-update-index! $!";
 559    my $tree = `git-write-tree`;
 560    die "cannot write tree $!" if $?;
 561    chomp $tree;
 562
 563    #
 564    # Who's your daddy?
 565    #
 566    my @par;
 567    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 568        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
 569            my $p = <HEAD>;
 570            close HEAD;
 571            chomp $p;
 572            push @par, '-p', $p;
 573        } else {
 574            if ($ps->{type} eq 's') {
 575                warn "Could not find the right head for the branch $ps->{branch}";
 576            }
 577        }
 578    }
 579
 580    if ($ps->{merges}) {
 581        push @par, find_parents($ps);
 582    }
 583
 584    #
 585    # Commit, tag and clean state
 586    #
 587    $ENV{TZ}                  = 'GMT';
 588    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 589    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 590    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 591    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 592    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 593    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 594
 595    my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
 596        or die $!;
 597    print WRITER $ps->{summary},"\n\n";
 598    print WRITER $ps->{message},"\n";
 599
 600    # make it easy to backtrack and figure out which Arch revision this was:
 601    print WRITER 'git-archimport-id: ',$ps->{id},"\n";
 602
 603    close WRITER;
 604    my $commitid = <READER>;    # read
 605    chomp $commitid;
 606    close READER;
 607    waitpid $pid,0;             # close;
 608
 609    if (length $commitid != 40) {
 610        die "Something went wrong with the commit! $! $commitid";
 611    }
 612    #
 613    # Update the branch
 614    #
 615    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
 616    print HEAD $commitid;
 617    close HEAD;
 618    system('git-update-ref', 'HEAD', "$ps->{branch}");
 619
 620    # tag accordingly
 621    ptag($ps->{id}, $commitid); # private tag
 622    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 623        tag($ps->{id}, $commitid);
 624    }
 625    print " * Committed $ps->{id}\n";
 626    print "   + tree   $tree\n";
 627    print "   + commit $commitid\n";
 628    $opt_v && print "   + commit date is  $ps->{date} \n";
 629    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
 630}
 631
 632if ($opt_v) {
 633    foreach (sort keys %stats) {
 634        print" $_: $stats{$_}\n";
 635    }
 636}
 637exit 0;
 638
 639# used by the accurate strategy:
 640sub sync_to_ps {
 641    my $ps = shift;
 642    my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
 643
 644    $opt_v && print "sync_to_ps($ps->{id}) method: ";
 645
 646    if (-d $tree_dir) {
 647        if ($ps->{type} eq 't') {
 648            $opt_v && print "get (tag)\n";
 649            # looks like a tag-only or (worse,) a mixed tags/changeset branch,
 650            # can't rely on replay to work correctly on these
 651            rmtree($tree_dir);
 652            safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
 653            $stats{get_tag}++;
 654        } else {
 655                my $tree_id = arch_tree_id($tree_dir);
 656                if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
 657                    # the common case (hopefully)
 658                    $opt_v && print "replay\n";
 659                    safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
 660                    $stats{replay}++;
 661                } else {
 662                    # getting one tree is usually faster than getting two trees
 663                    # and applying the delta ...
 664                    rmtree($tree_dir);
 665                    $opt_v && print "apply-delta\n";
 666                    safe_pipe_capture($TLA,'get','--no-pristine',
 667                                        $ps->{id},$tree_dir);
 668                    $stats{get_delta}++;
 669                }
 670        }
 671    } else {
 672        # new branch work
 673        $opt_v && print "get (new tree)\n";
 674        safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
 675        $stats{get_new}++;
 676    }
 677
 678    # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
 679    system('rsync','-aI','--delete','--exclude',$git_dir,
 680#               '--exclude','.arch-inventory',
 681                '--exclude','.arch-ids','--exclude','{arch}',
 682                '--exclude','+*','--exclude',',*',
 683                "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
 684    return $tree_dir;
 685}
 686
 687sub apply_import {
 688    my $ps = shift;
 689    my $bname = git_branchname($ps->{id});
 690
 691    mkpath($tmp);
 692
 693    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
 694    die "Cannot get import: $!" if $?;
 695    system('rsync','-aI','--delete', '--exclude',$git_dir,
 696                '--exclude','.arch-ids','--exclude','{arch}',
 697                "$tmp/import/", './');
 698    die "Cannot rsync import:$!" if $?;
 699
 700    rmtree("$tmp/import");
 701    die "Cannot remove tempdir: $!" if $?;
 702
 703
 704    return 1;
 705}
 706
 707sub apply_cset {
 708    my $ps = shift;
 709
 710    mkpath($tmp);
 711
 712    # get the changeset
 713    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
 714    die "Cannot get changeset: $!" if $?;
 715
 716    # apply patches
 717    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 718        # this can be sped up considerably by doing
 719        #    (find | xargs cat) | patch
 720        # but that can get mucked up by patches
 721        # with missing trailing newlines or the standard
 722        # 'missing newline' flag in the patch - possibly
 723        # produced with an old/buggy diff.
 724        # slow and safe, we invoke patch once per patchfile
 725        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 726        die "Problem applying patches! $!" if $?;
 727    }
 728
 729    # apply changed binary files
 730    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 731        foreach my $mod (@modified) {
 732            chomp $mod;
 733            my $orig = $mod;
 734            $orig =~ s/\.modified$//; # lazy
 735            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 736            #print "rsync -p '$mod' '$orig'";
 737            system('rsync','-p',$mod,"./$orig");
 738            die "Problem applying binary changes! $!" if $?;
 739        }
 740    }
 741
 742    # bring in new files
 743    system('rsync','-aI','--exclude',$git_dir,
 744                '--exclude','.arch-ids',
 745                '--exclude', '{arch}',
 746                "$tmp/changeset/new-files-archive/",'./');
 747
 748    # deleted files are hinted from the commitlog processing
 749
 750    rmtree("$tmp/changeset");
 751}
 752
 753
 754# =for reference
 755# notes: *-files/-directories keys cannot have spaces, they're always
 756# pika-escaped.  Everything after the first newline
 757# A log entry looks like:
 758# Revision: moodle-org--moodle--1.3.3--patch-15
 759# Archive: arch-eduforge@catalyst.net.nz--2004
 760# Creator: Penny Leach <penny@catalyst.net.nz>
 761# Date: Wed May 25 14:15:34 NZST 2005
 762# Standard-date: 2005-05-25 02:15:34 GMT
 763# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 764#     lang/de/.arch-ids/block_html.php.id
 765# New-directories: lang/de/help/questionnaire
 766#     lang/de/help/questionnaire/.arch-ids
 767# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 768#    db_sears.sql db/db_sears.sql
 769# Removed-files: lang/be/docs/.arch-ids/release.html.id
 770#     lang/be/docs/.arch-ids/releaseold.html.id
 771# Modified-files: admin/cron.php admin/delete.php
 772#     admin/editor.html backup/lib.php backup/restore.php
 773# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 774# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 775#   summary can be multiline with a leading space just like the above fields
 776# Keywords:
 777#
 778# Updating yadda tadda tadda madda
 779sub parselog {
 780    my ($ps, $log) = @_;
 781    my $key = undef;
 782
 783    # headers we want that contain filenames:
 784    my %want_headers = (
 785        new_files => 1,
 786        modified_files => 1,
 787        renamed_files => 1,
 788        renamed_directories => 1,
 789        removed_files => 1,
 790        removed_directories => 1,
 791    );
 792
 793    chomp (@$log);
 794    while ($_ = shift @$log) {
 795        if (/^Continuation-of:\s*(.*)/) {
 796            $ps->{tag} = $1;
 797            $key = undef;
 798        } elsif (/^Summary:\s*(.*)$/ ) {
 799            # summary can be multiline as long as it has a leading space.
 800            # we squeeze it onto a single line, though.
 801            $ps->{summary} = [ $1 ];
 802            $key = 'summary';
 803        } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
 804            $ps->{author} = $1;
 805            $ps->{email} = $2;
 806            $key = undef;
 807        # any *-files or *-directories can be read here:
 808        } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
 809            my $val = $2;
 810            $key = lc $1;
 811            $key =~ tr/-/_/; # too lazy to quote :P
 812            if ($want_headers{$key}) {
 813                push @{$ps->{$key}}, split(/\s+/, $val);
 814            } else {
 815                $key = undef;
 816            }
 817        } elsif (/^$/) {
 818            last; # remainder of @$log that didn't get shifted off is message
 819        } elsif ($key) {
 820            if (/^\s+(.*)$/) {
 821                if ($key eq 'summary') {
 822                    push @{$ps->{$key}}, $1;
 823                } else { # files/directories:
 824                    push @{$ps->{$key}}, split(/\s+/, $1);
 825                }
 826            } else {
 827                $key = undef;
 828            }
 829        }
 830    }
 831
 832    # drop leading empty lines from the log message
 833    while (@$log && $log->[0] eq '') {
 834        shift @$log;
 835    }
 836    if (exists $ps->{summary} && @{$ps->{summary}}) {
 837        $ps->{summary} = join(' ', @{$ps->{summary}});
 838    }
 839    elsif (@$log == 0) {
 840        $ps->{summary} = 'empty commit message';
 841    } else {
 842        $ps->{summary} = $log->[0] . '...';
 843    }
 844    $ps->{message} = join("\n",@$log);
 845
 846    # skip Arch control files, unescape pika-escaped files
 847    foreach my $k (keys %want_headers) {
 848        next unless (defined $ps->{$k});
 849        my @tmp = ();
 850        foreach my $t (@{$ps->{$k}}) {
 851           next unless length ($t);
 852           next if $t =~ m!\{arch\}/!;
 853           next if $t =~ m!\.arch-ids/!;
 854           # should we skip this?
 855           next if $t =~ m!\.arch-inventory$!;
 856           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 857           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 858           if ($t =~ /\\/ ){
 859               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
 860           }
 861           push @tmp, $t;
 862        }
 863        $ps->{$k} = \@tmp;
 864    }
 865}
 866
 867# write/read a tag
 868sub tag {
 869    my ($tag, $commit) = @_;
 870
 871    if ($opt_o) {
 872        $tag =~ s|/|--|g;
 873    } else {
 874        my $patchname = $tag;
 875        $patchname =~ s/.*--//;
 876        $tag = git_branchname ($tag) . '--' . $patchname;
 877    }
 878
 879    if ($commit) {
 880        open(C,">","$git_dir/refs/tags/$tag")
 881            or die "Cannot create tag $tag: $!\n";
 882        print C "$commit\n"
 883            or die "Cannot write tag $tag: $!\n";
 884        close(C)
 885            or die "Cannot write tag $tag: $!\n";
 886        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 887    } else {                    # read
 888        open(C,"<","$git_dir/refs/tags/$tag")
 889            or die "Cannot read tag $tag: $!\n";
 890        $commit = <C>;
 891        chomp $commit;
 892        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 893        close(C)
 894            or die "Cannot read tag $tag: $!\n";
 895        return $commit;
 896    }
 897}
 898
 899# write/read a private tag
 900# reads fail softly if the tag isn't there
 901sub ptag {
 902    my ($tag, $commit) = @_;
 903
 904    # don't use subdirs for tags yet, it could screw up other porcelains
 905    $tag =~ s|/|,|g;
 906
 907    my $tag_file = "$ptag_dir/$tag";
 908    my $tag_branch_dir = dirname($tag_file);
 909    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 910
 911    if ($commit) {              # write
 912        open(C,">",$tag_file)
 913            or die "Cannot create tag $tag: $!\n";
 914        print C "$commit\n"
 915            or die "Cannot write tag $tag: $!\n";
 916        close(C)
 917            or die "Cannot write tag $tag: $!\n";
 918        $rptags{$commit} = $tag
 919            unless $tag =~ m/--base-0$/;
 920    } else {                    # read
 921        # if the tag isn't there, return 0
 922        unless ( -s $tag_file) {
 923            return 0;
 924        }
 925        open(C,"<",$tag_file)
 926            or die "Cannot read tag $tag: $!\n";
 927        $commit = <C>;
 928        chomp $commit;
 929        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 930        close(C)
 931            or die "Cannot read tag $tag: $!\n";
 932        unless (defined $rptags{$commit}) {
 933            $rptags{$commit} = $tag;
 934        }
 935        return $commit;
 936    }
 937}
 938
 939sub find_parents {
 940    #
 941    # Identify what branches are merging into me
 942    # and whether we are fully merged
 943    # git-merge-base <headsha> <headsha> should tell
 944    # me what the base of the merge should be
 945    #
 946    my $ps = shift;
 947
 948    my %branches; # holds an arrayref per branch
 949                  # the arrayref contains a list of
 950                  # merged patches between the base
 951                  # of the merge and the current head
 952
 953    my @parents;  # parents found for this commit
 954
 955    # simple loop to split the merges
 956    # per branch
 957    foreach my $merge (@{$ps->{merges}}) {
 958        my $branch = git_branchname($merge);
 959        unless (defined $branches{$branch} ){
 960            $branches{$branch} = [];
 961        }
 962        push @{$branches{$branch}}, $merge;
 963    }
 964
 965    #
 966    # foreach branch find a merge base and walk it to the
 967    # head where we are, collecting the merged patchsets that
 968    # Arch has recorded. Keep that in @have
 969    # Compare that with the commits on the other branch
 970    # between merge-base and the tip of the branch (@need)
 971    # and see if we have a series of consecutive patches
 972    # starting from the merge base. The tip of the series
 973    # of consecutive patches merged is our new parent for
 974    # that branch.
 975    #
 976    foreach my $branch (keys %branches) {
 977
 978        # check that we actually know about the branch
 979        next unless -e "$git_dir/refs/heads/$branch";
 980
 981        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 982        if ($?) {
 983            # Don't die here, Arch supports one-way cherry-picking
 984            # between branches with no common base (or any relationship
 985            # at all beforehand)
 986            warn "Cannot find merge base for $branch and $ps->{branch}";
 987            next;
 988        }
 989        chomp $mergebase;
 990
 991        # now walk up to the mergepoint collecting what patches we have
 992        my $branchtip = git_rev_parse($ps->{branch});
 993        my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
 994        my %have; # collected merges this branch has
 995        foreach my $merge (@{$ps->{merges}}) {
 996            $have{$merge} = 1;
 997        }
 998        my %ancestorshave;
 999        foreach my $par (@ancestors) {
1000            $par = commitid2pset($par);
1001            if (defined $par->{merges}) {
1002                foreach my $merge (@{$par->{merges}}) {
1003                    $ancestorshave{$merge}=1;
1004                }
1005            }
1006        }
1007        # print "++++ Merges in $ps->{id} are....\n";
1008        # my @have = sort keys %have;   print Dumper(\@have);
1009
1010        # merge what we have with what ancestors have
1011        %have = (%have, %ancestorshave);
1012
1013        # see what the remote branch has - these are the merges we
1014        # will want to have in a consecutive series from the mergebase
1015        my $otherbranchtip = git_rev_parse($branch);
1016        my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
1017        my @need;
1018        foreach my $needps (@needraw) {         # get the psets
1019            $needps = commitid2pset($needps);
1020            # git-rev-list will also
1021            # list commits merged in via earlier
1022            # merges. we are only interested in commits
1023            # from the branch we're looking at
1024            if ($branch eq $needps->{branch}) {
1025                push @need, $needps->{id};
1026            }
1027        }
1028
1029        # print "++++ Merges from $branch we want are....\n";
1030        # print Dumper(\@need);
1031
1032        my $newparent;
1033        while (my $needed_commit = pop @need) {
1034            if ($have{$needed_commit}) {
1035                $newparent = $needed_commit;
1036            } else {
1037                last; # break out of the while
1038            }
1039        }
1040        if ($newparent) {
1041            push @parents, $newparent;
1042        }
1043
1044
1045    } # end foreach branch
1046
1047    # prune redundant parents
1048    my %parents;
1049    foreach my $p (@parents) {
1050        $parents{$p} = 1;
1051    }
1052    foreach my $p (@parents) {
1053        next unless exists $psets{$p}{merges};
1054        next unless ref    $psets{$p}{merges};
1055        my @merges = @{$psets{$p}{merges}};
1056        foreach my $merge (@merges) {
1057            if ($parents{$merge}) {
1058                delete $parents{$merge};
1059            }
1060        }
1061    }
1062
1063    @parents = ();
1064    foreach (keys %parents) {
1065        push @parents, '-p', ptag($_);
1066    }
1067    return @parents;
1068}
1069
1070sub git_rev_parse {
1071    my $name = shift;
1072    my $val  = `git-rev-parse $name`;
1073    die "Error: git-rev-parse $name" if $?;
1074    chomp $val;
1075    return $val;
1076}
1077
1078# resolve a SHA1 to a known patchset
1079sub commitid2pset {
1080    my $commitid = shift;
1081    chomp $commitid;
1082    my $name = $rptags{$commitid}
1083        || die "Cannot find reverse tag mapping for $commitid";
1084    $name =~ s|,|/|;
1085    my $ps   = $psets{$name}
1086        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1087    return $ps;
1088}
1089
1090
1091# an alternative to `command` that allows input to be passed as an array
1092# to work around shell problems with weird characters in arguments
1093sub safe_pipe_capture {
1094    my @output;
1095    if (my $pid = open my $child, '-|') {
1096        @output = (<$child>);
1097        close $child or die join(' ',@_).": $! $?";
1098    } else {
1099        exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1100    }
1101    return wantarray ? @output : join('',@output);
1102}
1103
1104# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1105sub arch_tree_id {
1106    my $dir = shift;
1107    chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1108    return $ret;
1109}
1110
1111sub archive_reachable {
1112    my $archive = shift;
1113    return 1 if $reachable{$archive};
1114    return 0 if $unreachable{$archive};
1115
1116    if (system "$TLA whereis-archive $archive >/dev/null") {
1117        if ($opt_a && (system($TLA,'register-archive',
1118                      "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1119            $reachable{$archive} = 1;
1120            return 1;
1121        }
1122        print STDERR "Archive is unreachable: $archive\n";
1123        $unreachable{$archive} = 1;
1124        return 0;
1125    } else {
1126        $reachable{$archive} = 1;
1127        return 1;
1128    }
1129}