1#!/usr/bin/perl -w 2# 3# This tool is copyright (c) 2005, Martin Langhoff. 4# It is released under the Gnu Public License, version 2. 5# 6# The basic idea is to walk the output of tla abrowse, 7# fetch the changesets and apply them. 8# 9 10=head1 Invocation 11 12 git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] 13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ] 14 15Imports a project from one or more Arch repositories. It will follow branches 16and repositories within the namespaces defined by the <archive/branch> 17parameters supplied. If it cannot find the remote branch a merge comes from 18it will just import it as a regular commit. If it can find it, it will mark it 19as a merge whenever possible. 20 21See man (1) git-archimport for more details. 22 23=head1 TODO 24 25 - create tag objects instead of ref tags 26 - audit shell-escaping of filenames 27 - hide our private tags somewhere smarter 28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines 29 - sort and apply patches by graphing ancestry relations instead of just 30 relying in dates supplied in the changeset itself. 31 tla ancestry-graph -m could be helpful here... 32 33=head1 Devel tricks 34 35Add print in front of the shell commands invoked via backticks. 36 37=head1 Devel Notes 38 39There are several places where Arch and git terminology are intermixed 40and potentially confused. 41 42The notion of a "branch" in git is approximately equivalent to 43a "archive/category--branch--version" in Arch. Also, it should be noted 44that the "--branch" portion of "archive/category--branch--version" is really 45optional in Arch although not many people (nor tools!) seem to know this. 46This means that "archive/category--version" is also a valid "branch" 47in git terms. 48 49We always refer to Arch names by their fully qualified variant (which 50means the "archive" name is prefixed. 51 52For people unfamiliar with Arch, an "archive" is the term for "repository", 53and can contain multiple, unrelated branches. 54 55=cut 56 57use strict; 58use warnings; 59use Getopt::Std; 60use File::Temp qw(tempdir); 61use File::Path qw(mkpath rmtree); 62use File::Basename qw(basename dirname); 63use Data::Dumper qw/ Dumper /; 64use IPC::Open2; 65 66$SIG{'PIPE'}="IGNORE"; 67$ENV{'TZ'}="UTC"; 68 69my$git_dir=$ENV{"GIT_DIR"} ||".git"; 70$ENV{"GIT_DIR"} =$git_dir; 71my$ptag_dir="$git_dir/archimport/tags"; 72 73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o); 74 75sub usage() { 76print STDERR <<END; 77Usage: ${\basename$0} # fetch/update GIT from Arch 78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ] 79 repository/arch-branch [ repository/arch-branch] ... 80END 81exit(1); 82} 83 84getopts("fThvat:D:")or usage(); 85usage if$opt_h; 86 87@ARGV>=1or usage(); 88# $arch_branches: 89# values associated with keys: 90# =1 - Arch version / git 'branch' detected via abrowse on a limit 91# >1 - Arch version / git 'branch' of an auxiliary branch we've merged 92my%arch_branches=map{my$branch=$_;$branch=~s/:[^:]*$//;$branch=>1}@ARGV; 93 94# $branch_name_map: 95# maps arch branches to git branch names 96my%branch_name_map=map{m/^(.*):([^:]*)$/;$1=>$2}grep{m/:/}@ARGV; 97 98$ENV{'TMPDIR'} =$opt_tif$opt_t;# $ENV{TMPDIR} will affect tempdir() calls: 99my$tmp= tempdir('git-archimport-XXXXXX', TMPDIR =>1, CLEANUP =>1); 100$opt_v&&print"+ Using$tmpas temporary directory\n"; 101 102unless(-d $git_dir) {# initial import needs empty directory 103opendir DIR,'.'or die"Unable to open current directory:$!\n"; 104while(my$entry=readdir DIR) { 105$entry=~/^\.\.?$/or 106die"Initial import needs an empty current working directory.\n" 107} 108closedir DIR 109} 110 111my$default_archive;# default Arch archive 112my%reachable= ();# Arch repositories we can access 113my%unreachable= ();# Arch repositories we can't access :< 114my@psets= ();# the collection 115my%psets= ();# the collection, by name 116my%stats= (# Track which strategy we used to import: 117 get_tag =>0, replay =>0, get_new =>0, get_delta =>0, 118 simple_changeset =>0, import_or_tag =>0 119); 120 121my%rptags= ();# my reverse private tags 122# to map a SHA1 to a commitid 123my$TLA=$ENV{'ARCH_CLIENT'} ||'tla'; 124 125sub do_abrowse { 126my$stage=shift; 127while(my($limit,$level) =each%arch_branches) { 128next unless$level==$stage; 129 130open ABROWSE,"$TLAabrowse -fkD --merges$limit|" 131or die"Problems with tla abrowse:$!"; 132 133my%ps= ();# the current one 134my$lastseen=''; 135 136while(<ABROWSE>) { 137chomp; 138 139# first record padded w 8 spaces 140if(s/^\s{8}\b//) { 141my($id,$type) =split(m/\s+/,$_,2); 142 143my%last_ps; 144# store the record we just captured 145if(%ps&& !exists$psets{$ps{id} }) { 146%last_ps=%ps;# break references 147push(@psets, \%last_ps); 148$psets{$last_ps{id} } = \%last_ps; 149} 150 151my$branch= extract_versionname($id); 152%ps= ( id =>$id, branch =>$branch); 153if(%last_ps&& ($last_ps{branch}eq$branch)) { 154$ps{parent_id} =$last_ps{id}; 155} 156 157$arch_branches{$branch} =1; 158$lastseen='id'; 159 160# deal with types (should work with baz or tla): 161if($type=~m/\(.*changeset\)/) { 162$ps{type} ='s'; 163}elsif($type=~/\(.*import\)/) { 164$ps{type} ='i'; 165}elsif($type=~m/\(tag.*?(\S+\@\S+).*?\)/) { 166$ps{type} ='t'; 167# read which revision we've tagged when we parse the log 168$ps{tag} =$1; 169}else{ 170warn"Unknown type$type"; 171} 172 173$arch_branches{$branch} =1; 174$lastseen='id'; 175}elsif(s/^\s{10}//) { 176# 10 leading spaces or more 177# indicate commit metadata 178 179# date 180if($lastseeneq'id'&&m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){ 181$ps{date} =$1; 182$lastseen='date'; 183}elsif($_eq'merges in:') { 184$ps{merges} = []; 185$lastseen='merges'; 186}elsif($lastseeneq'merges'&&s/^\s{2}//) { 187my$id=$_; 188push(@{$ps{merges}},$id); 189 190# aggressive branch finding: 191if($opt_D) { 192my$branch= extract_versionname($id); 193my$repo= extract_reponame($branch); 194 195if(archive_reachable($repo) && 196!defined$arch_branches{$branch}) { 197$arch_branches{$branch} =$stage+1; 198} 199} 200}else{ 201warn"more metadata after merges!?:$_\n"unless/^\s*$/; 202} 203} 204} 205 206if(%ps&& !exists$psets{$ps{id} }) { 207my%temp=%ps;# break references 208if(@psets&&$psets[$#psets]{branch}eq$ps{branch}) { 209$temp{parent_id} =$psets[$#psets]{id}; 210} 211push(@psets, \%temp); 212$psets{$temp{id} } = \%temp; 213} 214 215close ABROWSE or die"$TLAabrowse failed on$limit\n"; 216} 217}# end foreach $root 218 219do_abrowse(1); 220my$depth=2; 221$opt_D||=0; 222while($depth<=$opt_D) { 223 do_abrowse($depth); 224$depth++; 225} 226 227## Order patches by time 228# FIXME see if we can find a more optimal way to do this by graphing 229# the ancestry data and walking it, that way we won't have to rely on 230# client-supplied dates 231@psets=sort{$a->{date}.$b->{id}cmp$b->{date}.$b->{id}}@psets; 232 233#print Dumper \@psets; 234 235## 236## TODO cleanup irrelevant patches 237## and put an initial import 238## or a full tag 239my$import=0; 240unless(-d $git_dir) {# initial import 241if($psets[0]{type}eq'i'||$psets[0]{type}eq't') { 242print"Starting import from$psets[0]{id}\n"; 243`git-init`; 244die$!if$?; 245$import=1; 246}else{ 247die"Need to start from an import or a tag -- cannot use$psets[0]{id}"; 248} 249}else{# progressing an import 250# load the rptags 251opendir(DIR,$ptag_dir) 252||die"can't opendir:$!"; 253while(my$file=readdir(DIR)) { 254# skip non-interesting-files 255next unless-f "$ptag_dir/$file"; 256 257# convert first '--' to '/' from old git-archimport to use 258# as an archivename/c--b--v private tag 259if($file!~m!,!) { 260my$oldfile=$file; 261$file=~s!--!,!; 262print STDERR "converting old tag$oldfileto$file\n"; 263rename("$ptag_dir/$oldfile","$ptag_dir/$file")or die$!; 264} 265my$sha= ptag($file); 266chomp$sha; 267$rptags{$sha} =$file; 268} 269closedir DIR; 270} 271 272# process patchsets 273# extract the Arch repository name (Arch "archive" in Arch-speak) 274sub extract_reponame { 275my$fq_cvbr=shift;# archivename/[[[[category]branch]version]revision] 276return(split(/\//,$fq_cvbr))[0]; 277} 278 279sub extract_versionname { 280my$name=shift; 281$name=~s/--(?:patch|version(?:fix)?|base)-\d+$//; 282return$name; 283} 284 285# convert a fully-qualified revision or version to a unique dirname: 286# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 287# becomes: normalperson@yhbt.net-05,mpd--uclinux--1 288# 289# the git notion of a branch is closer to 290# archive/category--branch--version than archive/category--branch, so we 291# use this to convert to git branch names. 292# Also, keep archive names but replace '/' with ',' since it won't require 293# subdirectories, and is safer than swapping '--' which could confuse 294# reverse-mapping when dealing with bastard branches that 295# are just archive/category--version (no --branch) 296sub tree_dirname { 297my$revision=shift; 298my$name= extract_versionname($revision); 299$name=~ s#/#,#; 300return$name; 301} 302 303# old versions of git-archimport just use the <category--branch> part: 304sub old_style_branchname { 305my$id=shift; 306my$ret= safe_pipe_capture($TLA,'parse-package-name','-p',$id); 307chomp$ret; 308return$ret; 309} 310 311*git_default_branchname =$opt_o? *old_style_branchname : *tree_dirname; 312 313# retrieve default archive, since $branch_name_map keys might not include it 314sub get_default_archive { 315if(!defined$default_archive) { 316$default_archive= safe_pipe_capture($TLA,'my-default-archive'); 317chomp$default_archive; 318} 319return$default_archive; 320} 321 322sub git_branchname { 323my$revision=shift; 324my$name= extract_versionname($revision); 325 326if(exists$branch_name_map{$name}) { 327return$branch_name_map{$name}; 328 329}elsif($name=~ m#^([^/]*)/(.*)$# 330&&$1eq get_default_archive() 331&&exists$branch_name_map{$2}) { 332# the names given in the command-line lacked the archive. 333return$branch_name_map{$2}; 334 335}else{ 336return git_default_branchname($revision); 337} 338} 339 340sub process_patchset_accurate { 341my$ps=shift; 342 343# switch to that branch if we're not already in that branch: 344if(-e "$git_dir/refs/heads/$ps->{branch}") { 345system('git-checkout','-f',$ps->{branch}) ==0or die"$!$?\n"; 346 347# remove any old stuff that got leftover: 348my$rm= safe_pipe_capture('git-ls-files','--others','-z'); 349 rmtree(split(/\0/,$rm))if$rm; 350} 351 352# Apply the import/changeset/merge into the working tree 353my$dir= sync_to_ps($ps); 354# read the new log entry: 355my@commitlog= safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id}); 356die"Error in cat-log:$!"if$?; 357chomp@commitlog; 358 359# grab variables we want from the log, new fields get added to $ps: 360# (author, date, email, summary, message body ...) 361 parselog($ps, \@commitlog); 362 363if($ps->{id} =~/--base-0$/&&$ps->{id}ne$psets[0]{id}) { 364# this should work when importing continuations 365if($ps->{tag} && (my$branchpoint=eval{ ptag($ps->{tag}) })) { 366 367# find where we are supposed to branch from 368if(! -e "$git_dir/refs/heads/$ps->{branch}") { 369system('git-branch',$ps->{branch},$branchpoint) ==0or die"$!$?\n"; 370 371# We trust Arch with the fact that this is just a tag, 372# and it does not affect the state of the tree, so 373# we just tag and move on. If the user really wants us 374# to consolidate more branches into one, don't tag because 375# the tag name would be already taken. 376 tag($ps->{id},$branchpoint); 377 ptag($ps->{id},$branchpoint); 378print" * Tagged$ps->{id} at$branchpoint\n"; 379} 380system('git-checkout','-f',$ps->{branch}) ==0or die"$!$?\n"; 381 382# remove any old stuff that got leftover: 383my$rm= safe_pipe_capture('git-ls-files','--others','-z'); 384 rmtree(split(/\0/,$rm))if$rm; 385return0; 386}else{ 387warn"Tagging from unknown id unsupported\n"if$ps->{tag}; 388} 389# allow multiple bases/imports here since Arch supports cherry-picks 390# from unrelated trees 391} 392 393# update the index with all the changes we got 394system('git-diff-files --name-only -z | '. 395'git-update-index --remove -z --stdin') ==0or die"$!$?\n"; 396system('git-ls-files --others -z | '. 397'git-update-index --add -z --stdin') ==0or die"$!$?\n"; 398return1; 399} 400 401# the native changeset processing strategy. This is very fast, but 402# does not handle permissions or any renames involving directories 403sub process_patchset_fast { 404my$ps=shift; 405# 406# create the branch if needed 407# 408if($ps->{type}eq'i'&& !$import) { 409die"Should not have more than one 'Initial import' per GIT import:$ps->{id}"; 410} 411 412unless($import) {# skip for import 413if( -e "$git_dir/refs/heads/$ps->{branch}") { 414# we know about this branch 415system('git-checkout',$ps->{branch}); 416}else{ 417# new branch! we need to verify a few things 418die"Branch on a non-tag!"unless$ps->{type}eq't'; 419my$branchpoint= ptag($ps->{tag}); 420die"Tagging from unknown id unsupported:$ps->{tag}" 421unless$branchpoint; 422 423# find where we are supposed to branch from 424if(! -e "$git_dir/refs/heads/$ps->{branch}") { 425system('git-branch',$ps->{branch},$branchpoint) ==0or die"$!$?\n"; 426 427# We trust Arch with the fact that this is just a tag, 428# and it does not affect the state of the tree, so 429# we just tag and move on. If the user really wants us 430# to consolidate more branches into one, don't tag because 431# the tag name would be already taken. 432 tag($ps->{id},$branchpoint); 433 ptag($ps->{id},$branchpoint); 434print" * Tagged$ps->{id} at$branchpoint\n"; 435} 436system('git-checkout',$ps->{branch}) ==0or die"$!$?\n"; 437return0; 438} 439die$!if$?; 440} 441 442# 443# Apply the import/changeset/merge into the working tree 444# 445if($ps->{type}eq'i'||$ps->{type}eq't') { 446 apply_import($ps)or die$!; 447$stats{import_or_tag}++; 448$import=0; 449}elsif($ps->{type}eq's') { 450 apply_cset($ps); 451$stats{simple_changeset}++; 452} 453 454# 455# prepare update git's index, based on what arch knows 456# about the pset, resolve parents, etc 457# 458 459my@commitlog= safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 460die"Error in cat-archive-log:$!"if$?; 461 462 parselog($ps,\@commitlog); 463 464# imports don't give us good info 465# on added files. Shame on them 466if($ps->{type}eq'i'||$ps->{type}eq't') { 467system('git-ls-files --deleted -z | '. 468'git-update-index --remove -z --stdin') ==0or die"$!$?\n"; 469system('git-ls-files --others -z | '. 470'git-update-index --add -z --stdin') ==0or die"$!$?\n"; 471} 472 473# TODO: handle removed_directories and renamed_directories: 474 475if(my$del=$ps->{removed_files}) { 476unlink@$del; 477while(@$del) { 478my@slice=splice(@$del,0,100); 479system('git-update-index','--remove','--',@slice) ==0or 480die"Error in git-update-index --remove:$!$?\n"; 481} 482} 483 484if(my$ren=$ps->{renamed_files}) {# renamed 485if(@$ren%2) { 486die"Odd number of entries in rename!?"; 487} 488 489while(@$ren) { 490my$from=shift@$ren; 491my$to=shift@$ren; 492 493unless(-d dirname($to)) { 494 mkpath(dirname($to));# will die on err 495} 496# print "moving $from $to"; 497rename($from,$to)or die"Error renaming '$from' '$to':$!\n"; 498system('git-update-index','--remove','--',$from) ==0or 499die"Error in git-update-index --remove:$!$?\n"; 500system('git-update-index','--add','--',$to) ==0or 501die"Error in git-update-index --add:$!$?\n"; 502} 503} 504 505if(my$add=$ps->{new_files}) { 506while(@$add) { 507my@slice=splice(@$add,0,100); 508system('git-update-index','--add','--',@slice) ==0or 509die"Error in git-update-index --add:$!$?\n"; 510} 511} 512 513if(my$mod=$ps->{modified_files}) { 514while(@$mod) { 515my@slice=splice(@$mod,0,100); 516system('git-update-index','--',@slice) ==0or 517die"Error in git-update-index:$!$?\n"; 518} 519} 520return1;# we successfully applied the changeset 521} 522 523if($opt_f) { 524print"Will import patchsets using the fast strategy\n", 525"Renamed directories and permission changes will be missed\n"; 526*process_patchset = *process_patchset_fast; 527}else{ 528print"Using the default (accurate) import strategy.\n", 529"Things may be a bit slow\n"; 530*process_patchset = *process_patchset_accurate; 531} 532 533foreachmy$ps(@psets) { 534# process patchsets 535$ps->{branch} = git_branchname($ps->{id}); 536 537# 538# ensure we have a clean state 539# 540if(my$dirty=`git-diff-files`) { 541die"Unclean tree when about to process$ps->{id} ". 542" - did we fail to commit cleanly before?\n$dirty"; 543} 544die$!if$?; 545 546# 547# skip commits already in repo 548# 549if(ptag($ps->{id})) { 550$opt_v&&print" * Skipping already imported:$ps->{id}\n"; 551next; 552} 553 554print" * Starting to work on$ps->{id}\n"; 555 556 process_patchset($ps)ornext; 557 558# warn "errors when running git-update-index! $!"; 559my$tree=`git-write-tree`; 560die"cannot write tree$!"if$?; 561chomp$tree; 562 563# 564# Who's your daddy? 565# 566my@par; 567if( -e "$git_dir/refs/heads/$ps->{branch}") { 568if(open HEAD,"<","$git_dir/refs/heads/$ps->{branch}") { 569my$p= <HEAD>; 570close HEAD; 571chomp$p; 572push@par,'-p',$p; 573}else{ 574if($ps->{type}eq's') { 575warn"Could not find the right head for the branch$ps->{branch}"; 576} 577} 578} 579 580if($ps->{merges}) { 581push@par, find_parents($ps); 582} 583 584# 585# Commit, tag and clean state 586# 587$ENV{TZ} ='GMT'; 588$ENV{GIT_AUTHOR_NAME} =$ps->{author}; 589$ENV{GIT_AUTHOR_EMAIL} =$ps->{email}; 590$ENV{GIT_AUTHOR_DATE} =$ps->{date}; 591$ENV{GIT_COMMITTER_NAME} =$ps->{author}; 592$ENV{GIT_COMMITTER_EMAIL} =$ps->{email}; 593$ENV{GIT_COMMITTER_DATE} =$ps->{date}; 594 595my$pid= open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 596or die$!; 597print WRITER $ps->{summary},"\n\n"; 598 599# only print message if it's not empty, to avoid a spurious blank line; 600# also append an extra newline, so there's a blank line before the 601# following "git-archimport-id:" line. 602print WRITER $ps->{message},"\n\n"if($ps->{message}ne""); 603 604# make it easy to backtrack and figure out which Arch revision this was: 605print WRITER 'git-archimport-id: ',$ps->{id},"\n"; 606 607close WRITER; 608my$commitid= <READER>;# read 609chomp$commitid; 610close READER; 611waitpid$pid,0;# close; 612 613if(length$commitid!=40) { 614die"Something went wrong with the commit!$!$commitid"; 615} 616# 617# Update the branch 618# 619open HEAD,">","$git_dir/refs/heads/$ps->{branch}"; 620print HEAD $commitid; 621close HEAD; 622system('git-update-ref','HEAD',"$ps->{branch}"); 623 624# tag accordingly 625 ptag($ps->{id},$commitid);# private tag 626if($opt_T||$ps->{type}eq't'||$ps->{type}eq'i') { 627 tag($ps->{id},$commitid); 628} 629print" * Committed$ps->{id}\n"; 630print" + tree$tree\n"; 631print" + commit$commitid\n"; 632$opt_v&&print" + commit date is$ps->{date}\n"; 633$opt_v&&print" + parents: ",join(' ',@par),"\n"; 634} 635 636if($opt_v) { 637foreach(sort keys%stats) { 638print"$_:$stats{$_}\n"; 639} 640} 641exit0; 642 643# used by the accurate strategy: 644sub sync_to_ps { 645my$ps=shift; 646my$tree_dir=$tmp.'/'.tree_dirname($ps->{id}); 647 648$opt_v&&print"sync_to_ps($ps->{id}) method: "; 649 650if(-d $tree_dir) { 651if($ps->{type}eq't') { 652$opt_v&&print"get (tag)\n"; 653# looks like a tag-only or (worse,) a mixed tags/changeset branch, 654# can't rely on replay to work correctly on these 655 rmtree($tree_dir); 656 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir); 657$stats{get_tag}++; 658}else{ 659my$tree_id= arch_tree_id($tree_dir); 660if($ps->{parent_id} && ($ps->{parent_id}eq$tree_id)) { 661# the common case (hopefully) 662$opt_v&&print"replay\n"; 663 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id}); 664$stats{replay}++; 665}else{ 666# getting one tree is usually faster than getting two trees 667# and applying the delta ... 668 rmtree($tree_dir); 669$opt_v&&print"apply-delta\n"; 670 safe_pipe_capture($TLA,'get','--no-pristine', 671$ps->{id},$tree_dir); 672$stats{get_delta}++; 673} 674} 675}else{ 676# new branch work 677$opt_v&&print"get (new tree)\n"; 678 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir); 679$stats{get_new}++; 680} 681 682# added -I flag to rsync since we're going to fast! AIEEEEE!!!! 683system('rsync','-aI','--delete','--exclude',$git_dir, 684# '--exclude','.arch-inventory', 685'--exclude','.arch-ids','--exclude','{arch}', 686'--exclude','+*','--exclude',',*', 687"$tree_dir/",'./') ==0or die"Cannot rsync$tree_dir:$!$?"; 688return$tree_dir; 689} 690 691sub apply_import { 692my$ps=shift; 693my$bname= git_branchname($ps->{id}); 694 695 mkpath($tmp); 696 697 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import"); 698die"Cannot get import:$!"if$?; 699system('rsync','-aI','--delete','--exclude',$git_dir, 700'--exclude','.arch-ids','--exclude','{arch}', 701"$tmp/import/",'./'); 702die"Cannot rsync import:$!"if$?; 703 704 rmtree("$tmp/import"); 705die"Cannot remove tempdir:$!"if$?; 706 707 708return1; 709} 710 711sub apply_cset { 712my$ps=shift; 713 714 mkpath($tmp); 715 716# get the changeset 717 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset"); 718die"Cannot get changeset:$!"if$?; 719 720# apply patches 721if(`find$tmp/changeset/patches-type f -name '*.patch'`) { 722# this can be sped up considerably by doing 723# (find | xargs cat) | patch 724# but that can get mucked up by patches 725# with missing trailing newlines or the standard 726# 'missing newline' flag in the patch - possibly 727# produced with an old/buggy diff. 728# slow and safe, we invoke patch once per patchfile 729`find$tmp/changeset/patches-type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`; 730die"Problem applying patches!$!"if$?; 731} 732 733# apply changed binary files 734if(my@modified=`find$tmp/changeset/patches-type f -name '*.modified'`) { 735foreachmy$mod(@modified) { 736chomp$mod; 737my$orig=$mod; 738$orig=~s/\.modified$//;# lazy 739$orig=~s!^\Q$tmp\E/changeset/patches/!!; 740#print "rsync -p '$mod' '$orig'"; 741system('rsync','-p',$mod,"./$orig"); 742die"Problem applying binary changes!$!"if$?; 743} 744} 745 746# bring in new files 747system('rsync','-aI','--exclude',$git_dir, 748'--exclude','.arch-ids', 749'--exclude','{arch}', 750"$tmp/changeset/new-files-archive/",'./'); 751 752# deleted files are hinted from the commitlog processing 753 754 rmtree("$tmp/changeset"); 755} 756 757 758# =for reference 759# notes: *-files/-directories keys cannot have spaces, they're always 760# pika-escaped. Everything after the first newline 761# A log entry looks like: 762# Revision: moodle-org--moodle--1.3.3--patch-15 763# Archive: arch-eduforge@catalyst.net.nz--2004 764# Creator: Penny Leach <penny@catalyst.net.nz> 765# Date: Wed May 25 14:15:34 NZST 2005 766# Standard-date: 2005-05-25 02:15:34 GMT 767# New-files: lang/de/.arch-ids/block_glossary_random.php.id 768# lang/de/.arch-ids/block_html.php.id 769# New-directories: lang/de/help/questionnaire 770# lang/de/help/questionnaire/.arch-ids 771# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id 772# db_sears.sql db/db_sears.sql 773# Removed-files: lang/be/docs/.arch-ids/release.html.id 774# lang/be/docs/.arch-ids/releaseold.html.id 775# Modified-files: admin/cron.php admin/delete.php 776# admin/editor.html backup/lib.php backup/restore.php 777# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15 778# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+) 779# summary can be multiline with a leading space just like the above fields 780# Keywords: 781# 782# Updating yadda tadda tadda madda 783sub parselog { 784my($ps,$log) =@_; 785my$key=undef; 786 787# headers we want that contain filenames: 788my%want_headers= ( 789 new_files =>1, 790 modified_files =>1, 791 renamed_files =>1, 792 renamed_directories =>1, 793 removed_files =>1, 794 removed_directories =>1, 795); 796 797chomp(@$log); 798while($_=shift@$log) { 799if(/^Continuation-of:\s*(.*)/) { 800$ps->{tag} =$1; 801$key=undef; 802}elsif(/^Summary:\s*(.*)$/) { 803# summary can be multiline as long as it has a leading space. 804# we squeeze it onto a single line, though. 805$ps->{summary} = [$1]; 806$key='summary'; 807}elsif(/^Creator: (.*)\s*<([^\>]+)>/) { 808$ps->{author} =$1; 809$ps->{email} =$2; 810$key=undef; 811# any *-files or *-directories can be read here: 812}elsif(/^([A-Z][a-z\-]+):\s*(.*)$/) { 813my$val=$2; 814$key=lc$1; 815$key=~tr/-/_/;# too lazy to quote :P 816if($want_headers{$key}) { 817push@{$ps->{$key}},split(/\s+/,$val); 818}else{ 819$key=undef; 820} 821}elsif(/^$/) { 822last;# remainder of @$log that didn't get shifted off is message 823}elsif($key) { 824if(/^\s+(.*)$/) { 825if($keyeq'summary') { 826push@{$ps->{$key}},$1; 827}else{# files/directories: 828push@{$ps->{$key}},split(/\s+/,$1); 829} 830}else{ 831$key=undef; 832} 833} 834} 835 836# drop leading empty lines from the log message 837while(@$log&&$log->[0]eq'') { 838shift@$log; 839} 840if(exists$ps->{summary} && @{$ps->{summary}}) { 841$ps->{summary} =join(' ', @{$ps->{summary}}); 842} 843elsif(@$log==0) { 844$ps->{summary} ='empty commit message'; 845}else{ 846$ps->{summary} =$log->[0] .'...'; 847} 848$ps->{message} =join("\n",@$log); 849 850# skip Arch control files, unescape pika-escaped files 851foreachmy$k(keys%want_headers) { 852next unless(defined$ps->{$k}); 853my@tmp= (); 854foreachmy$t(@{$ps->{$k}}) { 855next unlesslength($t); 856next if$t=~m!\{arch\}/!; 857next if$t=~m!\.arch-ids/!; 858# should we skip this? 859next if$t=~m!\.arch-inventory$!; 860# tla cat-archive-log will give us filenames with spaces as file\(sp)name - why? 861# we can assume that any filename with \ indicates some pika escaping that we want to get rid of. 862if($t=~/\\/){ 863$t= (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0]; 864} 865push@tmp,$t; 866} 867$ps->{$k} = \@tmp; 868} 869} 870 871# write/read a tag 872sub tag { 873my($tag,$commit) =@_; 874 875if($opt_o) { 876$tag=~ s|/|--|g; 877}else{ 878my$patchname=$tag; 879$patchname=~s/.*--//; 880$tag= git_branchname ($tag) .'--'.$patchname; 881} 882 883if($commit) { 884open(C,">","$git_dir/refs/tags/$tag") 885or die"Cannot create tag$tag:$!\n"; 886print C "$commit\n" 887or die"Cannot write tag$tag:$!\n"; 888close(C) 889or die"Cannot write tag$tag:$!\n"; 890print" * Created tag '$tag' on '$commit'\n"if$opt_v; 891}else{# read 892open(C,"<","$git_dir/refs/tags/$tag") 893or die"Cannot read tag$tag:$!\n"; 894$commit= <C>; 895chomp$commit; 896die"Error reading tag$tag:$!\n"unlesslength$commit==40; 897close(C) 898or die"Cannot read tag$tag:$!\n"; 899return$commit; 900} 901} 902 903# write/read a private tag 904# reads fail softly if the tag isn't there 905sub ptag { 906my($tag,$commit) =@_; 907 908# don't use subdirs for tags yet, it could screw up other porcelains 909$tag=~ s|/|,|g; 910 911my$tag_file="$ptag_dir/$tag"; 912my$tag_branch_dir= dirname($tag_file); 913 mkpath($tag_branch_dir)unless(-d $tag_branch_dir); 914 915if($commit) {# write 916open(C,">",$tag_file) 917or die"Cannot create tag$tag:$!\n"; 918print C "$commit\n" 919or die"Cannot write tag$tag:$!\n"; 920close(C) 921or die"Cannot write tag$tag:$!\n"; 922$rptags{$commit} =$tag 923unless$tag=~m/--base-0$/; 924}else{# read 925# if the tag isn't there, return 0 926unless( -s $tag_file) { 927return0; 928} 929open(C,"<",$tag_file) 930or die"Cannot read tag$tag:$!\n"; 931$commit= <C>; 932chomp$commit; 933die"Error reading tag$tag:$!\n"unlesslength$commit==40; 934close(C) 935or die"Cannot read tag$tag:$!\n"; 936unless(defined$rptags{$commit}) { 937$rptags{$commit} =$tag; 938} 939return$commit; 940} 941} 942 943sub find_parents { 944# 945# Identify what branches are merging into me 946# and whether we are fully merged 947# git-merge-base <headsha> <headsha> should tell 948# me what the base of the merge should be 949# 950my$ps=shift; 951 952my%branches;# holds an arrayref per branch 953# the arrayref contains a list of 954# merged patches between the base 955# of the merge and the current head 956 957my@parents;# parents found for this commit 958 959# simple loop to split the merges 960# per branch 961foreachmy$merge(@{$ps->{merges}}) { 962my$branch= git_branchname($merge); 963unless(defined$branches{$branch} ){ 964$branches{$branch} = []; 965} 966push@{$branches{$branch}},$merge; 967} 968 969# 970# foreach branch find a merge base and walk it to the 971# head where we are, collecting the merged patchsets that 972# Arch has recorded. Keep that in @have 973# Compare that with the commits on the other branch 974# between merge-base and the tip of the branch (@need) 975# and see if we have a series of consecutive patches 976# starting from the merge base. The tip of the series 977# of consecutive patches merged is our new parent for 978# that branch. 979# 980foreachmy$branch(keys%branches) { 981 982# check that we actually know about the branch 983next unless-e "$git_dir/refs/heads/$branch"; 984 985my$mergebase=`git-merge-base$branch$ps->{branch}`; 986if($?) { 987# Don't die here, Arch supports one-way cherry-picking 988# between branches with no common base (or any relationship 989# at all beforehand) 990warn"Cannot find merge base for$branchand$ps->{branch}"; 991next; 992} 993chomp$mergebase; 994 995# now walk up to the mergepoint collecting what patches we have 996my$branchtip= git_rev_parse($ps->{branch}); 997my@ancestors=`git-rev-list --topo-order$branchtip^$mergebase`; 998 my%have; # collected merges this branch has 999 foreach my$merge(@{$ps->{merges}}) {1000$have{$merge} = 1;1001 }1002 my%ancestorshave;1003 foreach my$par(@ancestors) {1004$par= commitid2pset($par);1005 if (defined$par->{merges}) {1006 foreach my$merge(@{$par->{merges}}) {1007$ancestorshave{$merge}=1;1008 }1009 }1010 }1011 # print "++++ Merges in$ps->{id} are....\n";1012 # my@have= sort keys%have; print Dumper(\@have);10131014 # merge what we have with what ancestors have1015%have= (%have,%ancestorshave);10161017 # see what the remote branch has - these are the merges we1018 # will want to have in a consecutive series from the mergebase1019 my$otherbranchtip= git_rev_parse($branch);1020 my@needraw= `git-rev-list --topo-order $otherbranchtip^$mergebase`;1021my@need;1022foreachmy$needps(@needraw) {# get the psets1023$needps= commitid2pset($needps);1024# git-rev-list will also1025# list commits merged in via earlier1026# merges. we are only interested in commits1027# from the branch we're looking at1028if($brancheq$needps->{branch}) {1029push@need,$needps->{id};1030}1031}10321033# print "++++ Merges from $branch we want are....\n";1034# print Dumper(\@need);10351036my$newparent;1037while(my$needed_commit=pop@need) {1038if($have{$needed_commit}) {1039$newparent=$needed_commit;1040}else{1041last;# break out of the while1042}1043}1044if($newparent) {1045push@parents,$newparent;1046}104710481049}# end foreach branch10501051# prune redundant parents1052my%parents;1053foreachmy$p(@parents) {1054$parents{$p} =1;1055}1056foreachmy$p(@parents) {1057next unlessexists$psets{$p}{merges};1058next unlessref$psets{$p}{merges};1059my@merges= @{$psets{$p}{merges}};1060foreachmy$merge(@merges) {1061if($parents{$merge}) {1062delete$parents{$merge};1063}1064}1065}10661067@parents= ();1068foreach(keys%parents) {1069push@parents,'-p', ptag($_);1070}1071return@parents;1072}10731074sub git_rev_parse {1075my$name=shift;1076my$val=`git-rev-parse$name`;1077 die "Error: git-rev-parse$name" if$?;1078 chomp$val;1079 return$val;1080}10811082# resolve a SHA1 to a known patchset1083sub commitid2pset {1084 my$commitid= shift;1085 chomp$commitid;1086 my$name=$rptags{$commitid}1087 || die "Cannot find reverse tag mapping for$commitid";1088$name=~ s|,|/|;1089 my$ps=$psets{$name}1090 || (print Dumper(sort keys%psets)) && die "Cannot find patchset for$name";1091 return$ps;1092}109310941095# an alternative to `command` that allows input to be passed as an array1096# to work around shell problems with weird characters in arguments1097sub safe_pipe_capture {1098 my@output;1099 if (my$pid= open my$child, '-|') {1100@output= (<$child>);1101 close$childor die join(' ',@_).":$!$?";1102 } else {1103 exec(@_) or die "$!$?"; # exec() can fail the executable can't be found1104 }1105 return wantarray ?@output: join('',@output);1106}11071108# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`1109sub arch_tree_id {1110 my$dir= shift;1111 chomp( my$ret= (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );1112 return$ret;1113}11141115sub archive_reachable {1116 my$archive= shift;1117 return 1 if$reachable{$archive};1118 return 0 if$unreachable{$archive};11191120 if (system "$TLAwhereis-archive$archive>/dev/null") {1121 if ($opt_a&& (system($TLA,'register-archive',1122 "http://mirrors.sourcecontrol.net/$archive") == 0)) {1123$reachable{$archive} = 1;1124 return 1;1125 }1126 print STDERR "Archive is unreachable:$archive\n";1127$unreachable{$archive} = 1;1128 return 0;1129 } else {1130$reachable{$archive} = 1;1131 return 1;1132 }1133}