1#!/usr/bin/perl 2# 3# This tool is copyright (c) 2005, Martin Langhoff. 4# It is released under the Gnu Public License, version 2. 5# 6# The basic idea is to walk the output of tla abrowse, 7# fetch the changesets and apply them. 8# 9 10=head1 Invocation 11 12 git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] 13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ] 14 15Imports a project from one or more Arch repositories. It will follow branches 16and repositories within the namespaces defined by the <archive/branch> 17parameters supplied. If it cannot find the remote branch a merge comes from 18it will just import it as a regular commit. If it can find it, it will mark it 19as a merge whenever possible. 20 21See man (1) git-archimport for more details. 22 23=head1 TODO 24 25 - create tag objects instead of ref tags 26 - audit shell-escaping of filenames 27 - hide our private tags somewhere smarter 28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines 29 - sort and apply patches by graphing ancestry relations instead of just 30 relying in dates supplied in the changeset itself. 31 tla ancestry-graph -m could be helpful here... 32 33=head1 Devel tricks 34 35Add print in front of the shell commands invoked via backticks. 36 37=head1 Devel Notes 38 39There are several places where Arch and git terminology are intermixed 40and potentially confused. 41 42The notion of a "branch" in git is approximately equivalent to 43a "archive/category--branch--version" in Arch. Also, it should be noted 44that the "--branch" portion of "archive/category--branch--version" is really 45optional in Arch although not many people (nor tools!) seem to know this. 46This means that "archive/category--version" is also a valid "branch" 47in git terms. 48 49We always refer to Arch names by their fully qualified variant (which 50means the "archive" name is prefixed. 51 52For people unfamiliar with Arch, an "archive" is the term for "repository", 53and can contain multiple, unrelated branches. 54 55=cut 56 57use5.008; 58use strict; 59use warnings; 60use Getopt::Std; 61use File::Temp qw(tempdir); 62use File::Path qw(mkpath rmtree); 63use File::Basename qw(basename dirname); 64use Data::Dumper qw/ Dumper /; 65use IPC::Open2; 66 67$SIG{'PIPE'}="IGNORE"; 68$ENV{'TZ'}="UTC"; 69 70my$git_dir=$ENV{"GIT_DIR"} ||".git"; 71$ENV{"GIT_DIR"} =$git_dir; 72my$ptag_dir="$git_dir/archimport/tags"; 73 74our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o); 75 76sub usage() { 77print STDERR <<END; 78usage: git archimport # fetch/update GIT from Arch 79 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ] 80 repository/arch-branch [ repository/arch-branch] ... 81END 82exit(1); 83} 84 85getopts("fThvat:D:")or usage(); 86usage if$opt_h; 87 88@ARGV>=1or usage(); 89# $arch_branches: 90# values associated with keys: 91# =1 - Arch version / git 'branch' detected via abrowse on a limit 92# >1 - Arch version / git 'branch' of an auxiliary branch we've merged 93my%arch_branches=map{my$branch=$_;$branch=~s/:[^:]*$//;$branch=>1}@ARGV; 94 95# $branch_name_map: 96# maps arch branches to git branch names 97my%branch_name_map=map{m/^(.*):([^:]*)$/;$1=>$2}grep{m/:/}@ARGV; 98 99$ENV{'TMPDIR'} =$opt_tif$opt_t;# $ENV{TMPDIR} will affect tempdir() calls: 100my$tmp= tempdir('git-archimport-XXXXXX', TMPDIR =>1, CLEANUP =>1); 101$opt_v&&print"+ Using$tmpas temporary directory\n"; 102 103unless(-d $git_dir) {# initial import needs empty directory 104opendir DIR,'.'or die"Unable to open current directory:$!\n"; 105while(my$entry=readdir DIR) { 106$entry=~/^\.\.?$/or 107die"Initial import needs an empty current working directory.\n" 108} 109closedir DIR 110} 111 112my$default_archive;# default Arch archive 113my%reachable= ();# Arch repositories we can access 114my%unreachable= ();# Arch repositories we can't access :< 115my@psets= ();# the collection 116my%psets= ();# the collection, by name 117my%stats= (# Track which strategy we used to import: 118 get_tag =>0, replay =>0, get_new =>0, get_delta =>0, 119 simple_changeset =>0, import_or_tag =>0 120); 121 122my%rptags= ();# my reverse private tags 123# to map a SHA1 to a commitid 124my$TLA=$ENV{'ARCH_CLIENT'} ||'tla'; 125 126sub do_abrowse { 127my$stage=shift; 128while(my($limit,$level) =each%arch_branches) { 129next unless$level==$stage; 130 131open ABROWSE,"$TLAabrowse -fkD --merges$limit|" 132or die"Problems with tla abrowse:$!"; 133 134my%ps= ();# the current one 135my$lastseen=''; 136 137while(<ABROWSE>) { 138chomp; 139 140# first record padded w 8 spaces 141if(s/^\s{8}\b//) { 142my($id,$type) =split(m/\s+/,$_,2); 143 144my%last_ps; 145# store the record we just captured 146if(%ps&& !exists$psets{$ps{id} }) { 147%last_ps=%ps;# break references 148push(@psets, \%last_ps); 149$psets{$last_ps{id} } = \%last_ps; 150} 151 152my$branch= extract_versionname($id); 153%ps= ( id =>$id, branch =>$branch); 154if(%last_ps&& ($last_ps{branch}eq$branch)) { 155$ps{parent_id} =$last_ps{id}; 156} 157 158$arch_branches{$branch} =1; 159$lastseen='id'; 160 161# deal with types (should work with baz or tla): 162if($type=~m/\(.*changeset\)/) { 163$ps{type} ='s'; 164}elsif($type=~/\(.*import\)/) { 165$ps{type} ='i'; 166}elsif($type=~m/\(tag.*?(\S+\@\S+).*?\)/) { 167$ps{type} ='t'; 168# read which revision we've tagged when we parse the log 169$ps{tag} =$1; 170}else{ 171warn"Unknown type$type"; 172} 173 174$arch_branches{$branch} =1; 175$lastseen='id'; 176}elsif(s/^\s{10}//) { 177# 10 leading spaces or more 178# indicate commit metadata 179 180# date 181if($lastseeneq'id'&&m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){ 182$ps{date} =$1; 183$lastseen='date'; 184}elsif($_eq'merges in:') { 185$ps{merges} = []; 186$lastseen='merges'; 187}elsif($lastseeneq'merges'&&s/^\s{2}//) { 188my$id=$_; 189push(@{$ps{merges}},$id); 190 191# aggressive branch finding: 192if($opt_D) { 193my$branch= extract_versionname($id); 194my$repo= extract_reponame($branch); 195 196if(archive_reachable($repo) && 197!defined$arch_branches{$branch}) { 198$arch_branches{$branch} =$stage+1; 199} 200} 201}else{ 202warn"more metadata after merges!?:$_\n"unless/^\s*$/; 203} 204} 205} 206 207if(%ps&& !exists$psets{$ps{id} }) { 208my%temp=%ps;# break references 209if(@psets&&$psets[$#psets]{branch}eq$ps{branch}) { 210$temp{parent_id} =$psets[$#psets]{id}; 211} 212push(@psets, \%temp); 213$psets{$temp{id} } = \%temp; 214} 215 216close ABROWSE or die"$TLAabrowse failed on$limit\n"; 217} 218}# end foreach $root 219 220do_abrowse(1); 221my$depth=2; 222$opt_D||=0; 223while($depth<=$opt_D) { 224 do_abrowse($depth); 225$depth++; 226} 227 228## Order patches by time 229# FIXME see if we can find a more optimal way to do this by graphing 230# the ancestry data and walking it, that way we won't have to rely on 231# client-supplied dates 232@psets=sort{$a->{date}.$b->{id}cmp$b->{date}.$b->{id}}@psets; 233 234#print Dumper \@psets; 235 236## 237## TODO cleanup irrelevant patches 238## and put an initial import 239## or a full tag 240my$import=0; 241unless(-d $git_dir) {# initial import 242if($psets[0]{type}eq'i'||$psets[0]{type}eq't') { 243print"Starting import from$psets[0]{id}\n"; 244`git-init`; 245die$!if$?; 246$import=1; 247}else{ 248die"Need to start from an import or a tag -- cannot use$psets[0]{id}"; 249} 250}else{# progressing an import 251# load the rptags 252opendir(DIR,$ptag_dir) 253||die"can't opendir:$!"; 254while(my$file=readdir(DIR)) { 255# skip non-interesting-files 256next unless-f "$ptag_dir/$file"; 257 258# convert first '--' to '/' from old git-archimport to use 259# as an archivename/c--b--v private tag 260if($file!~m!,!) { 261my$oldfile=$file; 262$file=~s!--!,!; 263print STDERR "converting old tag$oldfileto$file\n"; 264rename("$ptag_dir/$oldfile","$ptag_dir/$file")or die$!; 265} 266my$sha= ptag($file); 267chomp$sha; 268$rptags{$sha} =$file; 269} 270closedir DIR; 271} 272 273# process patchsets 274# extract the Arch repository name (Arch "archive" in Arch-speak) 275sub extract_reponame { 276my$fq_cvbr=shift;# archivename/[[[[category]branch]version]revision] 277return(split(/\//,$fq_cvbr))[0]; 278} 279 280sub extract_versionname { 281my$name=shift; 282$name=~s/--(?:patch|version(?:fix)?|base)-\d+$//; 283return$name; 284} 285 286# convert a fully-qualified revision or version to a unique dirname: 287# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 288# becomes: normalperson@yhbt.net-05,mpd--uclinux--1 289# 290# the git notion of a branch is closer to 291# archive/category--branch--version than archive/category--branch, so we 292# use this to convert to git branch names. 293# Also, keep archive names but replace '/' with ',' since it won't require 294# subdirectories, and is safer than swapping '--' which could confuse 295# reverse-mapping when dealing with bastard branches that 296# are just archive/category--version (no --branch) 297sub tree_dirname { 298my$revision=shift; 299my$name= extract_versionname($revision); 300$name=~ s#/#,#; 301return$name; 302} 303 304# old versions of git-archimport just use the <category--branch> part: 305sub old_style_branchname { 306my$id=shift; 307my$ret= safe_pipe_capture($TLA,'parse-package-name','-p',$id); 308chomp$ret; 309return$ret; 310} 311 312*git_default_branchname =$opt_o? *old_style_branchname : *tree_dirname; 313 314# retrieve default archive, since $branch_name_map keys might not include it 315sub get_default_archive { 316if(!defined$default_archive) { 317$default_archive= safe_pipe_capture($TLA,'my-default-archive'); 318chomp$default_archive; 319} 320return$default_archive; 321} 322 323sub git_branchname { 324my$revision=shift; 325my$name= extract_versionname($revision); 326 327if(exists$branch_name_map{$name}) { 328return$branch_name_map{$name}; 329 330}elsif($name=~ m#^([^/]*)/(.*)$# 331&&$1eq get_default_archive() 332&&exists$branch_name_map{$2}) { 333# the names given in the command-line lacked the archive. 334return$branch_name_map{$2}; 335 336}else{ 337return git_default_branchname($revision); 338} 339} 340 341sub process_patchset_accurate { 342my$ps=shift; 343 344# switch to that branch if we're not already in that branch: 345if(-e "$git_dir/refs/heads/$ps->{branch}") { 346system('git-checkout','-f',$ps->{branch}) ==0or die"$!$?\n"; 347 348# remove any old stuff that got leftover: 349my$rm= safe_pipe_capture('git-ls-files','--others','-z'); 350 rmtree(split(/\0/,$rm))if$rm; 351} 352 353# Apply the import/changeset/merge into the working tree 354my$dir= sync_to_ps($ps); 355# read the new log entry: 356my@commitlog= safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id}); 357die"Error in cat-log:$!"if$?; 358chomp@commitlog; 359 360# grab variables we want from the log, new fields get added to $ps: 361# (author, date, email, summary, message body ...) 362 parselog($ps, \@commitlog); 363 364if($ps->{id} =~/--base-0$/&&$ps->{id}ne$psets[0]{id}) { 365# this should work when importing continuations 366if($ps->{tag} && (my$branchpoint=eval{ ptag($ps->{tag}) })) { 367 368# find where we are supposed to branch from 369if(! -e "$git_dir/refs/heads/$ps->{branch}") { 370system('git-branch',$ps->{branch},$branchpoint) ==0or die"$!$?\n"; 371 372# We trust Arch with the fact that this is just a tag, 373# and it does not affect the state of the tree, so 374# we just tag and move on. If the user really wants us 375# to consolidate more branches into one, don't tag because 376# the tag name would be already taken. 377 tag($ps->{id},$branchpoint); 378 ptag($ps->{id},$branchpoint); 379print" * Tagged$ps->{id} at$branchpoint\n"; 380} 381system('git-checkout','-f',$ps->{branch}) ==0or die"$!$?\n"; 382 383# remove any old stuff that got leftover: 384my$rm= safe_pipe_capture('git-ls-files','--others','-z'); 385 rmtree(split(/\0/,$rm))if$rm; 386return0; 387}else{ 388warn"Tagging from unknown id unsupported\n"if$ps->{tag}; 389} 390# allow multiple bases/imports here since Arch supports cherry-picks 391# from unrelated trees 392} 393 394# update the index with all the changes we got 395system('git-diff-files --name-only -z | '. 396'git-update-index --remove -z --stdin') ==0or die"$!$?\n"; 397system('git-ls-files --others -z | '. 398'git-update-index --add -z --stdin') ==0or die"$!$?\n"; 399return1; 400} 401 402# the native changeset processing strategy. This is very fast, but 403# does not handle permissions or any renames involving directories 404sub process_patchset_fast { 405my$ps=shift; 406# 407# create the branch if needed 408# 409if($ps->{type}eq'i'&& !$import) { 410die"Should not have more than one 'Initial import' per GIT import:$ps->{id}"; 411} 412 413unless($import) {# skip for import 414if( -e "$git_dir/refs/heads/$ps->{branch}") { 415# we know about this branch 416system('git-checkout',$ps->{branch}); 417}else{ 418# new branch! we need to verify a few things 419die"Branch on a non-tag!"unless$ps->{type}eq't'; 420my$branchpoint= ptag($ps->{tag}); 421die"Tagging from unknown id unsupported:$ps->{tag}" 422unless$branchpoint; 423 424# find where we are supposed to branch from 425if(! -e "$git_dir/refs/heads/$ps->{branch}") { 426system('git-branch',$ps->{branch},$branchpoint) ==0or die"$!$?\n"; 427 428# We trust Arch with the fact that this is just a tag, 429# and it does not affect the state of the tree, so 430# we just tag and move on. If the user really wants us 431# to consolidate more branches into one, don't tag because 432# the tag name would be already taken. 433 tag($ps->{id},$branchpoint); 434 ptag($ps->{id},$branchpoint); 435print" * Tagged$ps->{id} at$branchpoint\n"; 436} 437system('git-checkout',$ps->{branch}) ==0or die"$!$?\n"; 438return0; 439} 440die$!if$?; 441} 442 443# 444# Apply the import/changeset/merge into the working tree 445# 446if($ps->{type}eq'i'||$ps->{type}eq't') { 447 apply_import($ps)or die$!; 448$stats{import_or_tag}++; 449$import=0; 450}elsif($ps->{type}eq's') { 451 apply_cset($ps); 452$stats{simple_changeset}++; 453} 454 455# 456# prepare update git's index, based on what arch knows 457# about the pset, resolve parents, etc 458# 459 460my@commitlog= safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 461die"Error in cat-archive-log:$!"if$?; 462 463 parselog($ps,\@commitlog); 464 465# imports don't give us good info 466# on added files. Shame on them 467if($ps->{type}eq'i'||$ps->{type}eq't') { 468system('git-ls-files --deleted -z | '. 469'git-update-index --remove -z --stdin') ==0or die"$!$?\n"; 470system('git-ls-files --others -z | '. 471'git-update-index --add -z --stdin') ==0or die"$!$?\n"; 472} 473 474# TODO: handle removed_directories and renamed_directories: 475 476if(my$del=$ps->{removed_files}) { 477unlink@$del; 478while(@$del) { 479my@slice=splice(@$del,0,100); 480system('git-update-index','--remove','--',@slice) ==0or 481die"Error in git-update-index --remove:$!$?\n"; 482} 483} 484 485if(my$ren=$ps->{renamed_files}) {# renamed 486if(@$ren%2) { 487die"Odd number of entries in rename!?"; 488} 489 490while(@$ren) { 491my$from=shift@$ren; 492my$to=shift@$ren; 493 494unless(-d dirname($to)) { 495 mkpath(dirname($to));# will die on err 496} 497# print "moving $from $to"; 498rename($from,$to)or die"Error renaming '$from' '$to':$!\n"; 499system('git-update-index','--remove','--',$from) ==0or 500die"Error in git-update-index --remove:$!$?\n"; 501system('git-update-index','--add','--',$to) ==0or 502die"Error in git-update-index --add:$!$?\n"; 503} 504} 505 506if(my$add=$ps->{new_files}) { 507while(@$add) { 508my@slice=splice(@$add,0,100); 509system('git-update-index','--add','--',@slice) ==0or 510die"Error in git-update-index --add:$!$?\n"; 511} 512} 513 514if(my$mod=$ps->{modified_files}) { 515while(@$mod) { 516my@slice=splice(@$mod,0,100); 517system('git-update-index','--',@slice) ==0or 518die"Error in git-update-index:$!$?\n"; 519} 520} 521return1;# we successfully applied the changeset 522} 523 524if($opt_f) { 525print"Will import patchsets using the fast strategy\n", 526"Renamed directories and permission changes will be missed\n"; 527*process_patchset = *process_patchset_fast; 528}else{ 529print"Using the default (accurate) import strategy.\n", 530"Things may be a bit slow\n"; 531*process_patchset = *process_patchset_accurate; 532} 533 534foreachmy$ps(@psets) { 535# process patchsets 536$ps->{branch} = git_branchname($ps->{id}); 537 538# 539# ensure we have a clean state 540# 541if(my$dirty=`git-diff-files`) { 542die"Unclean tree when about to process$ps->{id} ". 543" - did we fail to commit cleanly before?\n$dirty"; 544} 545die$!if$?; 546 547# 548# skip commits already in repo 549# 550if(ptag($ps->{id})) { 551$opt_v&&print" * Skipping already imported:$ps->{id}\n"; 552next; 553} 554 555print" * Starting to work on$ps->{id}\n"; 556 557 process_patchset($ps)ornext; 558 559# warn "errors when running git-update-index! $!"; 560my$tree=`git-write-tree`; 561die"cannot write tree$!"if$?; 562chomp$tree; 563 564# 565# Who's your daddy? 566# 567my@par; 568if( -e "$git_dir/refs/heads/$ps->{branch}") { 569if(open HEAD,"<","$git_dir/refs/heads/$ps->{branch}") { 570my$p= <HEAD>; 571close HEAD; 572chomp$p; 573push@par,'-p',$p; 574}else{ 575if($ps->{type}eq's') { 576warn"Could not find the right head for the branch$ps->{branch}"; 577} 578} 579} 580 581if($ps->{merges}) { 582push@par, find_parents($ps); 583} 584 585# 586# Commit, tag and clean state 587# 588$ENV{TZ} ='GMT'; 589$ENV{GIT_AUTHOR_NAME} =$ps->{author}; 590$ENV{GIT_AUTHOR_EMAIL} =$ps->{email}; 591$ENV{GIT_AUTHOR_DATE} =$ps->{date}; 592$ENV{GIT_COMMITTER_NAME} =$ps->{author}; 593$ENV{GIT_COMMITTER_EMAIL} =$ps->{email}; 594$ENV{GIT_COMMITTER_DATE} =$ps->{date}; 595 596my$pid= open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 597or die$!; 598print WRITER $ps->{summary},"\n\n"; 599 600# only print message if it's not empty, to avoid a spurious blank line; 601# also append an extra newline, so there's a blank line before the 602# following "git-archimport-id:" line. 603print WRITER $ps->{message},"\n\n"if($ps->{message}ne""); 604 605# make it easy to backtrack and figure out which Arch revision this was: 606print WRITER 'git-archimport-id: ',$ps->{id},"\n"; 607 608close WRITER; 609my$commitid= <READER>;# read 610chomp$commitid; 611close READER; 612waitpid$pid,0;# close; 613 614if(length$commitid!=40) { 615die"Something went wrong with the commit!$!$commitid"; 616} 617# 618# Update the branch 619# 620open HEAD,">","$git_dir/refs/heads/$ps->{branch}"; 621print HEAD $commitid; 622close HEAD; 623system('git-update-ref','HEAD',"$ps->{branch}"); 624 625# tag accordingly 626 ptag($ps->{id},$commitid);# private tag 627if($opt_T||$ps->{type}eq't'||$ps->{type}eq'i') { 628 tag($ps->{id},$commitid); 629} 630print" * Committed$ps->{id}\n"; 631print" + tree$tree\n"; 632print" + commit$commitid\n"; 633$opt_v&&print" + commit date is$ps->{date}\n"; 634$opt_v&&print" + parents: ",join(' ',@par),"\n"; 635} 636 637if($opt_v) { 638foreach(sort keys%stats) { 639print"$_:$stats{$_}\n"; 640} 641} 642exit0; 643 644# used by the accurate strategy: 645sub sync_to_ps { 646my$ps=shift; 647my$tree_dir=$tmp.'/'.tree_dirname($ps->{id}); 648 649$opt_v&&print"sync_to_ps($ps->{id}) method: "; 650 651if(-d $tree_dir) { 652if($ps->{type}eq't') { 653$opt_v&&print"get (tag)\n"; 654# looks like a tag-only or (worse,) a mixed tags/changeset branch, 655# can't rely on replay to work correctly on these 656 rmtree($tree_dir); 657 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir); 658$stats{get_tag}++; 659}else{ 660my$tree_id= arch_tree_id($tree_dir); 661if($ps->{parent_id} && ($ps->{parent_id}eq$tree_id)) { 662# the common case (hopefully) 663$opt_v&&print"replay\n"; 664 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id}); 665$stats{replay}++; 666}else{ 667# getting one tree is usually faster than getting two trees 668# and applying the delta ... 669 rmtree($tree_dir); 670$opt_v&&print"apply-delta\n"; 671 safe_pipe_capture($TLA,'get','--no-pristine', 672$ps->{id},$tree_dir); 673$stats{get_delta}++; 674} 675} 676}else{ 677# new branch work 678$opt_v&&print"get (new tree)\n"; 679 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir); 680$stats{get_new}++; 681} 682 683# added -I flag to rsync since we're going to fast! AIEEEEE!!!! 684system('rsync','-aI','--delete','--exclude',$git_dir, 685# '--exclude','.arch-inventory', 686'--exclude','.arch-ids','--exclude','{arch}', 687'--exclude','+*','--exclude',',*', 688"$tree_dir/",'./') ==0or die"Cannot rsync$tree_dir:$!$?"; 689return$tree_dir; 690} 691 692sub apply_import { 693my$ps=shift; 694my$bname= git_branchname($ps->{id}); 695 696 mkpath($tmp); 697 698 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import"); 699die"Cannot get import:$!"if$?; 700system('rsync','-aI','--delete','--exclude',$git_dir, 701'--exclude','.arch-ids','--exclude','{arch}', 702"$tmp/import/",'./'); 703die"Cannot rsync import:$!"if$?; 704 705 rmtree("$tmp/import"); 706die"Cannot remove tempdir:$!"if$?; 707 708 709return1; 710} 711 712sub apply_cset { 713my$ps=shift; 714 715 mkpath($tmp); 716 717# get the changeset 718 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset"); 719die"Cannot get changeset:$!"if$?; 720 721# apply patches 722if(`find$tmp/changeset/patches-type f -name '*.patch'`) { 723# this can be sped up considerably by doing 724# (find | xargs cat) | patch 725# but that can get mucked up by patches 726# with missing trailing newlines or the standard 727# 'missing newline' flag in the patch - possibly 728# produced with an old/buggy diff. 729# slow and safe, we invoke patch once per patchfile 730`find$tmp/changeset/patches-type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`; 731die"Problem applying patches!$!"if$?; 732} 733 734# apply changed binary files 735if(my@modified=`find$tmp/changeset/patches-type f -name '*.modified'`) { 736foreachmy$mod(@modified) { 737chomp$mod; 738my$orig=$mod; 739$orig=~s/\.modified$//;# lazy 740$orig=~s!^\Q$tmp\E/changeset/patches/!!; 741#print "rsync -p '$mod' '$orig'"; 742system('rsync','-p',$mod,"./$orig"); 743die"Problem applying binary changes!$!"if$?; 744} 745} 746 747# bring in new files 748system('rsync','-aI','--exclude',$git_dir, 749'--exclude','.arch-ids', 750'--exclude','{arch}', 751"$tmp/changeset/new-files-archive/",'./'); 752 753# deleted files are hinted from the commitlog processing 754 755 rmtree("$tmp/changeset"); 756} 757 758 759# =for reference 760# notes: *-files/-directories keys cannot have spaces, they're always 761# pika-escaped. Everything after the first newline 762# A log entry looks like: 763# Revision: moodle-org--moodle--1.3.3--patch-15 764# Archive: arch-eduforge@catalyst.net.nz--2004 765# Creator: Penny Leach <penny@catalyst.net.nz> 766# Date: Wed May 25 14:15:34 NZST 2005 767# Standard-date: 2005-05-25 02:15:34 GMT 768# New-files: lang/de/.arch-ids/block_glossary_random.php.id 769# lang/de/.arch-ids/block_html.php.id 770# New-directories: lang/de/help/questionnaire 771# lang/de/help/questionnaire/.arch-ids 772# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id 773# db_sears.sql db/db_sears.sql 774# Removed-files: lang/be/docs/.arch-ids/release.html.id 775# lang/be/docs/.arch-ids/releaseold.html.id 776# Modified-files: admin/cron.php admin/delete.php 777# admin/editor.html backup/lib.php backup/restore.php 778# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15 779# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+) 780# summary can be multiline with a leading space just like the above fields 781# Keywords: 782# 783# Updating yadda tadda tadda madda 784sub parselog { 785my($ps,$log) =@_; 786my$key=undef; 787 788# headers we want that contain filenames: 789my%want_headers= ( 790 new_files =>1, 791 modified_files =>1, 792 renamed_files =>1, 793 renamed_directories =>1, 794 removed_files =>1, 795 removed_directories =>1, 796); 797 798chomp(@$log); 799while($_=shift@$log) { 800if(/^Continuation-of:\s*(.*)/) { 801$ps->{tag} =$1; 802$key=undef; 803}elsif(/^Summary:\s*(.*)$/) { 804# summary can be multiline as long as it has a leading space. 805# we squeeze it onto a single line, though. 806$ps->{summary} = [$1]; 807$key='summary'; 808}elsif(/^Creator: (.*)\s*<([^\>]+)>/) { 809$ps->{author} =$1; 810$ps->{email} =$2; 811$key=undef; 812# any *-files or *-directories can be read here: 813}elsif(/^([A-Z][a-z\-]+):\s*(.*)$/) { 814my$val=$2; 815$key=lc$1; 816$key=~tr/-/_/;# too lazy to quote :P 817if($want_headers{$key}) { 818push@{$ps->{$key}},split(/\s+/,$val); 819}else{ 820$key=undef; 821} 822}elsif(/^$/) { 823last;# remainder of @$log that didn't get shifted off is message 824}elsif($key) { 825if(/^\s+(.*)$/) { 826if($keyeq'summary') { 827push@{$ps->{$key}},$1; 828}else{# files/directories: 829push@{$ps->{$key}},split(/\s+/,$1); 830} 831}else{ 832$key=undef; 833} 834} 835} 836 837# drop leading empty lines from the log message 838while(@$log&&$log->[0]eq'') { 839shift@$log; 840} 841if(exists$ps->{summary} && @{$ps->{summary}}) { 842$ps->{summary} =join(' ', @{$ps->{summary}}); 843} 844elsif(@$log==0) { 845$ps->{summary} ='empty commit message'; 846}else{ 847$ps->{summary} =$log->[0] .'...'; 848} 849$ps->{message} =join("\n",@$log); 850 851# skip Arch control files, unescape pika-escaped files 852foreachmy$k(keys%want_headers) { 853next unless(defined$ps->{$k}); 854my@tmp= (); 855foreachmy$t(@{$ps->{$k}}) { 856next unlesslength($t); 857next if$t=~m!\{arch\}/!; 858next if$t=~m!\.arch-ids/!; 859# should we skip this? 860next if$t=~m!\.arch-inventory$!; 861# tla cat-archive-log will give us filenames with spaces as file\(sp)name - why? 862# we can assume that any filename with \ indicates some pika escaping that we want to get rid of. 863if($t=~/\\/){ 864$t= (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0]; 865} 866push@tmp,$t; 867} 868$ps->{$k} = \@tmp; 869} 870} 871 872# write/read a tag 873sub tag { 874my($tag,$commit) =@_; 875 876if($opt_o) { 877$tag=~ s|/|--|g; 878}else{ 879my$patchname=$tag; 880$patchname=~s/.*--//; 881$tag= git_branchname ($tag) .'--'.$patchname; 882} 883 884if($commit) { 885open(C,">","$git_dir/refs/tags/$tag") 886or die"Cannot create tag$tag:$!\n"; 887print C "$commit\n" 888or die"Cannot write tag$tag:$!\n"; 889close(C) 890or die"Cannot write tag$tag:$!\n"; 891print" * Created tag '$tag' on '$commit'\n"if$opt_v; 892}else{# read 893open(C,"<","$git_dir/refs/tags/$tag") 894or die"Cannot read tag$tag:$!\n"; 895$commit= <C>; 896chomp$commit; 897die"Error reading tag$tag:$!\n"unlesslength$commit==40; 898close(C) 899or die"Cannot read tag$tag:$!\n"; 900return$commit; 901} 902} 903 904# write/read a private tag 905# reads fail softly if the tag isn't there 906sub ptag { 907my($tag,$commit) =@_; 908 909# don't use subdirs for tags yet, it could screw up other porcelains 910$tag=~ s|/|,|g; 911 912my$tag_file="$ptag_dir/$tag"; 913my$tag_branch_dir= dirname($tag_file); 914 mkpath($tag_branch_dir)unless(-d $tag_branch_dir); 915 916if($commit) {# write 917open(C,">",$tag_file) 918or die"Cannot create tag$tag:$!\n"; 919print C "$commit\n" 920or die"Cannot write tag$tag:$!\n"; 921close(C) 922or die"Cannot write tag$tag:$!\n"; 923$rptags{$commit} =$tag 924unless$tag=~m/--base-0$/; 925}else{# read 926# if the tag isn't there, return 0 927unless( -s $tag_file) { 928return0; 929} 930open(C,"<",$tag_file) 931or die"Cannot read tag$tag:$!\n"; 932$commit= <C>; 933chomp$commit; 934die"Error reading tag$tag:$!\n"unlesslength$commit==40; 935close(C) 936or die"Cannot read tag$tag:$!\n"; 937unless(defined$rptags{$commit}) { 938$rptags{$commit} =$tag; 939} 940return$commit; 941} 942} 943 944sub find_parents { 945# 946# Identify what branches are merging into me 947# and whether we are fully merged 948# git-merge-base <headsha> <headsha> should tell 949# me what the base of the merge should be 950# 951my$ps=shift; 952 953my%branches;# holds an arrayref per branch 954# the arrayref contains a list of 955# merged patches between the base 956# of the merge and the current head 957 958my@parents;# parents found for this commit 959 960# simple loop to split the merges 961# per branch 962foreachmy$merge(@{$ps->{merges}}) { 963my$branch= git_branchname($merge); 964unless(defined$branches{$branch} ){ 965$branches{$branch} = []; 966} 967push@{$branches{$branch}},$merge; 968} 969 970# 971# foreach branch find a merge base and walk it to the 972# head where we are, collecting the merged patchsets that 973# Arch has recorded. Keep that in @have 974# Compare that with the commits on the other branch 975# between merge-base and the tip of the branch (@need) 976# and see if we have a series of consecutive patches 977# starting from the merge base. The tip of the series 978# of consecutive patches merged is our new parent for 979# that branch. 980# 981foreachmy$branch(keys%branches) { 982 983# check that we actually know about the branch 984next unless-e "$git_dir/refs/heads/$branch"; 985 986my$mergebase= safe_pipe_capture(qw(git-merge-base),$branch,$ps->{branch}); 987if($?) { 988# Don't die here, Arch supports one-way cherry-picking 989# between branches with no common base (or any relationship 990# at all beforehand) 991warn"Cannot find merge base for$branchand$ps->{branch}"; 992next; 993} 994chomp$mergebase; 995 996# now walk up to the mergepoint collecting what patches we have 997my$branchtip= git_rev_parse($ps->{branch}); 998my@ancestors=`git-rev-list --topo-order$branchtip^$mergebase`; 999 my%have; # collected merges this branch has1000 foreach my$merge(@{$ps->{merges}}) {1001$have{$merge} = 1;1002 }1003 my%ancestorshave;1004 foreach my$par(@ancestors) {1005$par= commitid2pset($par);1006 if (defined$par->{merges}) {1007 foreach my$merge(@{$par->{merges}}) {1008$ancestorshave{$merge}=1;1009 }1010 }1011 }1012 # print "++++ Merges in$ps->{id} are....\n";1013 # my@have= sort keys%have; print Dumper(\@have);10141015 # merge what we have with what ancestors have1016%have= (%have,%ancestorshave);10171018 # see what the remote branch has - these are the merges we1019 # will want to have in a consecutive series from the mergebase1020 my$otherbranchtip= git_rev_parse($branch);1021 my@needraw= `git-rev-list --topo-order $otherbranchtip^$mergebase`;1022my@need;1023foreachmy$needps(@needraw) {# get the psets1024$needps= commitid2pset($needps);1025# git-rev-list will also1026# list commits merged in via earlier1027# merges. we are only interested in commits1028# from the branch we're looking at1029if($brancheq$needps->{branch}) {1030push@need,$needps->{id};1031}1032}10331034# print "++++ Merges from $branch we want are....\n";1035# print Dumper(\@need);10361037my$newparent;1038while(my$needed_commit=pop@need) {1039if($have{$needed_commit}) {1040$newparent=$needed_commit;1041}else{1042last;# break out of the while1043}1044}1045if($newparent) {1046push@parents,$newparent;1047}104810491050}# end foreach branch10511052# prune redundant parents1053my%parents;1054foreachmy$p(@parents) {1055$parents{$p} =1;1056}1057foreachmy$p(@parents) {1058next unlessexists$psets{$p}{merges};1059next unlessref$psets{$p}{merges};1060my@merges= @{$psets{$p}{merges}};1061foreachmy$merge(@merges) {1062if($parents{$merge}) {1063delete$parents{$merge};1064}1065}1066}10671068@parents= ();1069foreach(keys%parents) {1070push@parents,'-p', ptag($_);1071}1072return@parents;1073}10741075sub git_rev_parse {1076my$name=shift;1077my$val= safe_pipe_capture(qw(git-rev-parse),$name);1078die"Error: git-rev-parse$name"if$?;1079chomp$val;1080return$val;1081}10821083# resolve a SHA1 to a known patchset1084sub commitid2pset {1085my$commitid=shift;1086chomp$commitid;1087my$name=$rptags{$commitid}1088||die"Cannot find reverse tag mapping for$commitid";1089$name=~ s|,|/|;1090my$ps=$psets{$name}1091|| (print Dumper(sort keys%psets)) &&die"Cannot find patchset for$name";1092return$ps;1093}109410951096# an alternative to `command` that allows input to be passed as an array1097# to work around shell problems with weird characters in arguments1098sub safe_pipe_capture {1099my@output;1100if(my$pid=open my$child,'-|') {1101@output= (<$child>);1102close$childor die join(' ',@_).":$!$?";1103}else{1104exec(@_)or die"$!$?";# exec() can fail the executable can't be found1105}1106returnwantarray?@output:join('',@output);1107}11081109# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`1110sub arch_tree_id {1111my$dir=shift;1112chomp(my$ret= (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );1113return$ret;1114}11151116sub archive_reachable {1117my$archive=shift;1118return1if$reachable{$archive};1119return0if$unreachable{$archive};11201121if(system"$TLAwhereis-archive$archive>/dev/null") {1122if($opt_a&& (system($TLA,'register-archive',1123"http://mirrors.sourcecontrol.net/$archive") ==0)) {1124$reachable{$archive} =1;1125return1;1126}1127print STDERR "Archive is unreachable:$archive\n";1128$unreachable{$archive} =1;1129return0;1130}else{1131$reachable{$archive} =1;1132return1;1133}1134}