1#!/usr/bin/perl -w 2# 3# This tool is copyright (c) 2005, Martin Langhoff. 4# It is released under the Gnu Public License, version 2. 5# 6# The basic idea is to walk the output of tla abrowse, 7# fetch the changesets and apply them. 8# 9 10=head1 Invocation 11 12 git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] 13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ] 14 15Imports a project from one or more Arch repositories. It will follow branches 16and repositories within the namespaces defined by the <archive/branch> 17parameters suppplied. If it cannot find the remote branch a merge comes from 18it will just import it as a regular commit. If it can find it, it will mark it 19as a merge whenever possible. 20 21See man (1) git-archimport for more details. 22 23=head1 TODO 24 25 - create tag objects instead of ref tags 26 - audit shell-escaping of filenames 27 - hide our private tags somewhere smarter 28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines 29 - sort and apply patches by graphing ancestry relations instead of just 30 relying in dates supplied in the changeset itself. 31 tla ancestry-graph -m could be helpful here... 32 33=head1 Devel tricks 34 35Add print in front of the shell commands invoked via backticks. 36 37=head1 Devel Notes 38 39There are several places where Arch and git terminology are intermixed 40and potentially confused. 41 42The notion of a "branch" in git is approximately equivalent to 43a "archive/category--branch--version" in Arch. Also, it should be noted 44that the "--branch" portion of "archive/category--branch--version" is really 45optional in Arch although not many people (nor tools!) seem to know this. 46This means that "archive/category--version" is also a valid "branch" 47in git terms. 48 49We always refer to Arch names by their fully qualified variant (which 50means the "archive" name is prefixed. 51 52For people unfamiliar with Arch, an "archive" is the term for "repository", 53and can contain multiple, unrelated branches. 54 55=cut 56 57use strict; 58use warnings; 59use Getopt::Std; 60use File::Temp qw(tempdir); 61use File::Path qw(mkpath rmtree); 62use File::Basename qw(basename dirname); 63use Data::Dumper qw/ Dumper /; 64use IPC::Open2; 65 66$SIG{'PIPE'}="IGNORE"; 67$ENV{'TZ'}="UTC"; 68 69my$git_dir=$ENV{"GIT_DIR"} ||".git"; 70$ENV{"GIT_DIR"} =$git_dir; 71my$ptag_dir="$git_dir/archimport/tags"; 72 73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o); 74 75sub usage() { 76print STDERR <<END; 77Usage: ${\basename$0} # fetch/update GIT from Arch 78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ] 79 repository/arch-branch [ repository/arch-branch] ... 80END 81exit(1); 82} 83 84getopts("fThvat:D:")or usage(); 85usage if$opt_h; 86 87@ARGV>=1or usage(); 88# $arch_branches: 89# values associated with keys: 90# =1 - Arch version / git 'branch' detected via abrowse on a limit 91# >1 - Arch version / git 'branch' of an auxilliary branch we've merged 92my%arch_branches=map{$_=>1}@ARGV; 93 94$ENV{'TMPDIR'} =$opt_tif$opt_t;# $ENV{TMPDIR} will affect tempdir() calls: 95my$tmp= tempdir('git-archimport-XXXXXX', TMPDIR =>1, CLEANUP =>1); 96$opt_v&&print"+ Using$tmpas temporary directory\n"; 97 98my%reachable= ();# Arch repositories we can access 99my%unreachable= ();# Arch repositories we can't access :< 100my@psets= ();# the collection 101my%psets= ();# the collection, by name 102my%stats= (# Track which strategy we used to import: 103 get_tag =>0, replay =>0, get_new =>0, get_delta =>0, 104 simple_changeset =>0, import_or_tag =>0 105); 106 107my%rptags= ();# my reverse private tags 108# to map a SHA1 to a commitid 109my$TLA=$ENV{'ARCH_CLIENT'} ||'tla'; 110 111sub do_abrowse { 112my$stage=shift; 113while(my($limit,$level) =each%arch_branches) { 114next unless$level==$stage; 115 116open ABROWSE,"$TLAabrowse -fkD --merges$limit|" 117or die"Problems with tla abrowse:$!"; 118 119my%ps= ();# the current one 120my$lastseen=''; 121 122while(<ABROWSE>) { 123chomp; 124 125# first record padded w 8 spaces 126if(s/^\s{8}\b//) { 127my($id,$type) =split(m/\s+/,$_,2); 128 129my%last_ps; 130# store the record we just captured 131if(%ps&& !exists$psets{$ps{id} }) { 132%last_ps=%ps;# break references 133push(@psets, \%last_ps); 134$psets{$last_ps{id} } = \%last_ps; 135} 136 137my$branch= extract_versionname($id); 138%ps= ( id =>$id, branch =>$branch); 139if(%last_ps&& ($last_ps{branch}eq$branch)) { 140$ps{parent_id} =$last_ps{id}; 141} 142 143$arch_branches{$branch} =1; 144$lastseen='id'; 145 146# deal with types (should work with baz or tla): 147if($type=~m/\(.*changeset\)/) { 148$ps{type} ='s'; 149}elsif($type=~/\(.*import\)/) { 150$ps{type} ='i'; 151}elsif($type=~m/\(tag.*?(\S+\@\S+).*?\)/) { 152$ps{type} ='t'; 153# read which revision we've tagged when we parse the log 154$ps{tag} =$1; 155}else{ 156warn"Unknown type$type"; 157} 158 159$arch_branches{$branch} =1; 160$lastseen='id'; 161}elsif(s/^\s{10}//) { 162# 10 leading spaces or more 163# indicate commit metadata 164 165# date 166if($lastseeneq'id'&&m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){ 167$ps{date} =$1; 168$lastseen='date'; 169}elsif($_eq'merges in:') { 170$ps{merges} = []; 171$lastseen='merges'; 172}elsif($lastseeneq'merges'&&s/^\s{2}//) { 173my$id=$_; 174push(@{$ps{merges}},$id); 175 176# aggressive branch finding: 177if($opt_D) { 178my$branch= extract_versionname($id); 179my$repo= extract_reponame($branch); 180 181if(archive_reachable($repo) && 182!defined$arch_branches{$branch}) { 183$arch_branches{$branch} =$stage+1; 184} 185} 186}else{ 187warn"more metadata after merges!?:$_\n"unless/^\s*$/; 188} 189} 190} 191 192if(%ps&& !exists$psets{$ps{id} }) { 193my%temp=%ps;# break references 194if(@psets&&$psets[$#psets]{branch}eq$ps{branch}) { 195$temp{parent_id} =$psets[$#psets]{id}; 196} 197push(@psets, \%temp); 198$psets{$temp{id} } = \%temp; 199} 200 201close ABROWSE or die"$TLAabrowse failed on$limit\n"; 202} 203}# end foreach $root 204 205do_abrowse(1); 206my$depth=2; 207$opt_D||=0; 208while($depth<=$opt_D) { 209 do_abrowse($depth); 210$depth++; 211} 212 213## Order patches by time 214# FIXME see if we can find a more optimal way to do this by graphing 215# the ancestry data and walking it, that way we won't have to rely on 216# client-supplied dates 217@psets=sort{$a->{date}.$b->{id}cmp$b->{date}.$b->{id}}@psets; 218 219#print Dumper \@psets; 220 221## 222## TODO cleanup irrelevant patches 223## and put an initial import 224## or a full tag 225my$import=0; 226unless(-d $git_dir) {# initial import 227if($psets[0]{type}eq'i'||$psets[0]{type}eq't') { 228print"Starting import from$psets[0]{id}\n"; 229`git-init-db`; 230die$!if$?; 231$import=1; 232}else{ 233die"Need to start from an import or a tag -- cannot use$psets[0]{id}"; 234} 235}else{# progressing an import 236# load the rptags 237opendir(DIR,$ptag_dir) 238||die"can't opendir:$!"; 239while(my$file=readdir(DIR)) { 240# skip non-interesting-files 241next unless-f "$ptag_dir/$file"; 242 243# convert first '--' to '/' from old git-archimport to use 244# as an archivename/c--b--v private tag 245if($file!~m!,!) { 246my$oldfile=$file; 247$file=~s!--!,!; 248print STDERR "converting old tag$oldfileto$file\n"; 249rename("$ptag_dir/$oldfile","$ptag_dir/$file")or die$!; 250} 251my$sha= ptag($file); 252chomp$sha; 253$rptags{$sha} =$file; 254} 255closedir DIR; 256} 257 258# process patchsets 259# extract the Arch repository name (Arch "archive" in Arch-speak) 260sub extract_reponame { 261my$fq_cvbr=shift;# archivename/[[[[category]branch]version]revision] 262return(split(/\//,$fq_cvbr))[0]; 263} 264 265sub extract_versionname { 266my$name=shift; 267$name=~s/--(?:patch|version(?:fix)?|base)-\d+$//; 268return$name; 269} 270 271# convert a fully-qualified revision or version to a unique dirname: 272# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 273# becomes: normalperson@yhbt.net-05,mpd--uclinux--1 274# 275# the git notion of a branch is closer to 276# archive/category--branch--version than archive/category--branch, so we 277# use this to convert to git branch names. 278# Also, keep archive names but replace '/' with ',' since it won't require 279# subdirectories, and is safer than swapping '--' which could confuse 280# reverse-mapping when dealing with bastard branches that 281# are just archive/category--version (no --branch) 282sub tree_dirname { 283my$revision=shift; 284my$name= extract_versionname($revision); 285$name=~ s#/#,#; 286return$name; 287} 288 289# old versions of git-archimport just use the <category--branch> part: 290sub old_style_branchname { 291my$id=shift; 292my$ret= safe_pipe_capture($TLA,'parse-package-name','-p',$id); 293chomp$ret; 294return$ret; 295} 296 297*git_branchname =$opt_o? *old_style_branchname : *tree_dirname; 298 299sub process_patchset_accurate { 300my$ps=shift; 301 302# switch to that branch if we're not already in that branch: 303if(-e "$git_dir/refs/heads/$ps->{branch}") { 304system('git-checkout','-f',$ps->{branch}) ==0or die"$!$?\n"; 305 306# remove any old stuff that got leftover: 307my$rm= safe_pipe_capture('git-ls-files','--others','-z'); 308 rmtree(split(/\0/,$rm))if$rm; 309} 310 311# Apply the import/changeset/merge into the working tree 312my$dir= sync_to_ps($ps); 313# read the new log entry: 314my@commitlog= safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id}); 315die"Error in cat-log:$!"if$?; 316chomp@commitlog; 317 318# grab variables we want from the log, new fields get added to $ps: 319# (author, date, email, summary, message body ...) 320 parselog($ps, \@commitlog); 321 322if($ps->{id} =~/--base-0$/&&$ps->{id}ne$psets[0]{id}) { 323# this should work when importing continuations 324if($ps->{tag} && (my$branchpoint=eval{ ptag($ps->{tag}) })) { 325 326# find where we are supposed to branch from 327system('git-checkout','-f','-b',$ps->{branch}, 328$branchpoint) ==0or die"$!$?\n"; 329 330# remove any old stuff that got leftover: 331my$rm= safe_pipe_capture('git-ls-files','--others','-z'); 332 rmtree(split(/\0/,$rm))if$rm; 333 334# If we trust Arch with the fact that this is just 335# a tag, and it does not affect the state of the tree 336# then we just tag and move on 337 tag($ps->{id},$branchpoint); 338 ptag($ps->{id},$branchpoint); 339print" * Tagged$ps->{id} at$branchpoint\n"; 340return0; 341}else{ 342warn"Tagging from unknown id unsupported\n"if$ps->{tag}; 343} 344# allow multiple bases/imports here since Arch supports cherry-picks 345# from unrelated trees 346} 347 348# update the index with all the changes we got 349system('git-diff-files --name-only -z | '. 350'git-update-index --remove -z --stdin') ==0or die"$!$?\n"; 351system('git-ls-files --others -z | '. 352'git-update-index --add -z --stdin') ==0or die"$!$?\n"; 353return1; 354} 355 356# the native changeset processing strategy. This is very fast, but 357# does not handle permissions or any renames involving directories 358sub process_patchset_fast { 359my$ps=shift; 360# 361# create the branch if needed 362# 363if($ps->{type}eq'i'&& !$import) { 364die"Should not have more than one 'Initial import' per GIT import:$ps->{id}"; 365} 366 367unless($import) {# skip for import 368if( -e "$git_dir/refs/heads/$ps->{branch}") { 369# we know about this branch 370system('git-checkout',$ps->{branch}); 371}else{ 372# new branch! we need to verify a few things 373die"Branch on a non-tag!"unless$ps->{type}eq't'; 374my$branchpoint= ptag($ps->{tag}); 375die"Tagging from unknown id unsupported:$ps->{tag}" 376unless$branchpoint; 377 378# find where we are supposed to branch from 379system('git-checkout','-b',$ps->{branch},$branchpoint); 380 381# If we trust Arch with the fact that this is just 382# a tag, and it does not affect the state of the tree 383# then we just tag and move on 384 tag($ps->{id},$branchpoint); 385 ptag($ps->{id},$branchpoint); 386print" * Tagged$ps->{id} at$branchpoint\n"; 387return0; 388} 389die$!if$?; 390} 391 392# 393# Apply the import/changeset/merge into the working tree 394# 395if($ps->{type}eq'i'||$ps->{type}eq't') { 396 apply_import($ps)or die$!; 397$stats{import_or_tag}++; 398$import=0; 399}elsif($ps->{type}eq's') { 400 apply_cset($ps); 401$stats{simple_changeset}++; 402} 403 404# 405# prepare update git's index, based on what arch knows 406# about the pset, resolve parents, etc 407# 408 409my@commitlog= safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 410die"Error in cat-archive-log:$!"if$?; 411 412 parselog($ps,\@commitlog); 413 414# imports don't give us good info 415# on added files. Shame on them 416if($ps->{type}eq'i'||$ps->{type}eq't') { 417system('git-ls-files --deleted -z | '. 418'git-update-index --remove -z --stdin') ==0or die"$!$?\n"; 419system('git-ls-files --others -z | '. 420'git-update-index --add -z --stdin') ==0or die"$!$?\n"; 421} 422 423# TODO: handle removed_directories and renamed_directories: 424 425if(my$del=$ps->{removed_files}) { 426unlink@$del; 427while(@$del) { 428my@slice=splice(@$del,0,100); 429system('git-update-index','--remove','--',@slice) ==0or 430die"Error in git-update-index --remove:$!$?\n"; 431} 432} 433 434if(my$ren=$ps->{renamed_files}) {# renamed 435if(@$ren%2) { 436die"Odd number of entries in rename!?"; 437} 438 439while(@$ren) { 440my$from=shift@$ren; 441my$to=shift@$ren; 442 443unless(-d dirname($to)) { 444 mkpath(dirname($to));# will die on err 445} 446# print "moving $from $to"; 447rename($from,$to)or die"Error renaming '$from' '$to':$!\n"; 448system('git-update-index','--remove','--',$from) ==0or 449die"Error in git-update-index --remove:$!$?\n"; 450system('git-update-index','--add','--',$to) ==0or 451die"Error in git-update-index --add:$!$?\n"; 452} 453} 454 455if(my$add=$ps->{new_files}) { 456while(@$add) { 457my@slice=splice(@$add,0,100); 458system('git-update-index','--add','--',@slice) ==0or 459die"Error in git-update-index --add:$!$?\n"; 460} 461} 462 463if(my$mod=$ps->{modified_files}) { 464while(@$mod) { 465my@slice=splice(@$mod,0,100); 466system('git-update-index','--',@slice) ==0or 467die"Error in git-update-index:$!$?\n"; 468} 469} 470return1;# we successfully applied the changeset 471} 472 473if($opt_f) { 474print"Will import patchsets using the fast strategy\n", 475"Renamed directories and permission changes will be missed\n"; 476*process_patchset = *process_patchset_fast; 477}else{ 478print"Using the default (accurate) import strategy.\n", 479"Things may be a bit slow\n"; 480*process_patchset = *process_patchset_accurate; 481} 482 483foreachmy$ps(@psets) { 484# process patchsets 485$ps->{branch} = git_branchname($ps->{id}); 486 487# 488# ensure we have a clean state 489# 490if(my$dirty=`git-diff-files`) { 491die"Unclean tree when about to process$ps->{id} ". 492" - did we fail to commit cleanly before?\n$dirty"; 493} 494die$!if$?; 495 496# 497# skip commits already in repo 498# 499if(ptag($ps->{id})) { 500$opt_v&&print" * Skipping already imported:$ps->{id}\n"; 501next; 502} 503 504print" * Starting to work on$ps->{id}\n"; 505 506 process_patchset($ps)ornext; 507 508# warn "errors when running git-update-index! $!"; 509my$tree=`git-write-tree`; 510die"cannot write tree$!"if$?; 511chomp$tree; 512 513# 514# Who's your daddy? 515# 516my@par; 517if( -e "$git_dir/refs/heads/$ps->{branch}") { 518if(open HEAD,"<","$git_dir/refs/heads/$ps->{branch}") { 519my$p= <HEAD>; 520close HEAD; 521chomp$p; 522push@par,'-p',$p; 523}else{ 524if($ps->{type}eq's') { 525warn"Could not find the right head for the branch$ps->{branch}"; 526} 527} 528} 529 530if($ps->{merges}) { 531push@par, find_parents($ps); 532} 533 534# 535# Commit, tag and clean state 536# 537$ENV{TZ} ='GMT'; 538$ENV{GIT_AUTHOR_NAME} =$ps->{author}; 539$ENV{GIT_AUTHOR_EMAIL} =$ps->{email}; 540$ENV{GIT_AUTHOR_DATE} =$ps->{date}; 541$ENV{GIT_COMMITTER_NAME} =$ps->{author}; 542$ENV{GIT_COMMITTER_EMAIL} =$ps->{email}; 543$ENV{GIT_COMMITTER_DATE} =$ps->{date}; 544 545my$pid= open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 546or die$!; 547print WRITER $ps->{summary},"\n"; 548print WRITER $ps->{message},"\n"; 549 550# make it easy to backtrack and figure out which Arch revision this was: 551print WRITER 'git-archimport-id: ',$ps->{id},"\n"; 552 553close WRITER; 554my$commitid= <READER>;# read 555chomp$commitid; 556close READER; 557waitpid$pid,0;# close; 558 559if(length$commitid!=40) { 560die"Something went wrong with the commit!$!$commitid"; 561} 562# 563# Update the branch 564# 565open HEAD,">","$git_dir/refs/heads/$ps->{branch}"; 566print HEAD $commitid; 567close HEAD; 568system('git-update-ref','HEAD',"$ps->{branch}"); 569 570# tag accordingly 571 ptag($ps->{id},$commitid);# private tag 572if($opt_T||$ps->{type}eq't'||$ps->{type}eq'i') { 573 tag($ps->{id},$commitid); 574} 575print" * Committed$ps->{id}\n"; 576print" + tree$tree\n"; 577print" + commit$commitid\n"; 578$opt_v&&print" + commit date is$ps->{date}\n"; 579$opt_v&&print" + parents: ",join(' ',@par),"\n"; 580} 581 582if($opt_v) { 583foreach(sort keys%stats) { 584print"$_:$stats{$_}\n"; 585} 586} 587exit0; 588 589# used by the accurate strategy: 590sub sync_to_ps { 591my$ps=shift; 592my$tree_dir=$tmp.'/'.tree_dirname($ps->{id}); 593 594$opt_v&&print"sync_to_ps($ps->{id}) method: "; 595 596if(-d $tree_dir) { 597if($ps->{type}eq't') { 598$opt_v&&print"get (tag)\n"; 599# looks like a tag-only or (worse,) a mixed tags/changeset branch, 600# can't rely on replay to work correctly on these 601 rmtree($tree_dir); 602 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir); 603$stats{get_tag}++; 604}else{ 605my$tree_id= arch_tree_id($tree_dir); 606if($ps->{parent_id} && ($ps->{parent_id}eq$tree_id)) { 607# the common case (hopefully) 608$opt_v&&print"replay\n"; 609 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id}); 610$stats{replay}++; 611}else{ 612# getting one tree is usually faster than getting two trees 613# and applying the delta ... 614 rmtree($tree_dir); 615$opt_v&&print"apply-delta\n"; 616 safe_pipe_capture($TLA,'get','--no-pristine', 617$ps->{id},$tree_dir); 618$stats{get_delta}++; 619} 620} 621}else{ 622# new branch work 623$opt_v&&print"get (new tree)\n"; 624 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir); 625$stats{get_new}++; 626} 627 628# added -I flag to rsync since we're going to fast! AIEEEEE!!!! 629system('rsync','-aI','--delete','--exclude',$git_dir, 630# '--exclude','.arch-inventory', 631'--exclude','.arch-ids','--exclude','{arch}', 632'--exclude','+*','--exclude',',*', 633"$tree_dir/",'./') ==0or die"Cannot rsync$tree_dir:$!$?"; 634return$tree_dir; 635} 636 637sub apply_import { 638my$ps=shift; 639my$bname= git_branchname($ps->{id}); 640 641 mkpath($tmp); 642 643 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import"); 644die"Cannot get import:$!"if$?; 645system('rsync','-aI','--delete','--exclude',$git_dir, 646'--exclude','.arch-ids','--exclude','{arch}', 647"$tmp/import/",'./'); 648die"Cannot rsync import:$!"if$?; 649 650 rmtree("$tmp/import"); 651die"Cannot remove tempdir:$!"if$?; 652 653 654return1; 655} 656 657sub apply_cset { 658my$ps=shift; 659 660 mkpath($tmp); 661 662# get the changeset 663 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset"); 664die"Cannot get changeset:$!"if$?; 665 666# apply patches 667if(`find$tmp/changeset/patches-type f -name '*.patch'`) { 668# this can be sped up considerably by doing 669# (find | xargs cat) | patch 670# but that cna get mucked up by patches 671# with missing trailing newlines or the standard 672# 'missing newline' flag in the patch - possibly 673# produced with an old/buggy diff. 674# slow and safe, we invoke patch once per patchfile 675`find$tmp/changeset/patches-type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`; 676die"Problem applying patches!$!"if$?; 677} 678 679# apply changed binary files 680if(my@modified=`find$tmp/changeset/patches-type f -name '*.modified'`) { 681foreachmy$mod(@modified) { 682chomp$mod; 683my$orig=$mod; 684$orig=~s/\.modified$//;# lazy 685$orig=~s!^\Q$tmp\E/changeset/patches/!!; 686#print "rsync -p '$mod' '$orig'"; 687system('rsync','-p',$mod,"./$orig"); 688die"Problem applying binary changes!$!"if$?; 689} 690} 691 692# bring in new files 693system('rsync','-aI','--exclude',$git_dir, 694'--exclude','.arch-ids', 695'--exclude','{arch}', 696"$tmp/changeset/new-files-archive/",'./'); 697 698# deleted files are hinted from the commitlog processing 699 700 rmtree("$tmp/changeset"); 701} 702 703 704# =for reference 705# notes: *-files/-directories keys cannot have spaces, they're always 706# pika-escaped. Everything after the first newline 707# A log entry looks like: 708# Revision: moodle-org--moodle--1.3.3--patch-15 709# Archive: arch-eduforge@catalyst.net.nz--2004 710# Creator: Penny Leach <penny@catalyst.net.nz> 711# Date: Wed May 25 14:15:34 NZST 2005 712# Standard-date: 2005-05-25 02:15:34 GMT 713# New-files: lang/de/.arch-ids/block_glossary_random.php.id 714# lang/de/.arch-ids/block_html.php.id 715# New-directories: lang/de/help/questionnaire 716# lang/de/help/questionnaire/.arch-ids 717# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id 718# db_sears.sql db/db_sears.sql 719# Removed-files: lang/be/docs/.arch-ids/release.html.id 720# lang/be/docs/.arch-ids/releaseold.html.id 721# Modified-files: admin/cron.php admin/delete.php 722# admin/editor.html backup/lib.php backup/restore.php 723# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15 724# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+) 725# summary can be multiline with a leading space just like the above fields 726# Keywords: 727# 728# Updating yadda tadda tadda madda 729sub parselog { 730my($ps,$log) =@_; 731my$key=undef; 732 733# headers we want that contain filenames: 734my%want_headers= ( 735 new_files =>1, 736 modified_files =>1, 737 renamed_files =>1, 738 renamed_directories =>1, 739 removed_files =>1, 740 removed_directories =>1, 741); 742 743chomp(@$log); 744while($_=shift@$log) { 745if(/^Continuation-of:\s*(.*)/) { 746$ps->{tag} =$1; 747$key=undef; 748}elsif(/^Summary:\s*(.*)$/) { 749# summary can be multiline as long as it has a leading space 750$ps->{summary} = [$1]; 751$key='summary'; 752}elsif(/^Creator: (.*)\s*<([^\>]+)>/) { 753$ps->{author} =$1; 754$ps->{email} =$2; 755$key=undef; 756# any *-files or *-directories can be read here: 757}elsif(/^([A-Z][a-z\-]+):\s*(.*)$/) { 758my$val=$2; 759$key=lc$1; 760$key=~tr/-/_/;# too lazy to quote :P 761if($want_headers{$key}) { 762push@{$ps->{$key}},split(/\s+/,$val); 763}else{ 764$key=undef; 765} 766}elsif(/^$/) { 767last;# remainder of @$log that didn't get shifted off is message 768}elsif($key) { 769if(/^\s+(.*)$/) { 770if($keyeq'summary') { 771push@{$ps->{$key}},$1; 772}else{# files/directories: 773push@{$ps->{$key}},split(/\s+/,$1); 774} 775}else{ 776$key=undef; 777} 778} 779} 780 781# post-processing: 782$ps->{summary} =join("\n",@{$ps->{summary}})."\n"; 783$ps->{message} =join("\n",@$log); 784 785# skip Arch control files, unescape pika-escaped files 786foreachmy$k(keys%want_headers) { 787next unless(defined$ps->{$k}); 788my@tmp= (); 789foreachmy$t(@{$ps->{$k}}) { 790next unlesslength($t); 791next if$t=~m!\{arch\}/!; 792next if$t=~m!\.arch-ids/!; 793# should we skip this? 794next if$t=~m!\.arch-inventory$!; 795# tla cat-archive-log will give us filenames with spaces as file\(sp)name - why? 796# we can assume that any filename with \ indicates some pika escaping that we want to get rid of. 797if($t=~/\\/){ 798$t= (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0]; 799} 800push@tmp,$t; 801} 802$ps->{$k} = \@tmp; 803} 804} 805 806# write/read a tag 807sub tag { 808my($tag,$commit) =@_; 809 810if($opt_o) { 811$tag=~ s|/|--|g; 812}else{ 813# don't use subdirs for tags yet, it could screw up other porcelains 814$tag=~ s|/|,|g; 815} 816 817if($commit) { 818open(C,">","$git_dir/refs/tags/$tag") 819or die"Cannot create tag$tag:$!\n"; 820print C "$commit\n" 821or die"Cannot write tag$tag:$!\n"; 822close(C) 823or die"Cannot write tag$tag:$!\n"; 824print" * Created tag '$tag' on '$commit'\n"if$opt_v; 825}else{# read 826open(C,"<","$git_dir/refs/tags/$tag") 827or die"Cannot read tag$tag:$!\n"; 828$commit= <C>; 829chomp$commit; 830die"Error reading tag$tag:$!\n"unlesslength$commit==40; 831close(C) 832or die"Cannot read tag$tag:$!\n"; 833return$commit; 834} 835} 836 837# write/read a private tag 838# reads fail softly if the tag isn't there 839sub ptag { 840my($tag,$commit) =@_; 841 842# don't use subdirs for tags yet, it could screw up other porcelains 843$tag=~ s|/|,|g; 844 845my$tag_file="$ptag_dir/$tag"; 846my$tag_branch_dir= dirname($tag_file); 847 mkpath($tag_branch_dir)unless(-d $tag_branch_dir); 848 849if($commit) {# write 850open(C,">",$tag_file) 851or die"Cannot create tag$tag:$!\n"; 852print C "$commit\n" 853or die"Cannot write tag$tag:$!\n"; 854close(C) 855or die"Cannot write tag$tag:$!\n"; 856$rptags{$commit} =$tag 857unless$tag=~m/--base-0$/; 858}else{# read 859# if the tag isn't there, return 0 860unless( -s $tag_file) { 861return0; 862} 863open(C,"<",$tag_file) 864or die"Cannot read tag$tag:$!\n"; 865$commit= <C>; 866chomp$commit; 867die"Error reading tag$tag:$!\n"unlesslength$commit==40; 868close(C) 869or die"Cannot read tag$tag:$!\n"; 870unless(defined$rptags{$commit}) { 871$rptags{$commit} =$tag; 872} 873return$commit; 874} 875} 876 877sub find_parents { 878# 879# Identify what branches are merging into me 880# and whether we are fully merged 881# git-merge-base <headsha> <headsha> should tell 882# me what the base of the merge should be 883# 884my$ps=shift; 885 886my%branches;# holds an arrayref per branch 887# the arrayref contains a list of 888# merged patches between the base 889# of the merge and the current head 890 891my@parents;# parents found for this commit 892 893# simple loop to split the merges 894# per branch 895foreachmy$merge(@{$ps->{merges}}) { 896my$branch= git_branchname($merge); 897unless(defined$branches{$branch} ){ 898$branches{$branch} = []; 899} 900push@{$branches{$branch}},$merge; 901} 902 903# 904# foreach branch find a merge base and walk it to the 905# head where we are, collecting the merged patchsets that 906# Arch has recorded. Keep that in @have 907# Compare that with the commits on the other branch 908# between merge-base and the tip of the branch (@need) 909# and see if we have a series of consecutive patches 910# starting from the merge base. The tip of the series 911# of consecutive patches merged is our new parent for 912# that branch. 913# 914foreachmy$branch(keys%branches) { 915 916# check that we actually know about the branch 917next unless-e "$git_dir/refs/heads/$branch"; 918 919my$mergebase=`git-merge-base$branch$ps->{branch}`; 920if($?) { 921# Don't die here, Arch supports one-way cherry-picking 922# between branches with no common base (or any relationship 923# at all beforehand) 924warn"Cannot find merge base for$branchand$ps->{branch}"; 925next; 926} 927chomp$mergebase; 928 929# now walk up to the mergepoint collecting what patches we have 930my$branchtip= git_rev_parse($ps->{branch}); 931my@ancestors=`git-rev-list --topo-order$branchtip^$mergebase`; 932 my%have; # collected merges this branch has 933 foreach my$merge(@{$ps->{merges}}) { 934$have{$merge} = 1; 935 } 936 my%ancestorshave; 937 foreach my$par(@ancestors) { 938$par= commitid2pset($par); 939 if (defined$par->{merges}) { 940 foreach my$merge(@{$par->{merges}}) { 941$ancestorshave{$merge}=1; 942 } 943 } 944 } 945 # print "++++ Merges in$ps->{id} are....\n"; 946 # my@have= sort keys%have; print Dumper(\@have); 947 948 # merge what we have with what ancestors have 949%have= (%have,%ancestorshave); 950 951 # see what the remote branch has - these are the merges we 952 # will want to have in a consecutive series from the mergebase 953 my$otherbranchtip= git_rev_parse($branch); 954 my@needraw= `git-rev-list --topo-order $otherbranchtip^$mergebase`; 955my@need; 956foreachmy$needps(@needraw) {# get the psets 957$needps= commitid2pset($needps); 958# git-rev-list will also 959# list commits merged in via earlier 960# merges. we are only interested in commits 961# from the branch we're looking at 962if($brancheq$needps->{branch}) { 963push@need,$needps->{id}; 964} 965} 966 967# print "++++ Merges from $branch we want are....\n"; 968# print Dumper(\@need); 969 970my$newparent; 971while(my$needed_commit=pop@need) { 972if($have{$needed_commit}) { 973$newparent=$needed_commit; 974}else{ 975last;# break out of the while 976} 977} 978if($newparent) { 979push@parents,$newparent; 980} 981 982 983}# end foreach branch 984 985# prune redundant parents 986my%parents; 987foreachmy$p(@parents) { 988$parents{$p} =1; 989} 990foreachmy$p(@parents) { 991next unlessexists$psets{$p}{merges}; 992next unlessref$psets{$p}{merges}; 993my@merges= @{$psets{$p}{merges}}; 994foreachmy$merge(@merges) { 995if($parents{$merge}) { 996delete$parents{$merge}; 997} 998} 999}10001001@parents= ();1002foreach(keys%parents) {1003push@parents,'-p', ptag($_);1004}1005return@parents;1006}10071008sub git_rev_parse {1009my$name=shift;1010my$val=`git-rev-parse$name`;1011 die "Error: git-rev-parse$name" if$?;1012 chomp$val;1013 return$val;1014}10151016# resolve a SHA1 to a known patchset1017sub commitid2pset {1018 my$commitid= shift;1019 chomp$commitid;1020 my$name=$rptags{$commitid} 1021 || die "Cannot find reverse tag mapping for$commitid";1022$name=~ s|,|/|;1023 my$ps=$psets{$name} 1024 || (print Dumper(sort keys%psets)) && die "Cannot find patchset for$name";1025 return$ps;1026}102710281029# an alterative to `command` that allows input to be passed as an array1030# to work around shell problems with weird characters in arguments1031sub safe_pipe_capture {1032 my@output;1033 if (my$pid= open my$child, '-|') {1034@output= (<$child>);1035 close$childor die join(' ',@_).":$!$?";1036 } else {1037 exec(@_) or die "$!$?"; # exec() can fail the executable can't be found1038 }1039 return wantarray ?@output: join('',@output);1040}10411042# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`1043sub arch_tree_id {1044 my$dir= shift;1045 chomp( my$ret= (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );1046 return$ret;1047}10481049sub archive_reachable {1050 my$archive= shift;1051 return 1 if$reachable{$archive};1052 return 0 if$unreachable{$archive};10531054 if (system "$TLAwhereis-archive$archive>/dev/null") {1055 if ($opt_a&& (system($TLA,'register-archive',1056 "http://mirrors.sourcecontrol.net/$archive") == 0)) {1057$reachable{$archive} = 1;1058 return 1;1059 }1060 print STDERR "Archive is unreachable:$archive\n";1061$unreachable{$archive} = 1;1062 return 0;1063 } else {1064$reachable{$archive} = 1;1065 return 1;1066 }1067}1068