1#! /usr/bin/perl 2 3# Copyright (C) 2011 4# Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr> 5# Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr> 6# Claire Fousse <claire.fousse@ensimag.imag.fr> 7# David Amouyal <david.amouyal@ensimag.imag.fr> 8# Matthieu Moy <matthieu.moy@grenoble-inp.fr> 9# License: GPL v2 or later 10 11# Gateway between Git and MediaWiki. 12# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/ 13 14use strict; 15use MediaWiki::API; 16use DateTime::Format::ISO8601; 17 18# By default, use UTF-8 to communicate with Git and the user 19binmode STDERR,":utf8"; 20binmode STDOUT,":utf8"; 21 22use URI::Escape; 23use IPC::Open2; 24 25use warnings; 26 27# Mediawiki filenames can contain forward slashes. This variable decides by which pattern they should be replaced 28useconstant SLASH_REPLACEMENT =>"%2F"; 29 30# It's not always possible to delete pages (may require some 31# privileges). Deleted pages are replaced with this content. 32useconstant DELETED_CONTENT =>"[[Category:Deleted]]\n"; 33 34# It's not possible to create empty pages. New empty files in Git are 35# sent with this content instead. 36useconstant EMPTY_CONTENT =>"<!-- empty page -->\n"; 37 38# used to reflect file creation or deletion in diff. 39useconstant NULL_SHA1 =>"0000000000000000000000000000000000000000"; 40 41# Used on Git's side to reflect empty edit messages on the wiki 42useconstant EMPTY_MESSAGE =>'*Empty MediaWiki Message*'; 43 44if(@ARGV!=2) { 45 exit_error_usage(); 46} 47 48my$remotename=$ARGV[0]; 49my$url=$ARGV[1]; 50 51# Accept both space-separated and multiple keys in config file. 52# Spaces should be written as _ anyway because we'll use chomp. 53my@tracked_pages=split(/[ \n]/, run_git("config --get-all remote.".$remotename.".pages")); 54chomp(@tracked_pages); 55 56# Just like @tracked_pages, but for MediaWiki categories. 57my@tracked_categories=split(/[ \n]/, run_git("config --get-all remote.".$remotename.".categories")); 58chomp(@tracked_categories); 59 60# Import media files on pull 61my$import_media= run_git("config --get --bool remote.".$remotename.".mediaimport"); 62chomp($import_media); 63$import_media= ($import_mediaeq"true"); 64 65# Export media files on push 66my$export_media= run_git("config --get --bool remote.".$remotename.".mediaexport"); 67chomp($export_media); 68$export_media= !($export_mediaeq"false"); 69 70my$wiki_login= run_git("config --get remote.".$remotename.".mwLogin"); 71# Note: mwPassword is discourraged. Use the credential system instead. 72my$wiki_passwd= run_git("config --get remote.".$remotename.".mwPassword"); 73my$wiki_domain= run_git("config --get remote.".$remotename.".mwDomain"); 74chomp($wiki_login); 75chomp($wiki_passwd); 76chomp($wiki_domain); 77 78# Import only last revisions (both for clone and fetch) 79my$shallow_import= run_git("config --get --bool remote.".$remotename.".shallow"); 80chomp($shallow_import); 81$shallow_import= ($shallow_importeq"true"); 82 83# Fetch (clone and pull) by revisions instead of by pages. This behavior 84# is more efficient when we have a wiki with lots of pages and we fetch 85# the revisions quite often so that they concern only few pages. 86# Possible values: 87# - by_rev: perform one query per new revision on the remote wiki 88# - by_page: query each tracked page for new revision 89my$fetch_strategy= run_git("config --get remote.$remotename.fetchStrategy"); 90unless($fetch_strategy) { 91$fetch_strategy= run_git("config --get mediawiki.fetchStrategy"); 92} 93chomp($fetch_strategy); 94unless($fetch_strategy) { 95$fetch_strategy="by_page"; 96} 97 98# Dumb push: don't update notes and mediawiki ref to reflect the last push. 99# 100# Configurable with mediawiki.dumbPush, or per-remote with 101# remote.<remotename>.dumbPush. 102# 103# This means the user will have to re-import the just-pushed 104# revisions. On the other hand, this means that the Git revisions 105# corresponding to MediaWiki revisions are all imported from the wiki, 106# regardless of whether they were initially created in Git or from the 107# web interface, hence all users will get the same history (i.e. if 108# the push from Git to MediaWiki loses some information, everybody 109# will get the history with information lost). If the import is 110# deterministic, this means everybody gets the same sha1 for each 111# MediaWiki revision. 112my$dumb_push= run_git("config --get --bool remote.$remotename.dumbPush"); 113unless($dumb_push) { 114$dumb_push= run_git("config --get --bool mediawiki.dumbPush"); 115} 116chomp($dumb_push); 117$dumb_push= ($dumb_pusheq"true"); 118 119my$wiki_name=$url; 120$wiki_name=~s/[^\/]*:\/\///; 121# If URL is like http://user:password@example.com/, we clearly don't 122# want the password in $wiki_name. While we're there, also remove user 123# and '@' sign, to avoid author like MWUser@HTTPUser@host.com 124$wiki_name=~s/^.*@//; 125 126# Commands parser 127my$entry; 128my@cmd; 129while(<STDIN>) { 130chomp; 131@cmd=split(/ /); 132if(defined($cmd[0])) { 133# Line not blank 134if($cmd[0]eq"capabilities") { 135die("Too many arguments for capabilities")unless(!defined($cmd[1])); 136 mw_capabilities(); 137}elsif($cmd[0]eq"list") { 138die("Too many arguments for list")unless(!defined($cmd[2])); 139 mw_list($cmd[1]); 140}elsif($cmd[0]eq"import") { 141die("Invalid arguments for import")unless($cmd[1]ne""&& !defined($cmd[2])); 142 mw_import($cmd[1]); 143}elsif($cmd[0]eq"option") { 144die("Too many arguments for option")unless($cmd[1]ne""&&$cmd[2]ne""&& !defined($cmd[3])); 145 mw_option($cmd[1],$cmd[2]); 146}elsif($cmd[0]eq"push") { 147 mw_push($cmd[1]); 148}else{ 149print STDERR "Unknown command. Aborting...\n"; 150last; 151} 152}else{ 153# blank line: we should terminate 154last; 155} 156 157BEGIN{ $| =1}# flush STDOUT, to make sure the previous 158# command is fully processed. 159} 160 161########################## Functions ############################## 162 163## error handling 164sub exit_error_usage { 165die"ERROR: git-remote-mediawiki module was not called with a correct number of\n". 166"parameters\n". 167"You may obtain this error because you attempted to run the git-remote-mediawiki\n". 168"module directly.\n". 169"This module can be used the following way:\n". 170"\tgit clone mediawiki://<address of a mediawiki>\n". 171"Then, use git commit, push and pull as with every normal git repository.\n"; 172} 173 174## credential API management (generic functions) 175 176sub credential_read { 177my%credential; 178my$reader=shift; 179my$op=shift; 180while(<$reader>) { 181my($key,$value) =/([^=]*)=(.*)/; 182if(not defined$key) { 183die"ERROR receiving response from git credential$op:\n$_\n"; 184} 185$credential{$key} =$value; 186} 187return%credential; 188} 189 190sub credential_write { 191my$credential=shift; 192my$writer=shift; 193# url overwrites other fields, so it must come first 194print$writer"url=$credential->{url}\n"ifexists$credential->{url}; 195while(my($key,$value) =each(%$credential) ) { 196if(length$value&&$keyne'url') { 197print$writer"$key=$value\n"; 198} 199} 200} 201 202sub credential_run { 203my$op=shift; 204my$credential=shift; 205my$pid= open2(my$reader,my$writer,"git credential$op"); 206 credential_write($credential,$writer); 207print$writer"\n"; 208close($writer); 209 210if($opeq"fill") { 211%$credential= credential_read($reader,$op); 212}else{ 213if(<$reader>) { 214die"ERROR while running git credential$op:\n$_"; 215} 216} 217close($reader); 218waitpid($pid,0); 219my$child_exit_status=$?>>8; 220if($child_exit_status!=0) { 221die"'git credential$op' failed with code$child_exit_status."; 222} 223} 224 225# MediaWiki API instance, created lazily. 226my$mediawiki; 227 228sub mw_connect_maybe { 229if($mediawiki) { 230return; 231} 232$mediawiki= MediaWiki::API->new; 233$mediawiki->{config}->{api_url} ="$url/api.php"; 234if($wiki_login) { 235my%credential= (url =>$url); 236$credential{username} =$wiki_login; 237$credential{password} =$wiki_passwd; 238 credential_run("fill", \%credential); 239my$request= {lgname =>$credential{username}, 240 lgpassword =>$credential{password}, 241 lgdomain =>$wiki_domain}; 242if($mediawiki->login($request)) { 243 credential_run("approve", \%credential); 244print STDERR "Logged in mediawiki user\"$credential{username}\".\n"; 245}else{ 246print STDERR "Failed to log in mediawiki user\"$credential{username}\"on$url\n"; 247print STDERR " (error ". 248$mediawiki->{error}->{code} .': '. 249$mediawiki->{error}->{details} .")\n"; 250 credential_run("reject", \%credential); 251exit1; 252} 253} 254} 255 256## Functions for listing pages on the remote wiki 257sub get_mw_tracked_pages { 258my$pages=shift; 259 get_mw_page_list(\@tracked_pages,$pages); 260} 261 262sub get_mw_page_list { 263my$page_list=shift; 264my$pages=shift; 265my@some_pages=@$page_list; 266while(@some_pages) { 267my$last=50; 268if($#some_pages<$last) { 269$last=$#some_pages; 270} 271my@slice=@some_pages[0..$last]; 272 get_mw_first_pages(\@slice,$pages); 273@some_pages=@some_pages[51..$#some_pages]; 274} 275} 276 277sub get_mw_tracked_categories { 278my$pages=shift; 279foreachmy$category(@tracked_categories) { 280if(index($category,':') <0) { 281# Mediawiki requires the Category 282# prefix, but let's not force the user 283# to specify it. 284$category="Category:".$category; 285} 286my$mw_pages=$mediawiki->list( { 287 action =>'query', 288 list =>'categorymembers', 289 cmtitle =>$category, 290 cmlimit =>'max'} ) 291||die$mediawiki->{error}->{code} .': ' 292.$mediawiki->{error}->{details}; 293foreachmy$page(@{$mw_pages}) { 294$pages->{$page->{title}} =$page; 295} 296} 297} 298 299sub get_mw_all_pages { 300my$pages=shift; 301# No user-provided list, get the list of pages from the API. 302my$mw_pages=$mediawiki->list({ 303 action =>'query', 304 list =>'allpages', 305 aplimit =>'max' 306}); 307if(!defined($mw_pages)) { 308print STDERR "fatal: could not get the list of wiki pages.\n"; 309print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; 310print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; 311exit1; 312} 313foreachmy$page(@{$mw_pages}) { 314$pages->{$page->{title}} =$page; 315} 316} 317 318# queries the wiki for a set of pages. Meant to be used within a loop 319# querying the wiki for slices of page list. 320sub get_mw_first_pages { 321my$some_pages=shift; 322my@some_pages= @{$some_pages}; 323 324my$pages=shift; 325 326# pattern 'page1|page2|...' required by the API 327my$titles=join('|',@some_pages); 328 329my$mw_pages=$mediawiki->api({ 330 action =>'query', 331 titles =>$titles, 332}); 333if(!defined($mw_pages)) { 334print STDERR "fatal: could not query the list of wiki pages.\n"; 335print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; 336print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; 337exit1; 338} 339while(my($id,$page) =each(%{$mw_pages->{query}->{pages}})) { 340if($id<0) { 341print STDERR "Warning: page$page->{title} not found on wiki\n"; 342}else{ 343$pages->{$page->{title}} =$page; 344} 345} 346} 347 348# Get the list of pages to be fetched according to configuration. 349sub get_mw_pages { 350 mw_connect_maybe(); 351 352print STDERR "Listing pages on remote wiki...\n"; 353 354my%pages;# hash on page titles to avoid duplicates 355my$user_defined; 356if(@tracked_pages) { 357$user_defined=1; 358# The user provided a list of pages titles, but we 359# still need to query the API to get the page IDs. 360 get_mw_tracked_pages(\%pages); 361} 362if(@tracked_categories) { 363$user_defined=1; 364 get_mw_tracked_categories(\%pages); 365} 366if(!$user_defined) { 367 get_mw_all_pages(\%pages); 368} 369if($import_media) { 370print STDERR "Getting media files for selected pages...\n"; 371if($user_defined) { 372 get_linked_mediafiles(\%pages); 373}else{ 374 get_all_mediafiles(\%pages); 375} 376} 377print STDERR (scalar keys%pages) ." pages found.\n"; 378return%pages; 379} 380 381# usage: $out = run_git("command args"); 382# $out = run_git("command args", "raw"); # don't interpret output as UTF-8. 383sub run_git { 384my$args=shift; 385my$encoding= (shift||"encoding(UTF-8)"); 386open(my$git,"-|:$encoding","git ".$args); 387my$res=do{local$/; <$git> }; 388close($git); 389 390return$res; 391} 392 393 394sub get_all_mediafiles { 395my$pages=shift; 396# Attach list of all pages for media files from the API, 397# they are in a different namespace, only one namespace 398# can be queried at the same moment 399my$mw_pages=$mediawiki->list({ 400 action =>'query', 401 list =>'allpages', 402 apnamespace => get_mw_namespace_id("File"), 403 aplimit =>'max' 404}); 405if(!defined($mw_pages)) { 406print STDERR "fatal: could not get the list of pages for media files.\n"; 407print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; 408print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; 409exit1; 410} 411foreachmy$page(@{$mw_pages}) { 412$pages->{$page->{title}} =$page; 413} 414} 415 416sub get_linked_mediafiles { 417my$pages=shift; 418my@titles=map$_->{title},values(%{$pages}); 419 420# The query is split in small batches because of the MW API limit of 421# the number of links to be returned (500 links max). 422my$batch=10; 423while(@titles) { 424if($#titles<$batch) { 425$batch=$#titles; 426} 427my@slice=@titles[0..$batch]; 428 429# pattern 'page1|page2|...' required by the API 430my$mw_titles=join('|',@slice); 431 432# Media files could be included or linked from 433# a page, get all related 434my$query= { 435 action =>'query', 436 prop =>'links|images', 437 titles =>$mw_titles, 438 plnamespace => get_mw_namespace_id("File"), 439 pllimit =>'max' 440}; 441my$result=$mediawiki->api($query); 442 443while(my($id,$page) =each(%{$result->{query}->{pages}})) { 444my@media_titles; 445if(defined($page->{links})) { 446my@link_titles=map$_->{title}, @{$page->{links}}; 447push(@media_titles,@link_titles); 448} 449if(defined($page->{images})) { 450my@image_titles=map$_->{title}, @{$page->{images}}; 451push(@media_titles,@image_titles); 452} 453if(@media_titles) { 454 get_mw_page_list(\@media_titles,$pages); 455} 456} 457 458@titles=@titles[($batch+1)..$#titles]; 459} 460} 461 462sub get_mw_mediafile_for_page_revision { 463# Name of the file on Wiki, with the prefix. 464my$filename=shift; 465my$timestamp=shift; 466my%mediafile; 467 468# Search if on a media file with given timestamp exists on 469# MediaWiki. In that case download the file. 470my$query= { 471 action =>'query', 472 prop =>'imageinfo', 473 titles =>"File:".$filename, 474 iistart =>$timestamp, 475 iiend =>$timestamp, 476 iiprop =>'timestamp|archivename|url', 477 iilimit =>1 478}; 479my$result=$mediawiki->api($query); 480 481my($fileid,$file) =each( %{$result->{query}->{pages}} ); 482# If not defined it means there is no revision of the file for 483# given timestamp. 484if(defined($file->{imageinfo})) { 485$mediafile{title} =$filename; 486 487my$fileinfo=pop(@{$file->{imageinfo}}); 488$mediafile{timestamp} =$fileinfo->{timestamp}; 489# Mediawiki::API's download function doesn't support https URLs 490# and can't download old versions of files. 491print STDERR "\tDownloading file$mediafile{title}, version$mediafile{timestamp}\n"; 492$mediafile{content} = download_mw_mediafile($fileinfo->{url}); 493} 494return%mediafile; 495} 496 497sub download_mw_mediafile { 498my$url=shift; 499 500my$response=$mediawiki->{ua}->get($url); 501if($response->code==200) { 502return$response->decoded_content; 503}else{ 504print STDERR "Error downloading mediafile from :\n"; 505print STDERR "URL:$url\n"; 506print STDERR "Server response: ".$response->code." ".$response->message."\n"; 507exit1; 508} 509} 510 511sub get_last_local_revision { 512# Get note regarding last mediawiki revision 513my$note= run_git("notes --ref=$remotename/mediawikishow refs/mediawiki/$remotename/master2>/dev/null"); 514my@note_info=split(/ /,$note); 515 516my$lastrevision_number; 517if(!(defined($note_info[0]) &&$note_info[0]eq"mediawiki_revision:")) { 518print STDERR "No previous mediawiki revision found"; 519$lastrevision_number=0; 520}else{ 521# Notes are formatted : mediawiki_revision: #number 522$lastrevision_number=$note_info[1]; 523chomp($lastrevision_number); 524print STDERR "Last local mediawiki revision found is$lastrevision_number"; 525} 526return$lastrevision_number; 527} 528 529# Remember the timestamp corresponding to a revision id. 530my%basetimestamps; 531 532# Get the last remote revision without taking in account which pages are 533# tracked or not. This function makes a single request to the wiki thus 534# avoid a loop onto all tracked pages. This is useful for the fetch-by-rev 535# option. 536sub get_last_global_remote_rev { 537 mw_connect_maybe(); 538 539my$query= { 540 action =>'query', 541 list =>'recentchanges', 542 prop =>'revisions', 543 rclimit =>'1', 544 rcdir =>'older', 545}; 546my$result=$mediawiki->api($query); 547return$result->{query}->{recentchanges}[0]->{revid}; 548} 549 550# Get the last remote revision concerning the tracked pages and the tracked 551# categories. 552sub get_last_remote_revision { 553 mw_connect_maybe(); 554 555my%pages_hash= get_mw_pages(); 556my@pages=values(%pages_hash); 557 558my$max_rev_num=0; 559 560print STDERR "Getting last revision id on tracked pages...\n"; 561 562foreachmy$page(@pages) { 563my$id=$page->{pageid}; 564 565my$query= { 566 action =>'query', 567 prop =>'revisions', 568 rvprop =>'ids|timestamp', 569 pageids =>$id, 570}; 571 572my$result=$mediawiki->api($query); 573 574my$lastrev=pop(@{$result->{query}->{pages}->{$id}->{revisions}}); 575 576$basetimestamps{$lastrev->{revid}} =$lastrev->{timestamp}; 577 578$max_rev_num= ($lastrev->{revid} >$max_rev_num?$lastrev->{revid} :$max_rev_num); 579} 580 581print STDERR "Last remote revision found is$max_rev_num.\n"; 582return$max_rev_num; 583} 584 585# Clean content before sending it to MediaWiki 586sub mediawiki_clean { 587my$string=shift; 588my$page_created=shift; 589# Mediawiki does not allow blank space at the end of a page and ends with a single \n. 590# This function right trims a string and adds a \n at the end to follow this rule 591$string=~s/\s+$//; 592if($stringeq""&&$page_created) { 593# Creating empty pages is forbidden. 594$string= EMPTY_CONTENT; 595} 596return$string."\n"; 597} 598 599# Filter applied on MediaWiki data before adding them to Git 600sub mediawiki_smudge { 601my$string=shift; 602if($stringeq EMPTY_CONTENT) { 603$string=""; 604} 605# This \n is important. This is due to mediawiki's way to handle end of files. 606return$string."\n"; 607} 608 609sub mediawiki_clean_filename { 610my$filename=shift; 611$filename=~s/@{[SLASH_REPLACEMENT]}/\//g; 612# [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded. 613# Do a variant of URL-encoding, i.e. looks like URL-encoding, 614# but with _ added to prevent MediaWiki from thinking this is 615# an actual special character. 616$filename=~s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge; 617# If we use the uri escape before 618# we should unescape here, before anything 619 620return$filename; 621} 622 623sub mediawiki_smudge_filename { 624my$filename=shift; 625$filename=~s/\//@{[SLASH_REPLACEMENT]}/g; 626$filename=~s/ /_/g; 627# Decode forbidden characters encoded in mediawiki_clean_filename 628$filename=~s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge; 629return$filename; 630} 631 632sub literal_data { 633my($content) =@_; 634print STDOUT "data ", bytes::length($content),"\n",$content; 635} 636 637sub literal_data_raw { 638# Output possibly binary content. 639my($content) =@_; 640# Avoid confusion between size in bytes and in characters 641 utf8::downgrade($content); 642binmode STDOUT,":raw"; 643print STDOUT "data ", bytes::length($content),"\n",$content; 644binmode STDOUT,":utf8"; 645} 646 647sub mw_capabilities { 648# Revisions are imported to the private namespace 649# refs/mediawiki/$remotename/ by the helper and fetched into 650# refs/remotes/$remotename later by fetch. 651print STDOUT "refspec refs/heads/*:refs/mediawiki/$remotename/*\n"; 652print STDOUT "import\n"; 653print STDOUT "list\n"; 654print STDOUT "push\n"; 655print STDOUT "\n"; 656} 657 658sub mw_list { 659# MediaWiki do not have branches, we consider one branch arbitrarily 660# called master, and HEAD pointing to it. 661print STDOUT "? refs/heads/master\n"; 662print STDOUT "\@refs/heads/masterHEAD\n"; 663print STDOUT "\n"; 664} 665 666sub mw_option { 667print STDERR "remote-helper command 'option$_[0]' not yet implemented\n"; 668print STDOUT "unsupported\n"; 669} 670 671sub fetch_mw_revisions_for_page { 672my$page=shift; 673my$id=shift; 674my$fetch_from=shift; 675my@page_revs= (); 676my$query= { 677 action =>'query', 678 prop =>'revisions', 679 rvprop =>'ids', 680 rvdir =>'newer', 681 rvstartid =>$fetch_from, 682 rvlimit =>500, 683 pageids =>$id, 684}; 685 686my$revnum=0; 687# Get 500 revisions at a time due to the mediawiki api limit 688while(1) { 689my$result=$mediawiki->api($query); 690 691# Parse each of those 500 revisions 692foreachmy$revision(@{$result->{query}->{pages}->{$id}->{revisions}}) { 693my$page_rev_ids; 694$page_rev_ids->{pageid} =$page->{pageid}; 695$page_rev_ids->{revid} =$revision->{revid}; 696push(@page_revs,$page_rev_ids); 697$revnum++; 698} 699last unless$result->{'query-continue'}; 700$query->{rvstartid} =$result->{'query-continue'}->{revisions}->{rvstartid}; 701} 702if($shallow_import&&@page_revs) { 703print STDERR " Found 1 revision (shallow import).\n"; 704@page_revs=sort{$b->{revid} <=>$a->{revid}} (@page_revs); 705return$page_revs[0]; 706} 707print STDERR " Found ",$revnum," revision(s).\n"; 708return@page_revs; 709} 710 711sub fetch_mw_revisions { 712my$pages=shift;my@pages= @{$pages}; 713my$fetch_from=shift; 714 715my@revisions= (); 716my$n=1; 717foreachmy$page(@pages) { 718my$id=$page->{pageid}; 719 720print STDERR "page$n/",scalar(@pages),": ".$page->{title} ."\n"; 721$n++; 722my@page_revs= fetch_mw_revisions_for_page($page,$id,$fetch_from); 723@revisions= (@page_revs,@revisions); 724} 725 726return($n,@revisions); 727} 728 729sub fe_escape_path { 730my$path=shift; 731$path=~s/\\/\\\\/g; 732$path=~s/"/\\"/g; 733$path=~s/\n/\\n/g; 734return'"'.$path.'"'; 735} 736 737sub import_file_revision { 738my$commit=shift; 739my%commit= %{$commit}; 740my$full_import=shift; 741my$n=shift; 742my$mediafile=shift; 743my%mediafile; 744if($mediafile) { 745%mediafile= %{$mediafile}; 746} 747 748my$title=$commit{title}; 749my$comment=$commit{comment}; 750my$content=$commit{content}; 751my$author=$commit{author}; 752my$date=$commit{date}; 753 754print STDOUT "commit refs/mediawiki/$remotename/master\n"; 755print STDOUT "mark :$n\n"; 756print STDOUT "committer$author<$author\@$wiki_name> ",$date->epoch," +0000\n"; 757 literal_data($comment); 758 759# If it's not a clone, we need to know where to start from 760if(!$full_import&&$n==1) { 761print STDOUT "from refs/mediawiki/$remotename/master^0\n"; 762} 763if($contentne DELETED_CONTENT) { 764print STDOUT "M 644 inline ". 765 fe_escape_path($title.".mw") ."\n"; 766 literal_data($content); 767if(%mediafile) { 768print STDOUT "M 644 inline " 769. fe_escape_path($mediafile{title}) ."\n"; 770 literal_data_raw($mediafile{content}); 771} 772print STDOUT "\n\n"; 773}else{ 774print STDOUT "D ". fe_escape_path($title.".mw") ."\n"; 775} 776 777# mediawiki revision number in the git note 778if($full_import&&$n==1) { 779print STDOUT "reset refs/notes/$remotename/mediawiki\n"; 780} 781print STDOUT "commit refs/notes/$remotename/mediawiki\n"; 782print STDOUT "committer$author<$author\@$wiki_name> ",$date->epoch," +0000\n"; 783 literal_data("Note added by git-mediawiki during import"); 784if(!$full_import&&$n==1) { 785print STDOUT "from refs/notes/$remotename/mediawiki^0\n"; 786} 787print STDOUT "N inline :$n\n"; 788 literal_data("mediawiki_revision: ".$commit{mw_revision}); 789print STDOUT "\n\n"; 790} 791 792# parse a sequence of 793# <cmd> <arg1> 794# <cmd> <arg2> 795# \n 796# (like batch sequence of import and sequence of push statements) 797sub get_more_refs { 798my$cmd=shift; 799my@refs; 800while(1) { 801my$line= <STDIN>; 802if($line=~m/^$cmd (.*)$/) { 803push(@refs,$1); 804}elsif($lineeq"\n") { 805return@refs; 806}else{ 807die("Invalid command in a '$cmd' batch: ".$_); 808} 809} 810} 811 812sub mw_import { 813# multiple import commands can follow each other. 814my@refs= (shift, get_more_refs("import")); 815foreachmy$ref(@refs) { 816 mw_import_ref($ref); 817} 818print STDOUT "done\n"; 819} 820 821sub mw_import_ref { 822my$ref=shift; 823# The remote helper will call "import HEAD" and 824# "import refs/heads/master". 825# Since HEAD is a symbolic ref to master (by convention, 826# followed by the output of the command "list" that we gave), 827# we don't need to do anything in this case. 828if($refeq"HEAD") { 829return; 830} 831 832 mw_connect_maybe(); 833 834print STDERR "Searching revisions...\n"; 835my$last_local= get_last_local_revision(); 836my$fetch_from=$last_local+1; 837if($fetch_from==1) { 838print STDERR ", fetching from beginning.\n"; 839}else{ 840print STDERR ", fetching from here.\n"; 841} 842 843my$n=0; 844if($fetch_strategyeq"by_rev") { 845print STDERR "Fetching & writing export data by revs...\n"; 846$n= mw_import_ref_by_revs($fetch_from); 847}elsif($fetch_strategyeq"by_page") { 848print STDERR "Fetching & writing export data by pages...\n"; 849$n= mw_import_ref_by_pages($fetch_from); 850}else{ 851print STDERR "fatal: invalid fetch strategy\"$fetch_strategy\".\n"; 852print STDERR "Check your configuration variables remote.$remotename.fetchStrategy and mediawiki.fetchStrategy\n"; 853exit1; 854} 855 856if($fetch_from==1&&$n==0) { 857print STDERR "You appear to have cloned an empty MediaWiki.\n"; 858# Something has to be done remote-helper side. If nothing is done, an error is 859# thrown saying that HEAD is referring to unknown object 0000000000000000000 860# and the clone fails. 861} 862} 863 864sub mw_import_ref_by_pages { 865 866my$fetch_from=shift; 867my%pages_hash= get_mw_pages(); 868my@pages=values(%pages_hash); 869 870my($n,@revisions) = fetch_mw_revisions(\@pages,$fetch_from); 871 872@revisions=sort{$a->{revid} <=>$b->{revid}}@revisions; 873my@revision_ids=map$_->{revid},@revisions; 874 875return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash); 876} 877 878sub mw_import_ref_by_revs { 879 880my$fetch_from=shift; 881my%pages_hash= get_mw_pages(); 882 883my$last_remote= get_last_global_remote_rev(); 884my@revision_ids=$fetch_from..$last_remote; 885return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash); 886} 887 888# Import revisions given in second argument (array of integers). 889# Only pages appearing in the third argument (hash indexed by page titles) 890# will be imported. 891sub mw_import_revids { 892my$fetch_from=shift; 893my$revision_ids=shift; 894my$pages=shift; 895 896my$n=0; 897my$n_actual=0; 898my$last_timestamp=0;# Placeholer in case $rev->timestamp is undefined 899 900foreachmy$pagerevid(@$revision_ids) { 901# Count page even if we skip it, since we display 902# $n/$total and $total includes skipped pages. 903$n++; 904 905# fetch the content of the pages 906my$query= { 907 action =>'query', 908 prop =>'revisions', 909 rvprop =>'content|timestamp|comment|user|ids', 910 revids =>$pagerevid, 911}; 912 913my$result=$mediawiki->api($query); 914 915if(!$result) { 916die"Failed to retrieve modified page for revision$pagerevid"; 917} 918 919if(defined($result->{query}->{badrevids}->{$pagerevid})) { 920# The revision id does not exist on the remote wiki. 921next; 922} 923 924if(!defined($result->{query}->{pages})) { 925die"Invalid revision$pagerevid."; 926} 927 928my@result_pages=values(%{$result->{query}->{pages}}); 929my$result_page=$result_pages[0]; 930my$rev=$result_pages[0]->{revisions}->[0]; 931 932my$page_title=$result_page->{title}; 933 934if(!exists($pages->{$page_title})) { 935print STDERR "$n/",scalar(@$revision_ids), 936": Skipping revision #$rev->{revid} of$page_title\n"; 937next; 938} 939 940$n_actual++; 941 942my%commit; 943$commit{author} =$rev->{user} ||'Anonymous'; 944$commit{comment} =$rev->{comment} || EMPTY_MESSAGE; 945$commit{title} = mediawiki_smudge_filename($page_title); 946$commit{mw_revision} =$rev->{revid}; 947$commit{content} = mediawiki_smudge($rev->{'*'}); 948 949if(!defined($rev->{timestamp})) { 950$last_timestamp++; 951}else{ 952$last_timestamp=$rev->{timestamp}; 953} 954$commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp); 955 956# Differentiates classic pages and media files. 957my($namespace,$filename) =$page_title=~/^([^:]*):(.*)$/; 958my%mediafile; 959if($namespace) { 960my$id= get_mw_namespace_id($namespace); 961if($id&&$id== get_mw_namespace_id("File")) { 962%mediafile= get_mw_mediafile_for_page_revision($filename,$rev->{timestamp}); 963} 964} 965# If this is a revision of the media page for new version 966# of a file do one common commit for both file and media page. 967# Else do commit only for that page. 968print STDERR "$n/",scalar(@$revision_ids),": Revision #$rev->{revid} of$commit{title}\n"; 969 import_file_revision(\%commit, ($fetch_from==1),$n_actual, \%mediafile); 970} 971 972return$n_actual; 973} 974 975sub error_non_fast_forward { 976my$advice= run_git("config --bool advice.pushNonFastForward"); 977chomp($advice); 978if($advicene"false") { 979# Native git-push would show this after the summary. 980# We can't ask it to display it cleanly, so print it 981# ourselves before. 982print STDERR "To prevent you from losing history, non-fast-forward updates were rejected\n"; 983print STDERR "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n"; 984print STDERR "'Note about fast-forwards' section of 'git push --help' for details.\n"; 985} 986print STDOUT "error$_[0]\"non-fast-forward\"\n"; 987return0; 988} 989 990sub mw_upload_file { 991my$complete_file_name=shift; 992my$new_sha1=shift; 993my$extension=shift; 994my$file_deleted=shift; 995my$summary=shift; 996my$newrevid; 997my$path="File:".$complete_file_name; 998my%hashFiles= get_allowed_file_extensions(); 999if(!exists($hashFiles{$extension})) {1000print STDERR "$complete_file_nameis not a permitted file on this wiki.\n";1001print STDERR "Check the configuration of file uploads in your mediawiki.\n";1002return$newrevid;1003}1004# Deleting and uploading a file requires a priviledged user1005if($file_deleted) {1006 mw_connect_maybe();1007my$query= {1008 action =>'delete',1009 title =>$path,1010 reason =>$summary1011};1012if(!$mediawiki->edit($query)) {1013print STDERR "Failed to delete file on remote wiki\n";1014print STDERR "Check your permissions on the remote site. Error code:\n";1015print STDERR $mediawiki->{error}->{code} .':'.$mediawiki->{error}->{details};1016exit1;1017}1018}else{1019# Don't let perl try to interpret file content as UTF-8 => use "raw"1020my$content= run_git("cat-file blob$new_sha1","raw");1021if($contentne"") {1022 mw_connect_maybe();1023$mediawiki->{config}->{upload_url} =1024"$url/index.php/Special:Upload";1025$mediawiki->edit({1026 action =>'upload',1027 filename =>$complete_file_name,1028 comment =>$summary,1029 file => [undef,1030$complete_file_name,1031 Content =>$content],1032 ignorewarnings =>1,1033}, {1034 skip_encoding =>11035} ) ||die$mediawiki->{error}->{code} .':'1036.$mediawiki->{error}->{details};1037my$last_file_page=$mediawiki->get_page({title =>$path});1038$newrevid=$last_file_page->{revid};1039print STDERR "Pushed file:$new_sha1-$complete_file_name.\n";1040}else{1041print STDERR "Empty file$complete_file_namenot pushed.\n";1042}1043}1044return$newrevid;1045}10461047sub mw_push_file {1048my$diff_info=shift;1049# $diff_info contains a string in this format:1050# 100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status>1051my@diff_info_split=split(/[ \t]/,$diff_info);10521053# Filename, including .mw extension1054my$complete_file_name=shift;1055# Commit message1056my$summary=shift;1057# MediaWiki revision number. Keep the previous one by default,1058# in case there's no edit to perform.1059my$oldrevid=shift;1060my$newrevid;10611062if($summaryeq EMPTY_MESSAGE) {1063$summary='';1064}10651066my$new_sha1=$diff_info_split[3];1067my$old_sha1=$diff_info_split[2];1068my$page_created= ($old_sha1eq NULL_SHA1);1069my$page_deleted= ($new_sha1eq NULL_SHA1);1070$complete_file_name= mediawiki_clean_filename($complete_file_name);10711072my($title,$extension) =$complete_file_name=~/^(.*)\.([^\.]*)$/;1073if(!defined($extension)) {1074$extension="";1075}1076if($extensioneq"mw") {1077my$ns= get_mw_namespace_id_for_page($complete_file_name);1078if($ns&&$ns== get_mw_namespace_id("File") && (!$export_media)) {1079print STDERR "Ignoring media file related page:$complete_file_name\n";1080return($oldrevid,"ok");1081}1082my$file_content;1083if($page_deleted) {1084# Deleting a page usually requires1085# special privileges. A common1086# convention is to replace the page1087# with this content instead:1088$file_content= DELETED_CONTENT;1089}else{1090$file_content= run_git("cat-file blob$new_sha1");1091}10921093 mw_connect_maybe();10941095my$result=$mediawiki->edit( {1096 action =>'edit',1097 summary =>$summary,1098 title =>$title,1099 basetimestamp =>$basetimestamps{$oldrevid},1100 text => mediawiki_clean($file_content,$page_created),1101}, {1102 skip_encoding =>1# Helps with names with accentuated characters1103});1104if(!$result) {1105if($mediawiki->{error}->{code} ==3) {1106# edit conflicts, considered as non-fast-forward1107print STDERR 'Warning: Error '.1108$mediawiki->{error}->{code} .1109' from mediwiki: '.$mediawiki->{error}->{details} .1110".\n";1111return($oldrevid,"non-fast-forward");1112}else{1113# Other errors. Shouldn't happen => just die()1114die'Fatal: Error '.1115$mediawiki->{error}->{code} .1116' from mediwiki: '.$mediawiki->{error}->{details};1117}1118}1119$newrevid=$result->{edit}->{newrevid};1120print STDERR "Pushed file:$new_sha1-$title\n";1121}elsif($export_media) {1122$newrevid= mw_upload_file($complete_file_name,$new_sha1,1123$extension,$page_deleted,1124$summary);1125}else{1126print STDERR "Ignoring media file$title\n";1127}1128$newrevid= ($newrevidor$oldrevid);1129return($newrevid,"ok");1130}11311132sub mw_push {1133# multiple push statements can follow each other1134my@refsspecs= (shift, get_more_refs("push"));1135my$pushed;1136formy$refspec(@refsspecs) {1137my($force,$local,$remote) =$refspec=~/^(\+)?([^:]*):([^:]*)$/1138or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>");1139if($force) {1140print STDERR "Warning: forced push not allowed on a MediaWiki.\n";1141}1142if($localeq"") {1143print STDERR "Cannot delete remote branch on a MediaWiki\n";1144print STDOUT "error$remotecannot delete\n";1145next;1146}1147if($remotene"refs/heads/master") {1148print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n";1149print STDOUT "error$remoteonly master allowed\n";1150next;1151}1152if(mw_push_revision($local,$remote)) {1153$pushed=1;1154}1155}11561157# Notify Git that the push is done1158print STDOUT "\n";11591160if($pushed&&$dumb_push) {1161print STDERR "Just pushed some revisions to MediaWiki.\n";1162print STDERR "The pushed revisions now have to be re-imported, and your current branch\n";1163print STDERR "needs to be updated with these re-imported commits. You can do this with\n";1164print STDERR "\n";1165print STDERR " git pull --rebase\n";1166print STDERR "\n";1167}1168}11691170sub mw_push_revision {1171my$local=shift;1172my$remote=shift;# actually, this has to be "refs/heads/master" at this point.1173my$last_local_revid= get_last_local_revision();1174print STDERR ".\n";# Finish sentence started by get_last_local_revision()1175my$last_remote_revid= get_last_remote_revision();1176my$mw_revision=$last_remote_revid;11771178# Get sha1 of commit pointed by local HEAD1179my$HEAD_sha1= run_git("rev-parse$local2>/dev/null");chomp($HEAD_sha1);1180# Get sha1 of commit pointed by remotes/$remotename/master1181my$remoteorigin_sha1= run_git("rev-parse refs/remotes/$remotename/master2>/dev/null");1182chomp($remoteorigin_sha1);11831184if($last_local_revid>0&&1185$last_local_revid<$last_remote_revid) {1186return error_non_fast_forward($remote);1187}11881189if($HEAD_sha1eq$remoteorigin_sha1) {1190# nothing to push1191return0;1192}11931194# Get every commit in between HEAD and refs/remotes/origin/master,1195# including HEAD and refs/remotes/origin/master1196my@commit_pairs= ();1197if($last_local_revid>0) {1198my$parsed_sha1=$remoteorigin_sha1;1199# Find a path from last MediaWiki commit to pushed commit1200print STDERR "Computing path from local to remote ...\n";1201my@local_ancestry=split(/\n/, run_git("rev-list --boundary --parents$local^$parsed_sha1"));1202my%local_ancestry;1203foreachmy$line(@local_ancestry) {1204if(my($child,$parents) =$line=~m/^-?([a-f0-9]+) ([a-f0-9 ]+)/) {1205foreachmy$parent(split(' ',$parents)) {1206$local_ancestry{$parent} =$child;1207}1208}elsif(!$line=~m/^([a-f0-9]+)/) {1209die"Unexpected output from git rev-list:$line";1210}1211}1212while($parsed_sha1ne$HEAD_sha1) {1213my$child=$local_ancestry{$parsed_sha1};1214if(!$child) {1215printf STDERR "Cannot find a path in history from remote commit to last commit\n";1216return error_non_fast_forward($remote);1217}1218push(@commit_pairs, [$parsed_sha1,$child]);1219$parsed_sha1=$child;1220}1221}else{1222# No remote mediawiki revision. Export the whole1223# history (linearized with --first-parent)1224print STDERR "Warning: no common ancestor, pushing complete history\n";1225my$history= run_git("rev-list --first-parent --children$local");1226my@history=split('\n',$history);1227@history=@history[1..$#history];1228foreachmy$line(reverse@history) {1229my@commit_info_split=split(/ |\n/,$line);1230push(@commit_pairs, \@commit_info_split);1231}1232}12331234foreachmy$commit_info_split(@commit_pairs) {1235my$sha1_child= @{$commit_info_split}[0];1236my$sha1_commit= @{$commit_info_split}[1];1237my$diff_infos= run_git("diff-tree -r --raw -z$sha1_child$sha1_commit");1238# TODO: we could detect rename, and encode them with a #redirect on the wiki.1239# TODO: for now, it's just a delete+add1240my@diff_info_list=split(/\0/,$diff_infos);1241# Keep the subject line of the commit message as mediawiki comment for the revision1242my$commit_msg= run_git("log --no-walk --format=\"%s\"$sha1_commit");1243chomp($commit_msg);1244# Push every blob1245while(@diff_info_list) {1246my$status;1247# git diff-tree -z gives an output like1248# <metadata>\0<filename1>\01249# <metadata>\0<filename2>\01250# and we've split on \0.1251my$info=shift(@diff_info_list);1252my$file=shift(@diff_info_list);1253($mw_revision,$status) = mw_push_file($info,$file,$commit_msg,$mw_revision);1254if($statuseq"non-fast-forward") {1255# we may already have sent part of the1256# commit to MediaWiki, but it's too1257# late to cancel it. Stop the push in1258# the middle, but still give an1259# accurate error message.1260return error_non_fast_forward($remote);1261}1262if($statusne"ok") {1263die("Unknown error from mw_push_file()");1264}1265}1266unless($dumb_push) {1267 run_git("notes --ref=$remotename/mediawikiadd -f -m\"mediawiki_revision:$mw_revision\"$sha1_commit");1268 run_git("update-ref -m\"Git-MediaWiki push\"refs/mediawiki/$remotename/master$sha1_commit$sha1_child");1269}1270}12711272print STDOUT "ok$remote\n";1273return1;1274}12751276sub get_allowed_file_extensions {1277 mw_connect_maybe();12781279my$query= {1280 action =>'query',1281 meta =>'siteinfo',1282 siprop =>'fileextensions'1283};1284my$result=$mediawiki->api($query);1285my@file_extensions=map$_->{ext},@{$result->{query}->{fileextensions}};1286my%hashFile=map{$_=>1}@file_extensions;12871288return%hashFile;1289}12901291# In memory cache for MediaWiki namespace ids.1292my%namespace_id;12931294# Namespaces whose id is cached in the configuration file1295# (to avoid duplicates)1296my%cached_mw_namespace_id;12971298# Return MediaWiki id for a canonical namespace name.1299# Ex.: "File", "Project".1300sub get_mw_namespace_id {1301 mw_connect_maybe();1302my$name=shift;13031304if(!exists$namespace_id{$name}) {1305# Look at configuration file, if the record for that namespace is1306# already cached. Namespaces are stored in form:1307# "Name_of_namespace:Id_namespace", ex.: "File:6".1308my@temp=split(/[\n]/, run_git("config --get-all remote."1309.$remotename.".namespaceCache"));1310chomp(@temp);1311foreachmy$ns(@temp) {1312my($n,$id) =split(/:/,$ns);1313if($ideq'notANameSpace') {1314$namespace_id{$n} = {is_namespace =>0};1315}else{1316$namespace_id{$n} = {is_namespace =>1, id =>$id};1317}1318$cached_mw_namespace_id{$n} =1;1319}1320}13211322if(!exists$namespace_id{$name}) {1323print STDERR "Namespace$namenot found in cache, querying the wiki ...\n";1324# NS not found => get namespace id from MW and store it in1325# configuration file.1326my$query= {1327 action =>'query',1328 meta =>'siteinfo',1329 siprop =>'namespaces'1330};1331my$result=$mediawiki->api($query);13321333while(my($id,$ns) =each(%{$result->{query}->{namespaces}})) {1334if(defined($ns->{id}) &&defined($ns->{canonical})) {1335$namespace_id{$ns->{canonical}} = {is_namespace =>1, id =>$ns->{id}};1336if($ns->{'*'}) {1337# alias (e.g. french Fichier: as alias for canonical File:)1338$namespace_id{$ns->{'*'}} = {is_namespace =>1, id =>$ns->{id}};1339}1340}1341}1342}13431344my$ns=$namespace_id{$name};1345my$id;13461347unless(defined$ns) {1348print STDERR "No such namespace$nameon MediaWiki.\n";1349$ns= {is_namespace =>0};1350$namespace_id{$name} =$ns;1351}13521353if($ns->{is_namespace}) {1354$id=$ns->{id};1355}13561357# Store "notANameSpace" as special value for inexisting namespaces1358my$store_id= ($id||'notANameSpace');13591360# Store explicitely requested namespaces on disk1361if(!exists$cached_mw_namespace_id{$name}) {1362 run_git("config --add remote.".$remotename1363.".namespaceCache\"".$name.":".$store_id."\"");1364$cached_mw_namespace_id{$name} =1;1365}1366return$id;1367}13681369sub get_mw_namespace_id_for_page {1370if(my($namespace) =$_[0] =~/^([^:]*):/) {1371return get_mw_namespace_id($namespace);1372}else{1373return;1374}1375}