#! /usr/bin/perl

# Copyright (C) 2011
#     Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr>
#     Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr>
#     Claire Fousse <claire.fousse@ensimag.imag.fr>
#     David Amouyal <david.amouyal@ensimag.imag.fr>
#     Matthieu Moy <matthieu.moy@grenoble-inp.fr>
# License: GPL v2 or later

# Gateway between Git and MediaWiki.
# https://github.com/Bibzball/Git-Mediawiki/wiki
#
# Known limitations:
#
# - Poor performance in the best case: it takes forever to check
#   whether we're up-to-date (on fetch or push) or to fetch a few
#   revisions from a large wiki, because we use exclusively a
#   page-based synchronization. We could switch to a wiki-wide
#   synchronization when the synchronization involves few revisions
#   but the wiki is large.
#
# - Git renames could be turned into MediaWiki renames (see TODO
#   below).
#
# - login/password support requires the user to write the password
#   cleartext in a file (see TODO below).
#
# - No way to import "one page, and all pages included in it".
#
# - Multiple remote MediaWikis have not been very well tested.

use strict;
use MediaWiki::API;
use DateTime::Format::ISO8601;
use FileHandle;

# By default, use UTF-8 to communicate with Git and the user
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";

use URI::Escape;
use IPC::Open2;
# Used directly below (also pulled in by MediaWiki::API, but be explicit).
use URI;
use LWP::UserAgent;

use warnings;

# MediaWiki filenames can contain forward slashes. This variable
# decides by which pattern they should be replaced.
use constant SLASH_REPLACEMENT => "%2F";

# It's not always possible to delete pages (this may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";

# It's not possible to create empty pages. New empty files in Git are
# sent with this content instead.
use constant EMPTY_CONTENT => "<!-- empty page -->\n";

# Used to reflect file creation or deletion in diff.
use constant NULL_SHA1 => "0000000000000000000000000000000000000000";

my $remotename = $ARGV[0];
my $url = $ARGV[1];

# Accept both space-separated and multiple keys in config file.
# Spaces should be written as _ anyway because we'll use chomp.
my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".pages"));
chomp(@tracked_pages);

# Just like @tracked_pages, but for MediaWiki categories.
my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".categories"));
chomp(@tracked_categories);

# Import media files too.
my $import_media = run_git("config --get --bool remote." . $remotename . ".mediaimport");
chomp($import_media);
$import_media = ($import_media eq "true");

my $wiki_login = run_git("config --get remote." . $remotename . ".mwLogin");
# TODO: ideally, this should be able to read from keyboard, but we're
# inside a remote helper, so our stdin is connected to git, not to a
# terminal.
my $wiki_passwd = run_git("config --get remote." . $remotename . ".mwPassword");
my $wiki_domain = run_git("config --get remote." . $remotename . ".mwDomain");
chomp($wiki_login);
chomp($wiki_passwd);
chomp($wiki_domain);

# Import only last revisions (both for clone and fetch)
my $shallow_import = run_git("config --get --bool remote." . $remotename . ".shallow");
chomp($shallow_import);
$shallow_import = ($shallow_import eq "true");

# Cache for MediaWiki namespace ids.
my %namespace_id;
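
# The remote.<remotename>.* keys read above are plain git config
# entries. A minimal sketch of a configuration, assuming a remote
# named "origin" (the page and category names are hypothetical):
#
#   git config remote.origin.pages "Main_Page Some_Other_Page"
#   git config remote.origin.categories "SomeCategory"
#   git config --bool remote.origin.mediaimport true
#   git config --bool remote.origin.shallow true
#   git config remote.origin.mwLogin "WikiUser"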

# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
# remote.<remotename>.dumbPush.
#
# This means the user will have to re-import the just-pushed
# revisions. On the other hand, this means that the Git revisions
# corresponding to MediaWiki revisions are all imported from the wiki,
# regardless of whether they were initially created in Git or from the
# web interface, hence all users will get the same history (i.e. if
# the push from Git to MediaWiki loses some information, everybody
# will get the history with information lost). If the import is
# deterministic, this means everybody gets the same sha1 for each
# MediaWiki revision.
my $dumb_push = run_git("config --get --bool remote.$remotename.dumbPush");
unless ($dumb_push) {
	$dumb_push = run_git("config --get --bool mediawiki.dumbPush");
}
chomp($dumb_push);
$dumb_push = ($dumb_push eq "true");

my $wiki_name = $url;
$wiki_name =~ s/[^\/]*:\/\///;
# If URL is like http://user:password@example.com/, we clearly don't
# want the password in $wiki_name. While we're there, also remove user
# and '@' sign, to avoid author like MWUser@HTTPUser@host.com
$wiki_name =~ s/^.*@//;

# Commands parser
my $entry;
my @cmd;
while (<STDIN>) {
	chomp;
	@cmd = split(/ /);
	if (defined($cmd[0])) {
		# Line not blank
		if ($cmd[0] eq "capabilities") {
			die("Too many arguments for capabilities") unless (!defined($cmd[1]));
			mw_capabilities();
		} elsif ($cmd[0] eq "list") {
			die("Too many arguments for list") unless (!defined($cmd[2]));
			mw_list($cmd[1]);
		} elsif ($cmd[0] eq "import") {
			die("Invalid arguments for import") unless ($cmd[1] ne "" && !defined($cmd[2]));
			mw_import($cmd[1]);
		} elsif ($cmd[0] eq "option") {
			die("Too many arguments for option") unless ($cmd[1] ne "" && $cmd[2] ne "" && !defined($cmd[3]));
			mw_option($cmd[1], $cmd[2]);
		} elsif ($cmd[0] eq "push") {
			mw_push($cmd[1]);
		} else {
			print STDERR "Unknown command. Aborting...\n";
			last;
		}
	} else {
		# Blank line: we should terminate
		last;
	}

	BEGIN { $| = 1 } # flush STDOUT, to make sure the previous
			 # command is fully processed.
}
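
# A typical remote-helper session as driven by "git fetch" (a sketch;
# the exact exchange depends on the Git version, and "origin" is a
# hypothetical remote name):
#
#   git>    capabilities
#   helper> refspec refs/heads/*:refs/mediawiki/origin/*
#   helper> import
#   helper> list
#   helper> push
#   git>    list
#   helper> ? refs/heads/master
#   helper> @refs/heads/master HEAD
#   git>    import refs/heads/master
#   helper> (fast-import stream)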

########################## Functions ##############################

## credential API management (generic functions)

sub credential_from_url {
	my $url = shift;
	my $parsed = URI->new($url);
	my %credential;

	if ($parsed->scheme) {
		$credential{protocol} = $parsed->scheme;
	}
	if ($parsed->host) {
		$credential{host} = $parsed->host;
	}
	if ($parsed->path) {
		$credential{path} = $parsed->path;
	}
	if ($parsed->userinfo) {
		if ($parsed->userinfo =~ /([^:]*):(.*)/) {
			$credential{username} = $1;
			$credential{password} = $2;
		} else {
			$credential{username} = $parsed->userinfo;
		}
	}

	return %credential;
}

sub credential_read {
	my %credential;
	my $reader = shift;
	my $op = shift;
	while (<$reader>) {
		my ($key, $value) = /([^=]*)=(.*)/;
		if (not defined $key) {
			die "ERROR receiving response from git credential $op:\n$_\n";
		}
		$credential{$key} = $value;
	}
	return %credential;
}

sub credential_write {
	my $credential = shift;
	my $writer = shift;
	while (my ($key, $value) = each(%$credential)) {
		if ($value) {
			print $writer "$key=$value\n";
		}
	}
}

sub credential_run {
	my $op = shift;
	my $credential = shift;
	my $pid = open2(my $reader, my $writer, "git credential $op");
	credential_write($credential, $writer);
	print $writer "\n";
	close($writer);

	if ($op eq "fill") {
		%$credential = credential_read($reader, $op);
	} else {
		if (<$reader>) {
			die "ERROR while running git credential $op:\n$_";
		}
	}
	close($reader);
	waitpid($pid, 0);
	my $child_exit_status = $? >> 8;
	if ($child_exit_status != 0) {
		die "'git credential $op' failed with code $child_exit_status.";
	}
}
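
# The "git credential" protocol spoken above is a sequence of
# key=value lines terminated by a blank line. A sketch of a "fill"
# exchange (hypothetical host and user):
#
#   --> protocol=https
#   --> host=wiki.example.com
#   --> username=alice
#   -->
#   <-- username=alice
#   <-- password=secret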

# MediaWiki API instance, created lazily.
my $mediawiki;

sub mw_connect_maybe {
	if ($mediawiki) {
		return;
	}
	$mediawiki = MediaWiki::API->new;
	$mediawiki->{config}->{api_url} = "$url/api.php";
	if ($wiki_login) {
		my %credential = credential_from_url($url);
		$credential{username} = $wiki_login;
		$credential{password} = $wiki_passwd;
		credential_run("fill", \%credential);
		my $request = {lgname => $credential{username},
			       lgpassword => $credential{password},
			       lgdomain => $wiki_domain};
		if ($mediawiki->login($request)) {
			credential_run("approve", \%credential);
			print STDERR "Logged in mediawiki user \"$credential{username}\".\n";
		} else {
			print STDERR "Failed to log in mediawiki user \"$credential{username}\" on $url\n";
			print STDERR " (error " .
				$mediawiki->{error}->{code} . ': ' .
				$mediawiki->{error}->{details} . ")\n";
			credential_run("reject", \%credential);
			exit 1;
		}
	}
}

## Functions for listing pages on the remote wiki
sub get_mw_tracked_pages {
	my $pages = shift;
	my @some_pages = @tracked_pages;
	while (@some_pages) {
		# Query pages in batches of 50, the maximum number of
		# titles the MediaWiki API accepts in one query.
		my $last = 49;
		if ($#some_pages < $last) {
			$last = $#some_pages;
		}
		my @slice = @some_pages[0..$last];
		get_mw_first_pages(\@slice, $pages);
		@some_pages = @some_pages[50..$#some_pages];
	}
}

sub get_mw_tracked_categories {
	my $pages = shift;
	foreach my $category (@tracked_categories) {
		if (index($category, ':') < 0) {
			# MediaWiki requires the Category
			# prefix, but let's not force the user
			# to specify it.
			$category = "Category:" . $category;
		}
		my $mw_pages = $mediawiki->list( {
			action => 'query',
			list => 'categorymembers',
			cmtitle => $category,
			cmlimit => 'max' } )
			|| die $mediawiki->{error}->{code} . ': '
				. $mediawiki->{error}->{details};
		foreach my $page (@{$mw_pages}) {
			$pages->{$page->{title}} = $page;
		}
	}
}

sub get_mw_all_pages {
	my $pages = shift;
	# No user-provided list, get the list of pages from the API.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		print STDERR "fatal: could not get the list of wiki pages.\n";
		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
}

# Queries the wiki for a set of pages. Meant to be used within a loop
# querying the wiki for slices of page list.
sub get_mw_first_pages {
	my $some_pages = shift;
	my @some_pages = @{$some_pages};

	my $pages = shift;

	# pattern 'page1|page2|...' required by the API
	my $titles = join('|', @some_pages);

	my $mw_pages = $mediawiki->api({
		action => 'query',
		titles => $titles,
	});
	if (!defined($mw_pages)) {
		print STDERR "fatal: could not query the list of wiki pages.\n";
		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}
	while (my ($id, $page) = each(%{$mw_pages->{query}->{pages}})) {
		if ($id < 0) {
			print STDERR "Warning: page $page->{title} not found on wiki\n";
		} else {
			$pages->{$page->{title}} = $page;
		}
	}
}

# Get the list of pages to be fetched according to configuration.
sub get_mw_pages {
	mw_connect_maybe();

	my %pages; # hash on page titles to avoid duplicates
	my $user_defined;
	if (@tracked_pages) {
		$user_defined = 1;
		# The user provided a list of page titles, but we
		# still need to query the API to get the page IDs.
		get_mw_tracked_pages(\%pages);
	}
	if (@tracked_categories) {
		$user_defined = 1;
		get_mw_tracked_categories(\%pages);
	}
	if (!$user_defined) {
		get_mw_all_pages(\%pages);
	}
	if ($import_media) {
		print STDERR "Getting media files for selected pages...\n";
		if ($user_defined) {
			get_linked_mediafiles(\%pages);
		} else {
			get_all_mediafiles(\%pages);
		}
	}
	return values(%pages);
}
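
# Each element returned by get_mw_pages() is the page hash produced
# by the MediaWiki API, roughly (a sketch):
#
#   { pageid => 123, ns => 0, title => "Main Page" }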

# usage: $out = run_git("command args");
#        $out = run_git("command args", "raw"); # don't interpret output as UTF-8.
sub run_git {
	my $args = shift;
	my $encoding = (shift || "encoding(UTF-8)");
	open(my $git, "-|:$encoding", "git " . $args);
	my $res = do { local $/; <$git> };
	close($git);

	return $res;
}


sub get_all_mediafiles {
	my $pages = shift;
	# Get the list of media file pages from the API. They live in
	# their own namespace, and only one namespace can be queried
	# at a time.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		apnamespace => get_mw_namespace_id("File"),
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		print STDERR "fatal: could not get the list of pages for media files.\n";
		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
}

sub get_linked_mediafiles {
	my $pages = shift;
	my @titles = map $_->{title}, values(%{$pages});

	# The query is split in small batches because of the MW API limit of
	# the number of links to be returned (500 links max).
	my $batch = 10;
	while (@titles) {
		if ($#titles < $batch) {
			$batch = $#titles;
		}
		my @slice = @titles[0..$batch];

		# pattern 'page1|page2|...' required by the API
		my $mw_titles = join('|', @slice);

		# Media files could be included or linked from
		# a page, get all related
		my $query = {
			action => 'query',
			prop => 'links|images',
			titles => $mw_titles,
			plnamespace => get_mw_namespace_id("File"),
			pllimit => 'max'
		};
		my $result = $mediawiki->api($query);

		while (my ($id, $page) = each(%{$result->{query}->{pages}})) {
			my @titles;
			if (defined($page->{links})) {
				my @link_titles = map $_->{title}, @{$page->{links}};
				push(@titles, @link_titles);
			}
			if (defined($page->{images})) {
				my @image_titles = map $_->{title}, @{$page->{images}};
				push(@titles, @image_titles);
			}
			if (@titles) {
				get_mw_first_pages(\@titles, \%{$pages});
			}
		}

		@titles = @titles[($batch + 1)..$#titles];
	}
}
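
# get_mw_namespace_id("File"), used by the two functions above and
# defined at the end of this file, resolves the numeric id of the
# "File" namespace (6 on a default MediaWiki install). It is queried
# rather than hard-coded, and cached in remote.<remotename>.namespaces.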

sub get_mw_mediafile_for_page_revision {
	# Name of the file on Wiki, with the prefix.
	my $mw_filename = shift;
	my $timestamp = shift;
	my %mediafile;

	# Check whether the wiki has a media file revision with the
	# given timestamp. In that case, download the file.
	my $query = {
		action => 'query',
		prop => 'imageinfo',
		titles => $mw_filename,
		iistart => $timestamp,
		iiend => $timestamp,
		iiprop => 'timestamp|archivename|url',
		iilimit => 1
	};
	my $result = $mediawiki->api($query);

	my ($fileid, $file) = each( %{$result->{query}->{pages}} );
	# If not defined, it means there is no revision of the file for
	# the given timestamp.
	if (defined($file->{imageinfo})) {
		# Get the real name of the media file by stripping the
		# namespace prefix ("File:" or "Image:").
		my $filename;
		if (index($mw_filename, 'File:') == 0) {
			$filename = substr $mw_filename, 5;
		} else {
			$filename = substr $mw_filename, 6;
		}
		$mediafile{title} = $filename;

		my $fileinfo = pop(@{$file->{imageinfo}});
		$mediafile{timestamp} = $fileinfo->{timestamp};
		# If this is an old version of the file, the file has to be
		# obtained from the archive. Otherwise it can be downloaded
		# by the MediaWiki API download() function.
		if (defined($fileinfo->{archivename})) {
			$mediafile{content} = download_mw_mediafile_from_archive($fileinfo->{url});
		} else {
			$mediafile{content} = download_mw_mediafile($mw_filename);
		}
	}
	return %mediafile;
}

sub download_mw_mediafile_from_archive {
	my $url = shift;
	my $file;

	my $ua = LWP::UserAgent->new;
	my $response = $ua->get($url);
	if ($response->code == 200) {
		$file = $response->decoded_content;
	} else {
		print STDERR "Error downloading a file from archive.\n";
	}

	return $file;
}

sub download_mw_mediafile {
	my $filename = shift;

	$mediawiki->{config}->{files_url} = $url;

	my $file_content = $mediawiki->download( { title => $filename } );
	if (!defined($file_content)) {
		print STDERR "\tFile '$filename' could not be downloaded.\n";
		exit 1;
	} elsif ($file_content eq "") {
		print STDERR "\tFile '$filename' does not exist on the wiki.\n";
		exit 1;
	} else {
		return $file_content;
	}
}

sub get_last_local_revision {
	# Get note regarding last mediawiki revision
	my $note = run_git("notes --ref=$remotename/mediawiki show refs/mediawiki/$remotename/master 2>/dev/null");
	my @note_info = split(/ /, $note);

	my $lastrevision_number;
	if (!(defined($note_info[0]) && $note_info[0] eq "mediawiki_revision:")) {
		print STDERR "No previous mediawiki revision found";
		$lastrevision_number = 0;
	} else {
		# Notes are formatted: mediawiki_revision: #number
		$lastrevision_number = $note_info[1];
		chomp($lastrevision_number);
		print STDERR "Last local mediawiki revision found is $lastrevision_number";
	}
	return $lastrevision_number;
}

# Remember the timestamp corresponding to a revision id.
my %basetimestamps;

sub get_last_remote_revision {
	mw_connect_maybe();

	my @pages = get_mw_pages();

	my $max_rev_num = 0;

	foreach my $page (@pages) {
		my $id = $page->{pageid};

		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'ids|timestamp',
			pageids => $id,
		};

		my $result = $mediawiki->api($query);

		my $lastrev = pop(@{$result->{query}->{pages}->{$id}->{revisions}});

		$basetimestamps{$lastrev->{revid}} = $lastrev->{timestamp};

		$max_rev_num = ($lastrev->{revid} > $max_rev_num ? $lastrev->{revid} : $max_rev_num);
	}

	print STDERR "Last remote revision found is $max_rev_num.\n";
	return $max_rev_num;
}

# Clean content before sending it to MediaWiki
sub mediawiki_clean {
	my $string = shift;
	my $page_created = shift;
	# MediaWiki does not allow blank space at the end of a page, and
	# ends a page with a single \n. This function right-trims a string
	# and adds a \n at the end to follow this rule.
	$string =~ s/\s+$//;
	if ($string eq "" && $page_created) {
		# Creating empty pages is forbidden.
		$string = EMPTY_CONTENT;
	}
	return $string . "\n";
}

# Filter applied on MediaWiki data before adding them to Git
sub mediawiki_smudge {
	my $string = shift;
	if ($string eq EMPTY_CONTENT) {
		$string = "";
	}
	# This \n is important. This is due to mediawiki's way to handle end of files.
	return $string . "\n";
}
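
# Example of the filename mappings implemented below (a sketch):
# mediawiki_smudge_filename() turns the wiki title "Foo/Bar Baz" into
# the Git file name "Foo%2FBar_Baz" (the ".mw" extension is appended
# elsewhere), and mediawiki_clean_filename() reverses the slash
# replacement when pushing back (MediaWiki itself treats '_' and ' '
# as equivalent in titles).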

sub mediawiki_clean_filename {
	my $filename = shift;
	$filename =~ s/@{[SLASH_REPLACEMENT]}/\//g;
	# [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
	# Do a variant of URL-encoding, i.e. looks like URL-encoding,
	# but with _ added to prevent MediaWiki from thinking this is
	# an actual special character.
	$filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
	# If we use the uri escape before
	# we should unescape here, before anything

	return $filename;
}

sub mediawiki_smudge_filename {
	my $filename = shift;
	$filename =~ s/\//@{[SLASH_REPLACEMENT]}/g;
	$filename =~ s/ /_/g;
	# Decode forbidden characters encoded in mediawiki_clean_filename
	$filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge;
	return $filename;
}

sub literal_data {
	my ($content) = @_;
	print STDOUT "data ", bytes::length($content), "\n", $content;
}

sub literal_data_raw {
	# Output possibly binary content.
	my ($content) = @_;
	# Avoid confusion between size in bytes and in characters
	utf8::downgrade($content);
	binmode STDOUT, ":raw";
	print STDOUT "data ", bytes::length($content), "\n", $content;
	binmode STDOUT, ":utf8";
}

sub mw_capabilities {
	# Revisions are imported to the private namespace
	# refs/mediawiki/$remotename/ by the helper and fetched into
	# refs/remotes/$remotename later by fetch.
	print STDOUT "refspec refs/heads/*:refs/mediawiki/$remotename/*\n";
	print STDOUT "import\n";
	print STDOUT "list\n";
	print STDOUT "push\n";
	print STDOUT "\n";
}

sub mw_list {
	# MediaWiki does not have branches, so we consider one branch
	# arbitrarily called master, with HEAD pointing to it.
	print STDOUT "? refs/heads/master\n";
	print STDOUT "\@refs/heads/master HEAD\n";
	print STDOUT "\n";
}

sub mw_option {
	print STDERR "remote-helper command 'option $_[0]' not yet implemented\n";
	print STDOUT "unsupported\n";
}

sub fetch_mw_revisions_for_page {
	my $page = shift;
	my $id = shift;
	my $fetch_from = shift;
	my @page_revs = ();
	my $query = {
		action => 'query',
		prop => 'revisions',
		rvprop => 'ids',
		rvdir => 'newer',
		rvstartid => $fetch_from,
		rvlimit => 500,
		pageids => $id,
	};

	my $revnum = 0;
	# Get 500 revisions at a time due to the mediawiki api limit
	while (1) {
		my $result = $mediawiki->api($query);

		# Parse each of those 500 revisions
		foreach my $revision (@{$result->{query}->{pages}->{$id}->{revisions}}) {
			my $page_rev_ids;
			$page_rev_ids->{pageid} = $page->{pageid};
			$page_rev_ids->{revid} = $revision->{revid};
			push(@page_revs, $page_rev_ids);
			$revnum++;
		}
		last unless $result->{'query-continue'};
		$query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid};
	}
	if ($shallow_import && @page_revs) {
		print STDERR " Found 1 revision (shallow import).\n";
		@page_revs = sort {$b->{revid} <=> $a->{revid}} (@page_revs);
		return $page_revs[0];
	}
	print STDERR " Found ", $revnum, " revision(s).\n";
	return @page_revs;
}

sub fetch_mw_revisions {
	my $pages = shift; my @pages = @{$pages};
	my $fetch_from = shift;

	my @revisions = ();
	my $n = 1;
	foreach my $page (@pages) {
		my $id = $page->{pageid};

		print STDERR "page $n/", scalar(@pages), ": " . $page->{title} . "\n";
		$n++;
		my @page_revs = fetch_mw_revisions_for_page($page, $id, $fetch_from);
		@revisions = (@page_revs, @revisions);
	}

	return ($n, @revisions);
}
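
# import_file_revision() below emits a git fast-import stream. For a
# single revision it looks roughly like this (a sketch, with a
# hypothetical author, remote name and content):
#
#   commit refs/mediawiki/origin/master
#   mark :1
#   committer WikiUser <WikiUser@wiki.example.com> 1234567890 +0000
#   data 12
#   Page created
#   M 644 inline Main_Page.mw
#   data 6
#   hello
#
# followed by a commit on refs/notes/origin/mediawiki recording
# "mediawiki_revision: <revid>" as a note on the new commit.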
"commit refs/mediawiki/$remotename/master\n"; 755print STDOUT "mark :$n\n"; 756print STDOUT "committer$author<$author\@$wiki_name> ",$date->epoch," +0000\n"; 757 literal_data($comment); 758 759# If it's not a clone, we need to know where to start from 760if(!$full_import&&$n==1) { 761print STDOUT "from refs/mediawiki/$remotename/master^0\n"; 762} 763if($contentne DELETED_CONTENT) { 764print STDOUT "M 644 inline$title.mw\n"; 765 literal_data($content); 766if(%mediafile) { 767print STDOUT "M 644 inline$mediafile{title}\n"; 768 literal_data_raw($mediafile{content}); 769} 770print STDOUT "\n\n"; 771}else{ 772print STDOUT "D$title.mw\n"; 773} 774 775# mediawiki revision number in the git note 776if($full_import&&$n==1) { 777print STDOUT "reset refs/notes/$remotename/mediawiki\n"; 778} 779print STDOUT "commit refs/notes/$remotename/mediawiki\n"; 780print STDOUT "committer$author<$author\@$wiki_name> ",$date->epoch," +0000\n"; 781 literal_data("Note added by git-mediawiki during import"); 782if(!$full_import&&$n==1) { 783print STDOUT "from refs/notes/$remotename/mediawiki^0\n"; 784} 785print STDOUT "N inline :$n\n"; 786 literal_data("mediawiki_revision: ".$commit{mw_revision}); 787print STDOUT "\n\n"; 788} 789 790# parse a sequence of 791# <cmd> <arg1> 792# <cmd> <arg2> 793# \n 794# (like batch sequence of import and sequence of push statements) 795sub get_more_refs { 796my$cmd=shift; 797my@refs; 798while(1) { 799my$line= <STDIN>; 800if($line=~m/^$cmd (.*)$/) { 801push(@refs,$1); 802}elsif($lineeq"\n") { 803return@refs; 804}else{ 805die("Invalid command in a '$cmd' batch: ".$_); 806} 807} 808} 809 810sub mw_import { 811# multiple import commands can follow each other. 812my@refs= (shift, get_more_refs("import")); 813foreachmy$ref(@refs) { 814 mw_import_ref($ref); 815} 816print STDOUT "done\n"; 817} 818 819sub mw_import_ref { 820my$ref=shift; 821# The remote helper will call "import HEAD" and 822# "import refs/heads/master". 823# Since HEAD is a symbolic ref to master (by convention, 824# followed by the output of the command "list" that we gave), 825# we don't need to do anything in this case. 

sub mw_import_ref {
	my $ref = shift;
	# The remote helper will call "import HEAD" and
	# "import refs/heads/master".
	# Since HEAD is a symbolic ref to master (by convention,
	# followed by the output of the command "list" that we gave),
	# we don't need to do anything in this case.
	if ($ref eq "HEAD") {
		return;
	}

	mw_connect_maybe();

	my @pages = get_mw_pages();

	print STDERR "Searching revisions...\n";
	my $last_local = get_last_local_revision();
	my $fetch_from = $last_local + 1;
	if ($fetch_from == 1) {
		print STDERR ", fetching from beginning.\n";
	} else {
		print STDERR ", fetching from here.\n";
	}
	my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from);

	# Creation of the fast-import stream
	print STDERR "Fetching & writing export data...\n";

	$n = 0;
	my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined

	foreach my $pagerevid (sort {$a->{revid} <=> $b->{revid}} @revisions) {
		# Fetch the content of the pages.
		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'content|timestamp|comment|user|ids',
			revids => $pagerevid->{revid},
		};

		my $result = $mediawiki->api($query);

		my $rev = pop(@{$result->{query}->{pages}->{$pagerevid->{pageid}}->{revisions}});

		$n++;

		my $page_title = $result->{query}->{pages}->{$pagerevid->{pageid}}->{title};
		my %commit;
		$commit{author} = $rev->{user} || 'Anonymous';
		$commit{comment} = $rev->{comment} || '*Empty MediaWiki Message*';
		$commit{title} = mediawiki_smudge_filename($page_title);
		$commit{mw_revision} = $pagerevid->{revid};
		$commit{content} = mediawiki_smudge($rev->{'*'});

		if (!defined($rev->{timestamp})) {
			$last_timestamp++;
		} else {
			$last_timestamp = $rev->{timestamp};
		}
		$commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp);

		# Differentiate classic pages and media files.
		my @prefix = split(":", $page_title);

		my %mediafile;
		if ($prefix[0] eq "File" || $prefix[0] eq "Image") {
			# The name of the file is the same as the media page.
			my $filename = $page_title;
			%mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
		}
		# If this is a revision of the media page for a new version
		# of a file, do one common commit for both the file and the
		# media page. Else do a commit only for that page.
		print STDERR "$n/", scalar(@revisions), ": Revision #$pagerevid->{revid} of $commit{title}\n";
		if (%mediafile) {
			print STDERR "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n";
			import_file_revision(\%commit, ($fetch_from == 1), $n, \%mediafile);
		} else {
			import_file_revision(\%commit, ($fetch_from == 1), $n);
		}
	}

	if ($fetch_from == 1 && $n == 0) {
		print STDERR "You appear to have cloned an empty MediaWiki.\n";
		# Something has to be done remote-helper side. If nothing is done, an error is
		# thrown saying that HEAD is referring to unknown object 0000000000000000000
		# and the clone fails.
	}
}
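
# After an import, the MediaWiki revision associated with each commit
# can be inspected through the notes ref written above, e.g. (a
# sketch, assuming a remote named "origin"):
#
#   git log refs/mediawiki/origin/master --notes=origin/mediawiki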

sub error_non_fast_forward {
	my $advice = run_git("config --bool advice.pushNonFastForward");
	chomp($advice);
	if ($advice ne "false") {
		# Native git-push would show this after the summary.
		# We can't ask it to display it cleanly, so print it
		# ourselves before.
		print STDERR "To prevent you from losing history, non-fast-forward updates were rejected\n";
		print STDERR "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n";
		print STDERR "'Note about fast-forwards' section of 'git push --help' for details.\n";
	}
	print STDOUT "error $_[0] \"non-fast-forward\"\n";
	return 0;
}

sub mw_upload_file {
	my $complete_file_name = shift;
	my $new_sha1 = shift;
	my $extension = shift;
	my $file_deleted = shift;
	my $summary = shift;
	my $newrevid;
	my $path = "File:" . $complete_file_name;
	my %hashFiles = get_allowed_file_extensions();
	if (!exists($hashFiles{$extension})) {
		print STDERR "$complete_file_name is not a permitted file on this wiki.\n";
		print STDERR "Check the configuration of file uploads in your mediawiki.\n";
		return $newrevid;
	}
	# Deleting and uploading a file requires a privileged user
	if ($file_deleted) {
		mw_connect_maybe();
		my $query = {
			action => 'delete',
			title => $path,
			reason => $summary
		};
		if (!$mediawiki->edit($query)) {
			print STDERR "Failed to delete file on remote wiki\n";
			print STDERR "Check your permissions on the remote site. Error code:\n";
			print STDERR $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details};
			exit 1;
		}
	} else {
		# Don't let perl try to interpret file content as UTF-8 => use "raw"
		my $content = run_git("cat-file blob $new_sha1", "raw");
		if ($content ne "") {
			mw_connect_maybe();
			$mediawiki->{config}->{upload_url} =
				"$url/index.php/Special:Upload";
			$mediawiki->edit({
				action => 'upload',
				filename => $complete_file_name,
				comment => $summary,
				file => [undef,
					 $complete_file_name,
					 Content => $content],
				ignorewarnings => 1,
			}, {
				skip_encoding => 1
			} ) || die $mediawiki->{error}->{code} . ':'
				 . $mediawiki->{error}->{details};
			my $last_file_page = $mediawiki->get_page({title => $path});
			$newrevid = $last_file_page->{revid};
			print STDERR "Pushed file: $new_sha1 - $complete_file_name.\n";
		} else {
			print STDERR "Empty file $complete_file_name not pushed.\n";
		}
	}
	return $newrevid;
}

sub mw_push_file {
	my $diff_info = shift;
	# $diff_info contains a string in this format:
	# :100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status>
	my @diff_info_split = split(/[ \t]/, $diff_info);

	# Filename, including .mw extension
	my $complete_file_name = shift;
	# Commit message
	my $summary = shift;
	# MediaWiki revision number. Keep the previous one by default,
	# in case there's no edit to perform.
	my $oldrevid = shift;
	my $newrevid;

	my $new_sha1 = $diff_info_split[3];
	my $old_sha1 = $diff_info_split[2];
	my $page_created = ($old_sha1 eq NULL_SHA1);
	my $page_deleted = ($new_sha1 eq NULL_SHA1);
	$complete_file_name = mediawiki_clean_filename($complete_file_name);

	my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/;
	if (!defined($extension)) {
		$extension = "";
	}
	if ($extension eq "mw") {
		my $file_content;
		if ($page_deleted) {
			# Deleting a page usually requires
			# special privileges. A common
			# convention is to replace the page
			# with this content instead:
			$file_content = DELETED_CONTENT;
		} else {
			$file_content = run_git("cat-file blob $new_sha1");
		}

		mw_connect_maybe();

		my $result = $mediawiki->edit( {
			action => 'edit',
			summary => $summary,
			title => $title,
			basetimestamp => $basetimestamps{$oldrevid},
			text => mediawiki_clean($file_content, $page_created),
		}, {
			skip_encoding => 1 # Helps with names with accented characters
		});
		if (!$result) {
			if ($mediawiki->{error}->{code} == 3) {
				# edit conflicts, considered as non-fast-forward
				print STDERR 'Warning: Error ' .
					$mediawiki->{error}->{code} .
					' from mediawiki: ' . $mediawiki->{error}->{details} .
					".\n";
				return ($oldrevid, "non-fast-forward");
			} else {
				# Other errors. Shouldn't happen => just die()
				die 'Fatal: Error ' .
					$mediawiki->{error}->{code} .
					' from mediawiki: ' . $mediawiki->{error}->{details};
			}
		}
		$newrevid = $result->{edit}->{newrevid};
		print STDERR "Pushed file: $new_sha1 - $title\n";
	} else {
		$newrevid = mw_upload_file($complete_file_name, $new_sha1,
					   $extension, $page_deleted,
					   $summary);
	}
	$newrevid = ($newrevid or $oldrevid);
	return ($newrevid, "ok");
}
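
# Example of a push batch as sent by Git (a sketch): mw_push() gets
# the first refspec as its argument and get_more_refs("push")
# collects the rest until the blank line:
#
#   push refs/heads/master:refs/heads/master
#   (blank line)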

sub mw_push {
	# Multiple push statements can follow each other.
	my @refsspecs = (shift, get_more_refs("push"));
	my $pushed;
	for my $refspec (@refsspecs) {
		my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/
			or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>");
		if ($force) {
			print STDERR "Warning: forced push not allowed on a MediaWiki.\n";
		}
		if ($local eq "") {
			print STDERR "Cannot delete remote branch on a MediaWiki\n";
			print STDOUT "error $remote cannot delete\n";
			next;
		}
		if ($remote ne "refs/heads/master") {
			print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n";
			print STDOUT "error $remote only master allowed\n";
			next;
		}
		if (mw_push_revision($local, $remote)) {
			$pushed = 1;
		}
	}

	# Notify Git that the push is done
	print STDOUT "\n";

	if ($pushed && $dumb_push) {
		print STDERR "Just pushed some revisions to MediaWiki.\n";
		print STDERR "The pushed revisions now have to be re-imported, and your current branch\n";
		print STDERR "needs to be updated with these re-imported commits. You can do this with\n";
		print STDERR "\n";
		print STDERR "  git pull --rebase\n";
		print STDERR "\n";
	}
}
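
# mw_push_revision() below walks the history to push using
# "git rev-list --children", whose output lines look like (a sketch):
#
#   <commit sha1> <child sha1> [<child sha1>...]
#
# It follows these child links from the last commit known to be on
# the wiki up to the local HEAD, exporting one commit per MediaWiki
# edit.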

sub mw_push_revision {
	my $local = shift;
	my $remote = shift; # actually, this has to be "refs/heads/master" at this point.
	my $last_local_revid = get_last_local_revision();
	print STDERR ".\n"; # Finish sentence started by get_last_local_revision()
	my $last_remote_revid = get_last_remote_revision();
	my $mw_revision = $last_remote_revid;

	# Get sha1 of commit pointed by local HEAD
	my $HEAD_sha1 = run_git("rev-parse $local 2>/dev/null"); chomp($HEAD_sha1);
	# Get sha1 of commit pointed by remotes/$remotename/master
	my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/$remotename/master 2>/dev/null");
	chomp($remoteorigin_sha1);

	if ($last_local_revid > 0 &&
	    $last_local_revid < $last_remote_revid) {
		return error_non_fast_forward($remote);
	}

	if ($HEAD_sha1 eq $remoteorigin_sha1) {
		# nothing to push
		return 0;
	}

	# Get every commit in between HEAD and refs/remotes/origin/master,
	# including HEAD and refs/remotes/origin/master
	my @commit_pairs = ();
	if ($last_local_revid > 0) {
		my $parsed_sha1 = $remoteorigin_sha1;
		# Find a path from last MediaWiki commit to pushed commit
		while ($parsed_sha1 ne $HEAD_sha1) {
			my @commit_info = grep(/^$parsed_sha1/, split(/\n/, run_git("rev-list --children $local")));
			if (!@commit_info) {
				return error_non_fast_forward($remote);
			}
			my @commit_info_split = split(/ |\n/, $commit_info[0]);
			# $commit_info_split[0] is the commit already on the wiki;
			# $commit_info_split[1] is its child, the commit to export next.
			push(@commit_pairs, \@commit_info_split);
			$parsed_sha1 = $commit_info_split[1];
		}
	} else {
		# No remote mediawiki revision. Export the whole
		# history (linearized with --first-parent)
		print STDERR "Warning: no common ancestor, pushing complete history\n";
		my $history = run_git("rev-list --first-parent --children $local");
		my @history = split('\n', $history);
		@history = @history[1..$#history];
		foreach my $line (reverse @history) {
			my @commit_info_split = split(/ |\n/, $line);
			push(@commit_pairs, \@commit_info_split);
		}
	}

	foreach my $commit_info_split (@commit_pairs) {
		# [0] is the base side of the diff, [1] the commit whose
		# changes are pushed.
		my $sha1_child = @{$commit_info_split}[0];
		my $sha1_commit = @{$commit_info_split}[1];
		my $diff_infos = run_git("diff-tree -r --raw -z $sha1_child $sha1_commit");
		# TODO: we could detect renames, and encode them with a #redirect on the wiki.
		# TODO: for now, it's just a delete+add
		my @diff_info_list = split(/\0/, $diff_infos);
		# Keep the subject line of the commit message as mediawiki comment for the revision
		my $commit_msg = run_git("log --no-walk --format=\"%s\" $sha1_commit");
		chomp($commit_msg);
		# Push every blob
		while (@diff_info_list) {
			my $status;
			# git diff-tree -z gives an output like
			# <metadata>\0<filename1>\0
			# <metadata>\0<filename2>\0
			# and we've split on \0.
			my $info = shift(@diff_info_list);
			my $file = shift(@diff_info_list);
			($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision);
			if ($status eq "non-fast-forward") {
				# We may already have sent part of the
				# commit to MediaWiki, but it's too
				# late to cancel it. Stop the push in
				# the middle, but still give an
				# accurate error message.
				return error_non_fast_forward($remote);
			}
			if ($status ne "ok") {
				die("Unknown error from mw_push_file()");
			}
		}
		unless ($dumb_push) {
			run_git("notes --ref=$remotename/mediawiki add -m \"mediawiki_revision: $mw_revision\" $sha1_commit");
			run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child");
		}
	}

	print STDOUT "ok $remote\n";
	return 1;
}

sub get_allowed_file_extensions {
	mw_connect_maybe();

	my $query = {
		action => 'query',
		meta => 'siteinfo',
		siprop => 'fileextensions'
	};
	my $result = $mediawiki->api($query);
	my @file_extensions = map $_->{ext}, @{$result->{query}->{fileextensions}};
	my %hashFile = map { $_ => 1 } @file_extensions;

	return %hashFile;
}

# Return the MediaWiki id for a canonical namespace name.
# Ex.: "File", "Project".
# Looks for the namespace id in the local configuration
# variables; if it is not found there, asks the MW API.
sub get_mw_namespace_id {
	mw_connect_maybe();
	my $name = shift;

	if (!exists $namespace_id{$name}) {
		# Look at the configuration file to see if the record for
		# that namespace is already stored. Namespaces are stored
		# in the form "Name_of_namespace:Id_namespace", e.g. "File:6".
		my @temp = split(/[ \n]/, run_git("config --get-all remote."
			. $remotename . ".namespaces"));
		chomp(@temp);
		foreach my $ns (@temp) {
			my ($n, $s) = split(/:/, $ns);
			$namespace_id{$n} = $s;
		}
	}

	if (!exists $namespace_id{$name}) {
		# NS not found => get namespace id from MW and store it in
		# the configuration file.
		my $query = {
			action => 'query',
			meta => 'siteinfo',
			siprop => 'namespaces'
		};
		my $result = $mediawiki->api($query);

		while (my ($id, $ns) = each(%{$result->{query}->{namespaces}})) {
			if (defined($ns->{canonical}) && ($ns->{canonical} eq $name)) {
				run_git("config --add remote." . $remotename
					. ".namespaces " . $name . ":" . $ns->{id});
				$namespace_id{$name} = $ns->{id};
			}
		}
	}

	if (exists $namespace_id{$name}) {
		return $namespace_id{$name};
	} else {
		die "No such namespace $name on MediaWiki.";
	}
}