#! /usr/bin/perl

# Copyright (C) 2011
#     Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr>
#     Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr>
#     Claire Fousse <claire.fousse@ensimag.imag.fr>
#     David Amouyal <david.amouyal@ensimag.imag.fr>
#     Matthieu Moy <matthieu.moy@grenoble-inp.fr>
# License: GPL v2 or later

# Gateway between Git and MediaWiki.
#   https://github.com/Bibzball/Git-Mediawiki/wiki
#
# Known limitations:
#
# - Poor performance in the best case: it takes forever to check
#   whether we're up-to-date (on fetch or push) or to fetch a few
#   revisions from a large wiki, because we use exclusively a
#   page-based synchronization. We could switch to a wiki-wide
#   synchronization when the synchronization involves few revisions
#   but the wiki is large.
#
# - Git renames could be turned into MediaWiki renames (see TODO
#   below).
#
# - No way to import "one page, and all pages included in it".
#
# - Multiple remote MediaWikis have not been very well tested.

use strict;
use MediaWiki::API;
use DateTime::Format::ISO8601;

# By default, use UTF-8 to communicate with Git and the user
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";

use URI::Escape;
use IPC::Open2;

use warnings;

# MediaWiki filenames can contain forward slashes. This variable
# decides by which pattern they should be replaced.
use constant SLASH_REPLACEMENT => "%2F";

# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";

# It's not possible to create empty pages. New empty files in Git are
# sent with this content instead.
use constant EMPTY_CONTENT => "<!-- empty page -->\n";

# used to reflect file creation or deletion in diff.
use constant NULL_SHA1 => "0000000000000000000000000000000000000000";

my $remotename = $ARGV[0];
my $url = $ARGV[1];

# Accept both space-separated and multiple keys in config file.
# Spaces should be written as _ anyway because we'll use chomp.
my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".pages"));
chomp(@tracked_pages);

# Just like @tracked_pages, but for MediaWiki categories.
my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".categories"));
chomp(@tracked_categories);

# Import media files too.
my $import_media = run_git("config --get --bool remote." . $remotename . ".mediaimport");
chomp($import_media);
$import_media = ($import_media eq "true");

my $wiki_login = run_git("config --get remote." . $remotename . ".mwLogin");
# Note: mwPassword is discouraged. Use the credential system instead.
my $wiki_passwd = run_git("config --get remote." . $remotename . ".mwPassword");
my $wiki_domain = run_git("config --get remote." . $remotename . ".mwDomain");
chomp($wiki_login);
chomp($wiki_passwd);
chomp($wiki_domain);

# Import only last revisions (both for clone and fetch)
my $shallow_import = run_git("config --get --bool remote." . $remotename . ".shallow");
chomp($shallow_import);
$shallow_import = ($shallow_import eq "true");

# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
# remote.<remotename>.dumbPush.
#
# This means the user will have to re-import the just-pushed
# revisions.
# On the other hand, this means that the Git revisions
# corresponding to MediaWiki revisions are all imported from the wiki,
# regardless of whether they were initially created in Git or from the
# web interface, hence all users will get the same history (i.e. if
# the push from Git to MediaWiki loses some information, everybody
# will get the history with information lost). If the import is
# deterministic, this means everybody gets the same sha1 for each
# MediaWiki revision.
my $dumb_push = run_git("config --get --bool remote.$remotename.dumbPush");
unless ($dumb_push) {
    $dumb_push = run_git("config --get --bool mediawiki.dumbPush");
}
chomp($dumb_push);
$dumb_push = ($dumb_push eq "true");

my $wiki_name = $url;
$wiki_name =~ s/[^\/]*:\/\///;
# If URL is like http://user:password@example.com/, we clearly don't
# want the password in $wiki_name. While we're there, also remove user
# and '@' sign, to avoid author like MWUser@HTTPUser@host.com
$wiki_name =~ s/^.*@//;

# Commands parser
my $entry;
my @cmd;
while (<STDIN>) {
    chomp;
    @cmd = split(/ /);
    if (defined($cmd[0])) {
        # Line not blank
        if ($cmd[0] eq "capabilities") {
            die("Too many arguments for capabilities") unless (!defined($cmd[1]));
            mw_capabilities();
        } elsif ($cmd[0] eq "list") {
            die("Too many arguments for list") unless (!defined($cmd[2]));
            mw_list($cmd[1]);
        } elsif ($cmd[0] eq "import") {
            die("Invalid arguments for import") unless ($cmd[1] ne "" && !defined($cmd[2]));
            mw_import($cmd[1]);
        } elsif ($cmd[0] eq "option") {
            die("Too many arguments for option") unless ($cmd[1] ne "" && $cmd[2] ne "" && !defined($cmd[3]));
            mw_option($cmd[1], $cmd[2]);
        } elsif ($cmd[0] eq "push") {
            mw_push($cmd[1]);
        } else {
            print STDERR "Unknown command. Aborting...\n";
            last;
        }
    } else {
        # blank line: we should terminate
        last;
    }

    BEGIN { $| = 1 } # flush STDOUT, to make sure the previous
                     # command is fully processed.
148} 149 150########################## Functions ############################## 151 152## credential API management (generic functions) 153 154sub credential_from_url { 155my$url=shift; 156my$parsed= URI->new($url); 157my%credential; 158 159if($parsed->scheme) { 160$credential{protocol} =$parsed->scheme; 161} 162if($parsed->host) { 163$credential{host} =$parsed->host; 164} 165if($parsed->path) { 166$credential{path} =$parsed->path; 167} 168if($parsed->userinfo) { 169if($parsed->userinfo=~/([^:]*):(.*)/) { 170$credential{username} =$1; 171$credential{password} =$2; 172}else{ 173$credential{username} =$parsed->userinfo; 174} 175} 176 177return%credential; 178} 179 180sub credential_read { 181my%credential; 182my$reader=shift; 183my$op=shift; 184while(<$reader>) { 185my($key,$value) =/([^=]*)=(.*)/; 186if(not defined$key) { 187die"ERROR receiving response from git credential$op:\n$_\n"; 188} 189$credential{$key} =$value; 190} 191return%credential; 192} 193 194sub credential_write { 195my$credential=shift; 196my$writer=shift; 197while(my($key,$value) =each(%$credential) ) { 198if($value) { 199print$writer"$key=$value\n"; 200} 201} 202} 203 204sub credential_run { 205my$op=shift; 206my$credential=shift; 207my$pid= open2(my$reader,my$writer,"git credential$op"); 208 credential_write($credential,$writer); 209print$writer"\n"; 210close($writer); 211 212if($opeq"fill") { 213%$credential= credential_read($reader,$op); 214}else{ 215if(<$reader>) { 216die"ERROR while running git credential$op:\n$_"; 217} 218} 219close($reader); 220waitpid($pid,0); 221my$child_exit_status=$?>>8; 222if($child_exit_status!=0) { 223die"'git credential$op' failed with code$child_exit_status."; 224} 225} 226 227# MediaWiki API instance, created lazily. 228my$mediawiki; 229 230sub mw_connect_maybe { 231if($mediawiki) { 232return; 233} 234$mediawiki= MediaWiki::API->new; 235$mediawiki->{config}->{api_url} ="$url/api.php"; 236if($wiki_login) { 237my%credential= credential_from_url($url); 238$credential{username} =$wiki_login; 239$credential{password} =$wiki_passwd; 240 credential_run("fill", \%credential); 241my$request= {lgname =>$credential{username}, 242 lgpassword =>$credential{password}, 243 lgdomain =>$wiki_domain}; 244if($mediawiki->login($request)) { 245 credential_run("approve", \%credential); 246print STDERR "Logged in mediawiki user\"$credential{username}\".\n"; 247}else{ 248print STDERR "Failed to log in mediawiki user\"$credential{username}\"on$url\n"; 249print STDERR " (error ". 250$mediawiki->{error}->{code} .': '. 251$mediawiki->{error}->{details} .")\n"; 252 credential_run("reject", \%credential); 253exit1; 254} 255} 256} 257 258## Functions for listing pages on the remote wiki 259sub get_mw_tracked_pages { 260my$pages=shift; 261 get_mw_page_list(\@tracked_pages,$pages); 262} 263 264sub get_mw_page_list { 265my$page_list=shift; 266my$pages=shift; 267my@some_pages=@$page_list; 268while(@some_pages) { 269my$last=50; 270if($#some_pages<$last) { 271$last=$#some_pages; 272} 273my@slice=@some_pages[0..$last]; 274 get_mw_first_pages(\@slice,$pages); 275@some_pages=@some_pages[51..$#some_pages]; 276} 277} 278 279sub get_mw_tracked_categories { 280my$pages=shift; 281foreachmy$category(@tracked_categories) { 282if(index($category,':') <0) { 283# Mediawiki requires the Category 284# prefix, but let's not force the user 285# to specify it. 
286$category="Category:".$category; 287} 288my$mw_pages=$mediawiki->list( { 289 action =>'query', 290 list =>'categorymembers', 291 cmtitle =>$category, 292 cmlimit =>'max'} ) 293||die$mediawiki->{error}->{code} .': ' 294.$mediawiki->{error}->{details}; 295foreachmy$page(@{$mw_pages}) { 296$pages->{$page->{title}} =$page; 297} 298} 299} 300 301sub get_mw_all_pages { 302my$pages=shift; 303# No user-provided list, get the list of pages from the API. 304my$mw_pages=$mediawiki->list({ 305 action =>'query', 306 list =>'allpages', 307 aplimit =>'max' 308}); 309if(!defined($mw_pages)) { 310print STDERR "fatal: could not get the list of wiki pages.\n"; 311print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; 312print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; 313exit1; 314} 315foreachmy$page(@{$mw_pages}) { 316$pages->{$page->{title}} =$page; 317} 318} 319 320# queries the wiki for a set of pages. Meant to be used within a loop 321# querying the wiki for slices of page list. 322sub get_mw_first_pages { 323my$some_pages=shift; 324my@some_pages= @{$some_pages}; 325 326my$pages=shift; 327 328# pattern 'page1|page2|...' required by the API 329my$titles=join('|',@some_pages); 330 331my$mw_pages=$mediawiki->api({ 332 action =>'query', 333 titles =>$titles, 334}); 335if(!defined($mw_pages)) { 336print STDERR "fatal: could not query the list of wiki pages.\n"; 337print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; 338print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; 339exit1; 340} 341while(my($id,$page) =each(%{$mw_pages->{query}->{pages}})) { 342if($id<0) { 343print STDERR "Warning: page$page->{title} not found on wiki\n"; 344}else{ 345$pages->{$page->{title}} =$page; 346} 347} 348} 349 350# Get the list of pages to be fetched according to configuration. 351sub get_mw_pages { 352 mw_connect_maybe(); 353 354my%pages;# hash on page titles to avoid duplicates 355my$user_defined; 356if(@tracked_pages) { 357$user_defined=1; 358# The user provided a list of pages titles, but we 359# still need to query the API to get the page IDs. 360 get_mw_tracked_pages(\%pages); 361} 362if(@tracked_categories) { 363$user_defined=1; 364 get_mw_tracked_categories(\%pages); 365} 366if(!$user_defined) { 367 get_mw_all_pages(\%pages); 368} 369if($import_media) { 370print STDERR "Getting media files for selected pages...\n"; 371if($user_defined) { 372 get_linked_mediafiles(\%pages); 373}else{ 374 get_all_mediafiles(\%pages); 375} 376} 377returnvalues(%pages); 378} 379 380# usage: $out = run_git("command args"); 381# $out = run_git("command args", "raw"); # don't interpret output as UTF-8. 
sub run_git {
    my $args = shift;
    my $encoding = (shift || "encoding(UTF-8)");
    open(my $git, "-|:$encoding", "git " . $args);
    my $res = do { local $/; <$git> };
    close($git);

    return $res;
}


sub get_all_mediafiles {
    my $pages = shift;
    # Get the list of media-file pages from the API. They live in a
    # different namespace, and only one namespace can be queried at a
    # time.
    my $mw_pages = $mediawiki->list({
        action => 'query',
        list => 'allpages',
        apnamespace => get_mw_namespace_id("File"),
        aplimit => 'max'
    });
    if (!defined($mw_pages)) {
        print STDERR "fatal: could not get the list of pages for media files.\n";
        print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
        print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
        exit 1;
    }
    foreach my $page (@{$mw_pages}) {
        $pages->{$page->{title}} = $page;
    }
}

sub get_linked_mediafiles {
    my $pages = shift;
    my @titles = map $_->{title}, values(%{$pages});

    # The query is split into small batches because of the MW API limit
    # on the number of links to be returned (500 links max).
    my $batch = 10;
    while (@titles) {
        if ($#titles < $batch) {
            $batch = $#titles;
        }
        my @slice = @titles[0..$batch];

        # pattern 'page1|page2|...' required by the API
        my $mw_titles = join('|', @slice);

        # Media files could be included in or linked from
        # a page, so get all related pages.
        my $query = {
            action => 'query',
            prop => 'links|images',
            titles => $mw_titles,
            plnamespace => get_mw_namespace_id("File"),
            pllimit => 'max'
        };
        my $result = $mediawiki->api($query);

        while (my ($id, $page) = each(%{$result->{query}->{pages}})) {
            my @media_titles;
            if (defined($page->{links})) {
                my @link_titles = map $_->{title}, @{$page->{links}};
                push(@media_titles, @link_titles);
            }
            if (defined($page->{images})) {
                my @image_titles = map $_->{title}, @{$page->{images}};
                push(@media_titles, @image_titles);
            }
            if (@media_titles) {
                get_mw_page_list(\@media_titles, $pages);
            }
        }

        @titles = @titles[($batch+1)..$#titles];
    }
}

sub get_mw_mediafile_for_page_revision {
    # Name of the file on the wiki, with the prefix.
    my $filename = shift;
    my $timestamp = shift;
    my %mediafile;

    # Search if a media file with the given timestamp exists on
    # MediaWiki. In that case, download the file.
    my $query = {
        action => 'query',
        prop => 'imageinfo',
        titles => "File:" . $filename,
        iistart => $timestamp,
        iiend => $timestamp,
        iiprop => 'timestamp|archivename|url',
        iilimit => 1
    };
    my $result = $mediawiki->api($query);

    my ($fileid, $file) = each( %{$result->{query}->{pages}} );
    # If not defined, it means there is no revision of the file for
    # the given timestamp.
    if (defined($file->{imageinfo})) {
        $mediafile{title} = $filename;

        my $fileinfo = pop(@{$file->{imageinfo}});
        $mediafile{timestamp} = $fileinfo->{timestamp};
        # MediaWiki::API's download function doesn't support https URLs
        # and can't download old versions of files.
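        # download_mw_mediafile() below therefore fetches the revision's
        # URL directly through the module's HTTP user agent instead.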
        print STDERR "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n";
        $mediafile{content} = download_mw_mediafile($fileinfo->{url});
    }
    return %mediafile;
}

sub download_mw_mediafile {
    my $url = shift;

    my $response = $mediawiki->{ua}->get($url);
    if ($response->code == 200) {
        return $response->decoded_content;
    } else {
        print STDERR "Error downloading mediafile from:\n";
        print STDERR "URL: $url\n";
        print STDERR "Server response: " . $response->code . " " . $response->message . "\n";
        exit 1;
    }
}

sub get_last_local_revision {
    # Get note regarding last mediawiki revision
    my $note = run_git("notes --ref=$remotename/mediawiki show refs/mediawiki/$remotename/master 2>/dev/null");
    my @note_info = split(/ /, $note);

    my $lastrevision_number;
    if (!(defined($note_info[0]) && $note_info[0] eq "mediawiki_revision:")) {
        print STDERR "No previous mediawiki revision found";
        $lastrevision_number = 0;
    } else {
        # Notes are formatted: mediawiki_revision: #number
        $lastrevision_number = $note_info[1];
        chomp($lastrevision_number);
        print STDERR "Last local mediawiki revision found is $lastrevision_number";
    }
    return $lastrevision_number;
}

# Remember the timestamp corresponding to a revision id.
my %basetimestamps;

sub get_last_remote_revision {
    mw_connect_maybe();

    my @pages = get_mw_pages();

    my $max_rev_num = 0;

    foreach my $page (@pages) {
        my $id = $page->{pageid};

        my $query = {
            action => 'query',
            prop => 'revisions',
            rvprop => 'ids|timestamp',
            pageids => $id,
        };

        my $result = $mediawiki->api($query);

        my $lastrev = pop(@{$result->{query}->{pages}->{$id}->{revisions}});

        $basetimestamps{$lastrev->{revid}} = $lastrev->{timestamp};

        $max_rev_num = ($lastrev->{revid} > $max_rev_num ? $lastrev->{revid} : $max_rev_num);
    }

    print STDERR "Last remote revision found is $max_rev_num.\n";
    return $max_rev_num;
}

# Clean content before sending it to MediaWiki
sub mediawiki_clean {
    my $string = shift;
    my $page_created = shift;
    # MediaWiki does not allow blank space at the end of a page, and a
    # page must end with a single \n. This function right-trims the
    # string and adds a \n at the end to follow this rule.
    $string =~ s/\s+$//;
    if ($string eq "" && $page_created) {
        # Creating empty pages is forbidden.
        $string = EMPTY_CONTENT;
    }
    return $string . "\n";
}

# Filter applied on MediaWiki data before adding them to Git
sub mediawiki_smudge {
    my $string = shift;
    if ($string eq EMPTY_CONTENT) {
        $string = "";
    }
    # This \n is important. This is due to MediaWiki's way of handling end of files.
    return $string . "\n";
}

sub mediawiki_clean_filename {
    my $filename = shift;
    $filename =~ s/@{[SLASH_REPLACEMENT]}/\//g;
    # [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
    # Do a variant of URL-encoding, i.e. something that looks like
    # URL-encoding but with _ added to prevent MediaWiki from thinking
    # this is an actual special character.
    $filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
    # If we use the uri escape before
    # we should unescape here, before anything

    return $filename;
}

sub mediawiki_smudge_filename {
    my $filename = shift;
    $filename =~ s/\//@{[SLASH_REPLACEMENT]}/g;
    $filename =~ s/ /_/g;
    # Decode forbidden characters encoded in mediawiki_clean_filename
    $filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge;
    return $filename;
}

sub literal_data {
    my ($content) = @_;
    print STDOUT "data ", bytes::length($content), "\n", $content;
}

sub literal_data_raw {
    # Output possibly binary content.
    my ($content) = @_;
    # Avoid confusion between size in bytes and in characters
    utf8::downgrade($content);
    binmode STDOUT, ":raw";
    print STDOUT "data ", bytes::length($content), "\n", $content;
    binmode STDOUT, ":utf8";
}

sub mw_capabilities {
    # Revisions are imported to the private namespace
    # refs/mediawiki/$remotename/ by the helper and fetched into
    # refs/remotes/$remotename later by fetch.
    print STDOUT "refspec refs/heads/*:refs/mediawiki/$remotename/*\n";
    print STDOUT "import\n";
    print STDOUT "list\n";
    print STDOUT "push\n";
    print STDOUT "\n";
}

sub mw_list {
    # MediaWiki does not have branches; we consider one branch arbitrarily
    # called master, with HEAD pointing to it.
    print STDOUT "? refs/heads/master\n";
    print STDOUT "\@refs/heads/master HEAD\n";
    print STDOUT "\n";
}

sub mw_option {
    print STDERR "remote-helper command 'option $_[0]' not yet implemented\n";
    print STDOUT "unsupported\n";
}

sub fetch_mw_revisions_for_page {
    my $page = shift;
    my $id = shift;
    my $fetch_from = shift;
    my @page_revs = ();
    my $query = {
        action => 'query',
        prop => 'revisions',
        rvprop => 'ids',
        rvdir => 'newer',
        rvstartid => $fetch_from,
        rvlimit => 500,
        pageids => $id,
    };

    my $revnum = 0;
    # Get 500 revisions at a time due to the mediawiki api limit
    while (1) {
        my $result = $mediawiki->api($query);

        # Parse each of those 500 revisions
        foreach my $revision (@{$result->{query}->{pages}->{$id}->{revisions}}) {
            my $page_rev_ids;
            $page_rev_ids->{pageid} = $page->{pageid};
            $page_rev_ids->{revid} = $revision->{revid};
            push(@page_revs, $page_rev_ids);
            $revnum++;
        }
        last unless $result->{'query-continue'};
        $query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid};
    }
    if ($shallow_import && @page_revs) {
        print STDERR " Found 1 revision (shallow import).\n";
        @page_revs = sort {$b->{revid} <=> $a->{revid}} (@page_revs);
        return $page_revs[0];
    }
    print STDERR " Found ", $revnum, " revision(s).\n";
    return @page_revs;
}

sub fetch_mw_revisions {
    my $pages = shift; my @pages = @{$pages};
    my $fetch_from = shift;

    my @revisions = ();
    my $n = 1;
    foreach my $page (@pages) {
        my $id = $page->{pageid};

        print STDERR "page $n/", scalar(@pages), ": " . $page->{title} . "\n";
        $n++;
        my @page_revs = fetch_mw_revisions_for_page($page, $id, $fetch_from);
        @revisions = (@page_revs, @revisions);
    }

    return ($n, @revisions);
}

sub import_file_revision {
    my $commit = shift;
    my %commit = %{$commit};
    my $full_import = shift;
    my $n = shift;
    my $mediafile = shift;
    my %mediafile;
    if ($mediafile) {
        %mediafile = %{$mediafile};
    }

    my $title = $commit{title};
    my $comment = $commit{comment};
    my $content = $commit{content};
    my $author = $commit{author};
    my $date = $commit{date};

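    # What follows is a git fast-import stream: each MediaWiki revision
    # becomes a commit on refs/mediawiki/$remotename/master, marked ":$n"
    # so that the note emitted further down can refer back to it.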
"commit refs/mediawiki/$remotename/master\n"; 723print STDOUT "mark :$n\n"; 724print STDOUT "committer$author<$author\@$wiki_name> ",$date->epoch," +0000\n"; 725 literal_data($comment); 726 727# If it's not a clone, we need to know where to start from 728if(!$full_import&&$n==1) { 729print STDOUT "from refs/mediawiki/$remotename/master^0\n"; 730} 731if($contentne DELETED_CONTENT) { 732print STDOUT "M 644 inline$title.mw\n"; 733 literal_data($content); 734if(%mediafile) { 735print STDOUT "M 644 inline$mediafile{title}\n"; 736 literal_data_raw($mediafile{content}); 737} 738print STDOUT "\n\n"; 739}else{ 740print STDOUT "D$title.mw\n"; 741} 742 743# mediawiki revision number in the git note 744if($full_import&&$n==1) { 745print STDOUT "reset refs/notes/$remotename/mediawiki\n"; 746} 747print STDOUT "commit refs/notes/$remotename/mediawiki\n"; 748print STDOUT "committer$author<$author\@$wiki_name> ",$date->epoch," +0000\n"; 749 literal_data("Note added by git-mediawiki during import"); 750if(!$full_import&&$n==1) { 751print STDOUT "from refs/notes/$remotename/mediawiki^0\n"; 752} 753print STDOUT "N inline :$n\n"; 754 literal_data("mediawiki_revision: ".$commit{mw_revision}); 755print STDOUT "\n\n"; 756} 757 758# parse a sequence of 759# <cmd> <arg1> 760# <cmd> <arg2> 761# \n 762# (like batch sequence of import and sequence of push statements) 763sub get_more_refs { 764my$cmd=shift; 765my@refs; 766while(1) { 767my$line= <STDIN>; 768if($line=~m/^$cmd (.*)$/) { 769push(@refs,$1); 770}elsif($lineeq"\n") { 771return@refs; 772}else{ 773die("Invalid command in a '$cmd' batch: ".$_); 774} 775} 776} 777 778sub mw_import { 779# multiple import commands can follow each other. 780my@refs= (shift, get_more_refs("import")); 781foreachmy$ref(@refs) { 782 mw_import_ref($ref); 783} 784print STDOUT "done\n"; 785} 786 787sub mw_import_ref { 788my$ref=shift; 789# The remote helper will call "import HEAD" and 790# "import refs/heads/master". 791# Since HEAD is a symbolic ref to master (by convention, 792# followed by the output of the command "list" that we gave), 793# we don't need to do anything in this case. 
    if ($ref eq "HEAD") {
        return;
    }

    mw_connect_maybe();

    my @pages = get_mw_pages();

    print STDERR "Searching revisions...\n";
    my $last_local = get_last_local_revision();
    my $fetch_from = $last_local + 1;
    if ($fetch_from == 1) {
        print STDERR ", fetching from beginning.\n";
    } else {
        print STDERR ", fetching from here.\n";
    }
    my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from);

    # Creation of the fast-import stream
    print STDERR "Fetching & writing export data...\n";

    $n = 0;
    my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined

    foreach my $pagerevid (sort {$a->{revid} <=> $b->{revid}} @revisions) {
        # fetch the content of the pages
        my $query = {
            action => 'query',
            prop => 'revisions',
            rvprop => 'content|timestamp|comment|user|ids',
            revids => $pagerevid->{revid},
        };

        my $result = $mediawiki->api($query);

        my $rev = pop(@{$result->{query}->{pages}->{$pagerevid->{pageid}}->{revisions}});

        $n++;

        my $page_title = $result->{query}->{pages}->{$pagerevid->{pageid}}->{title};
        my %commit;
        $commit{author} = $rev->{user} || 'Anonymous';
        $commit{comment} = $rev->{comment} || '*Empty MediaWiki Message*';
        $commit{title} = mediawiki_smudge_filename($page_title);
        $commit{mw_revision} = $pagerevid->{revid};
        $commit{content} = mediawiki_smudge($rev->{'*'});

        if (!defined($rev->{timestamp})) {
            $last_timestamp++;
        } else {
            $last_timestamp = $rev->{timestamp};
        }
        $commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp);

        # Differentiate classic pages and media files.
        my ($namespace, $filename) = $page_title =~ /^([^:]*):(.*)$/;
        my %mediafile;
        if ($namespace && get_mw_namespace_id($namespace) == get_mw_namespace_id("File")) {
            %mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
        }
        # If this is a revision of the media page for a new version of
        # a file, do one common commit for both the file and the media
        # page. Otherwise, commit only that page.
        print STDERR "$n/", scalar(@revisions), ": Revision #$pagerevid->{revid} of $commit{title}\n";
        import_file_revision(\%commit, ($fetch_from == 1), $n, \%mediafile);
    }

    if ($fetch_from == 1 && $n == 0) {
        print STDERR "You appear to have cloned an empty MediaWiki.\n";
        # Something has to be done remote-helper side. If nothing is done, an error is
        # thrown saying that HEAD is referring to unknown object 0000000000000000000
        # and the clone fails.
    }
}

sub error_non_fast_forward {
    my $advice = run_git("config --bool advice.pushNonFastForward");
    chomp($advice);
    if ($advice ne "false") {
        # Native git-push would show this after the summary.
        # We can't ask it to display it cleanly, so print it
        # ourselves before.
        print STDERR "To prevent you from losing history, non-fast-forward updates were rejected\n";
        print STDERR "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n";
See the\n"; 878print STDERR "'Note about fast-forwards' section of 'git push --help' for details.\n"; 879} 880print STDOUT "error$_[0]\"non-fast-forward\"\n"; 881return0; 882} 883 884sub mw_upload_file { 885my$complete_file_name=shift; 886my$new_sha1=shift; 887my$extension=shift; 888my$file_deleted=shift; 889my$summary=shift; 890my$newrevid; 891my$path="File:".$complete_file_name; 892my%hashFiles= get_allowed_file_extensions(); 893if(!exists($hashFiles{$extension})) { 894print STDERR "$complete_file_nameis not a permitted file on this wiki.\n"; 895print STDERR "Check the configuration of file uploads in your mediawiki.\n"; 896return$newrevid; 897} 898# Deleting and uploading a file requires a priviledged user 899if($file_deleted) { 900 mw_connect_maybe(); 901my$query= { 902 action =>'delete', 903 title =>$path, 904 reason =>$summary 905}; 906if(!$mediawiki->edit($query)) { 907print STDERR "Failed to delete file on remote wiki\n"; 908print STDERR "Check your permissions on the remote site. Error code:\n"; 909print STDERR $mediawiki->{error}->{code} .':'.$mediawiki->{error}->{details}; 910exit1; 911} 912}else{ 913# Don't let perl try to interpret file content as UTF-8 => use "raw" 914my$content= run_git("cat-file blob$new_sha1","raw"); 915if($contentne"") { 916 mw_connect_maybe(); 917$mediawiki->{config}->{upload_url} = 918"$url/index.php/Special:Upload"; 919$mediawiki->edit({ 920 action =>'upload', 921 filename =>$complete_file_name, 922 comment =>$summary, 923 file => [undef, 924$complete_file_name, 925 Content =>$content], 926 ignorewarnings =>1, 927}, { 928 skip_encoding =>1 929} ) ||die$mediawiki->{error}->{code} .':' 930.$mediawiki->{error}->{details}; 931my$last_file_page=$mediawiki->get_page({title =>$path}); 932$newrevid=$last_file_page->{revid}; 933print STDERR "Pushed file:$new_sha1-$complete_file_name.\n"; 934}else{ 935print STDERR "Empty file$complete_file_namenot pushed.\n"; 936} 937} 938return$newrevid; 939} 940 941sub mw_push_file { 942my$diff_info=shift; 943# $diff_info contains a string in this format: 944# 100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status> 945my@diff_info_split=split(/[ \t]/,$diff_info); 946 947# Filename, including .mw extension 948my$complete_file_name=shift; 949# Commit message 950my$summary=shift; 951# MediaWiki revision number. Keep the previous one by default, 952# in case there's no edit to perform. 953my$oldrevid=shift; 954my$newrevid; 955 956my$new_sha1=$diff_info_split[3]; 957my$old_sha1=$diff_info_split[2]; 958my$page_created= ($old_sha1eq NULL_SHA1); 959my$page_deleted= ($new_sha1eq NULL_SHA1); 960$complete_file_name= mediawiki_clean_filename($complete_file_name); 961 962my($title,$extension) =$complete_file_name=~/^(.*)\.([^\.]*)$/; 963if(!defined($extension)) { 964$extension=""; 965} 966if($extensioneq"mw") { 967my$file_content; 968if($page_deleted) { 969# Deleting a page usually requires 970# special priviledges. 
            # convention is to replace the page
            # with this content instead:
            $file_content = DELETED_CONTENT;
        } else {
            $file_content = run_git("cat-file blob $new_sha1");
        }

        mw_connect_maybe();

        my $result = $mediawiki->edit( {
            action => 'edit',
            summary => $summary,
            title => $title,
            basetimestamp => $basetimestamps{$oldrevid},
            text => mediawiki_clean($file_content, $page_created),
        }, {
            skip_encoding => 1 # Helps with names with accented characters
        });
        if (!$result) {
            if ($mediawiki->{error}->{code} == 3) {
                # edit conflicts, considered as non-fast-forward
                print STDERR 'Warning: Error ' .
                    $mediawiki->{error}->{code} .
                    ' from mediawiki: ' . $mediawiki->{error}->{details} .
                    ".\n";
                return ($oldrevid, "non-fast-forward");
            } else {
                # Other errors. Shouldn't happen => just die()
                die 'Fatal: Error ' .
                    $mediawiki->{error}->{code} .
                    ' from mediawiki: ' . $mediawiki->{error}->{details};
            }
        }
        $newrevid = $result->{edit}->{newrevid};
        print STDERR "Pushed file: $new_sha1 - $title\n";
    } else {
        $newrevid = mw_upload_file($complete_file_name, $new_sha1,
                                   $extension, $page_deleted,
                                   $summary);
    }
    $newrevid = ($newrevid or $oldrevid);
    return ($newrevid, "ok");
}

sub mw_push {
    # multiple push statements can follow each other
    my @refsspecs = (shift, get_more_refs("push"));
    my $pushed;
    for my $refspec (@refsspecs) {
        my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/
            or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>");
        if ($force) {
            print STDERR "Warning: forced push not allowed on a MediaWiki.\n";
        }
        if ($local eq "") {
            print STDERR "Cannot delete remote branch on a MediaWiki\n";
            print STDOUT "error $remote cannot delete\n";
            next;
        }
        if ($remote ne "refs/heads/master") {
            print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n";
            print STDOUT "error $remote only master allowed\n";
            next;
        }
        if (mw_push_revision($local, $remote)) {
            $pushed = 1;
        }
    }

    # Notify Git that the push is done
    print STDOUT "\n";

    if ($pushed && $dumb_push) {
        print STDERR "Just pushed some revisions to MediaWiki.\n";
        print STDERR "The pushed revisions now have to be re-imported, and your current branch\n";
        print STDERR "needs to be updated with these re-imported commits. You can do this with\n";
        print STDERR "\n";
        print STDERR "  git pull --rebase\n";
        print STDERR "\n";
    }
}

sub mw_push_revision {
    my $local = shift;
    my $remote = shift; # actually, this has to be "refs/heads/master" at this point.
    my $last_local_revid = get_last_local_revision();
    print STDERR ".\n"; # Finish sentence started by get_last_local_revision()
    my $last_remote_revid = get_last_remote_revision();
    my $mw_revision = $last_remote_revid;

    # Get sha1 of commit pointed by local HEAD
    my $HEAD_sha1 = run_git("rev-parse $local 2>/dev/null"); chomp($HEAD_sha1);
    # Get sha1 of commit pointed by remotes/$remotename/master
    my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/$remotename/master 2>/dev/null");
    chomp($remoteorigin_sha1);

    if ($last_local_revid > 0 &&
        $last_local_revid < $last_remote_revid) {
        return error_non_fast_forward($remote);
    }

    if ($HEAD_sha1 eq $remoteorigin_sha1) {
        # nothing to push
        return 0;
    }

    # Get every commit in between HEAD and refs/remotes/origin/master,
    # including HEAD and refs/remotes/origin/master
    my @commit_pairs = ();
    if ($last_local_revid > 0) {
        my $parsed_sha1 = $remoteorigin_sha1;
        # Find a path from last MediaWiki commit to pushed commit
        while ($parsed_sha1 ne $HEAD_sha1) {
            my @commit_info = grep(/^$parsed_sha1/, split(/\n/, run_git("rev-list --children $local")));
            if (!@commit_info) {
                return error_non_fast_forward($remote);
            }
            my @commit_info_split = split(/ |\n/, $commit_info[0]);
            # $commit_info_split[1] is the sha1 of the commit to export
            # $commit_info_split[0] is the sha1 of its direct child
            push(@commit_pairs, \@commit_info_split);
            $parsed_sha1 = $commit_info_split[1];
        }
    } else {
        # No remote mediawiki revision. Export the whole
        # history (linearized with --first-parent)
        print STDERR "Warning: no common ancestor, pushing complete history\n";
        my $history = run_git("rev-list --first-parent --children $local");
        my @history = split('\n', $history);
        @history = @history[1..$#history];
        foreach my $line (reverse @history) {
            my @commit_info_split = split(/ |\n/, $line);
            push(@commit_pairs, \@commit_info_split);
        }
    }

    foreach my $commit_info_split (@commit_pairs) {
        my $sha1_child = @{$commit_info_split}[0];
        my $sha1_commit = @{$commit_info_split}[1];
        my $diff_infos = run_git("diff-tree -r --raw -z $sha1_child $sha1_commit");
        # TODO: we could detect renames and encode them with a #redirect on the wiki.
        # TODO: for now, it's just a delete+add
        my @diff_info_list = split(/\0/, $diff_infos);
        # Keep the subject line of the commit message as the mediawiki comment for the revision
        my $commit_msg = run_git("log --no-walk --format=\"%s\" $sha1_commit");
        chomp($commit_msg);
        # Push every blob
        while (@diff_info_list) {
            my $status;
            # git diff-tree -z gives an output like
            # <metadata>\0<filename1>\0
            # <metadata>\0<filename2>\0
            # and we've split on \0.
            my $info = shift(@diff_info_list);
            my $file = shift(@diff_info_list);
            ($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision);
            if ($status eq "non-fast-forward") {
                # we may already have sent part of the
                # commit to MediaWiki, but it's too
                # late to cancel it. Stop the push in
                # the middle, but still give an
                # accurate error message.
                return error_non_fast_forward($remote);
            }
            if ($status ne "ok") {
                die("Unknown error from mw_push_file()");
            }
        }
        unless ($dumb_push) {
            run_git("notes --ref=$remotename/mediawiki add -m \"mediawiki_revision: $mw_revision\" $sha1_commit");
            run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child");
        }
    }

    print STDOUT "ok $remote\n";
    return 1;
}

sub get_allowed_file_extensions {
    mw_connect_maybe();

    my $query = {
        action => 'query',
        meta => 'siteinfo',
        siprop => 'fileextensions'
    };
    my $result = $mediawiki->api($query);
    my @file_extensions = map $_->{ext}, @{$result->{query}->{fileextensions}};
    my %hashFile = map {$_ => 1} @file_extensions;

    return %hashFile;
}

# In-memory cache for MediaWiki namespace ids.
my %namespace_id;

# Namespaces whose id is cached in the configuration file
# (to avoid duplicates)
my %cached_mw_namespace_id;

# Return MediaWiki id for a canonical namespace name.
# Ex.: "File", "Project".
sub get_mw_namespace_id {
    mw_connect_maybe();
    my $name = shift;

    if (!exists $namespace_id{$name}) {
        # Look at the configuration file to see if the record for that
        # namespace is already cached. Namespaces are stored in the form
        # "Name_of_namespace:Id_namespace", e.g. "File:6".
        my @temp = split(/[ \n]/, run_git("config --get-all remote."
                                          . $remotename . ".namespaceCache"));
        chomp(@temp);
        foreach my $ns (@temp) {
            my ($n, $id) = split(/:/, $ns);
            $namespace_id{$n} = $id;
            $cached_mw_namespace_id{$n} = 1;
        }
    }

    if (!exists $namespace_id{$name}) {
        print STDERR "Namespace $name not found in cache, querying the wiki ...\n";
        # NS not found => get namespace id from MW and store it in
        # the configuration file.
        my $query = {
            action => 'query',
            meta => 'siteinfo',
            siprop => 'namespaces'
        };
        my $result = $mediawiki->api($query);

        while (my ($id, $ns) = each(%{$result->{query}->{namespaces}})) {
            if (defined($ns->{id}) && defined($ns->{canonical})) {
                $namespace_id{$ns->{canonical}} = $ns->{id};
                if ($ns->{'*'}) {
                    # alias (e.g. French Fichier: as alias for canonical File:)
                    $namespace_id{$ns->{'*'}} = $ns->{id};
                }
            }
        }
    }

    my $id = $namespace_id{$name};

    if (defined $id) {
        # Store explicitly requested namespaces on disk
        if (!exists $cached_mw_namespace_id{$name}) {
            run_git("config --add remote." . $remotename
                    . ".namespaceCache \"" . $name . ":" . $id . "\"");
            $cached_mw_namespace_id{$name} = 1;
        }
        return $id;
    } else {
        die "No such namespace $name on MediaWiki.";
    }
}
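
# A minimal usage sketch (illustration only, not part of the original
# script; the URL and page names are made up). Assuming this file is
# installed in the PATH as "git-remote-mediawiki" so that Git invokes it
# as the remote helper for mediawiki:: URLs:
#
#     git clone mediawiki::http://example.com/wiki
#
# and, to restrict what later fetches import:
#
#     git config remote.origin.pages "Main_Page Another_Page"
#     git config remote.origin.categories "Books"
#     git config remote.origin.mediaimport true
#
# The remote.<name>.pages, .categories, .mediaimport, .shallow, .mwLogin,
# .mwPassword, .mwDomain and .dumbPush keys read at the top of this script
# are all set with "git config" in the same way.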