#! /usr/bin/perl

# Copyright (C) 2011
#     Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr>
#     Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr>
#     Claire Fousse <claire.fousse@ensimag.imag.fr>
#     David Amouyal <david.amouyal@ensimag.imag.fr>
#     Matthieu Moy <matthieu.moy@grenoble-inp.fr>
# License: GPL v2 or later

# Gateway between Git and MediaWiki.
# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/

use strict;
use MediaWiki::API;
use DateTime::Format::ISO8601;

# By default, use UTF-8 to communicate with Git and the user
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";

use URI::Escape;
use IPC::Open2;

use warnings;

# MediaWiki filenames can contain forward slashes. This variable decides
# by which pattern they should be replaced.
use constant SLASH_REPLACEMENT => "%2F";

# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";

# It's not possible to create empty pages. New empty files in Git are
# sent with this content instead.
use constant EMPTY_CONTENT => "<!-- empty page -->\n";

# used to reflect file creation or deletion in diff.
use constant NULL_SHA1 => "0000000000000000000000000000000000000000";

# Used on Git's side to reflect empty edit messages on the wiki
use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*';

my $remotename = $ARGV[0];
my $url = $ARGV[1];

# Accept both space-separated and multiple keys in config file.
# Spaces should be written as _ anyway because we'll use chomp.
my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".pages"));
chomp(@tracked_pages);

# Just like @tracked_pages, but for MediaWiki categories.
my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".categories"));
chomp(@tracked_categories);

# Import media files on pull
my $import_media = run_git("config --get --bool remote." . $remotename . ".mediaimport");
chomp($import_media);
$import_media = ($import_media eq "true");

# Export media files on push
my $export_media = run_git("config --get --bool remote." . $remotename . ".mediaexport");
chomp($export_media);
$export_media = !($export_media eq "false");

my $wiki_login = run_git("config --get remote." . $remotename . ".mwLogin");
# Note: mwPassword is discouraged. Use the credential system instead.
my $wiki_passwd = run_git("config --get remote." . $remotename . ".mwPassword");
my $wiki_domain = run_git("config --get remote." . $remotename . ".mwDomain");
chomp($wiki_login);
chomp($wiki_passwd);
chomp($wiki_domain);

# Import only last revisions (both for clone and fetch)
my $shallow_import = run_git("config --get --bool remote." . $remotename . ".shallow");
chomp($shallow_import);
$shallow_import = ($shallow_import eq "true");

# Fetch (clone and pull) by revisions instead of by pages. This behavior
# is more efficient when we have a wiki with lots of pages and we fetch
# the revisions quite often, so that they concern only a few pages.
# Possible values:
# - by_rev: perform one query per new revision on the remote wiki
# - by_page: query each tracked page for new revisions
my $fetch_strategy = run_git("config --get remote.$remotename.fetchStrategy");
unless ($fetch_strategy) {
	$fetch_strategy = run_git("config --get mediawiki.fetchStrategy");
}
chomp($fetch_strategy);
unless ($fetch_strategy) {
	$fetch_strategy = "by_page";
}

# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
# remote.<remotename>.dumbPush.
#
# This means the user will have to re-import the just-pushed
# revisions. On the other hand, this means that the Git revisions
# corresponding to MediaWiki revisions are all imported from the wiki,
# regardless of whether they were initially created in Git or from the
# web interface, hence all users will get the same history (i.e. if
# the push from Git to MediaWiki loses some information, everybody
# will get the history with information lost). If the import is
# deterministic, this means everybody gets the same sha1 for each
# MediaWiki revision.
my $dumb_push = run_git("config --get --bool remote.$remotename.dumbPush");
unless ($dumb_push) {
	$dumb_push = run_git("config --get --bool mediawiki.dumbPush");
}
chomp($dumb_push);
$dumb_push = ($dumb_push eq "true");

my $wiki_name = $url;
$wiki_name =~ s/[^\/]*:\/\///;
# If URL is like http://user:password@example.com/, we clearly don't
# want the password in $wiki_name. While we're there, also remove user
# and '@' sign, to avoid author like MWUser@HTTPUser@host.com
$wiki_name =~ s/^.*@//;

# Commands parser
my $entry;
my @cmd;
while (<STDIN>) {
	chomp;
	@cmd = split(/ /);
	if (defined($cmd[0])) {
		# Line not blank
		if ($cmd[0] eq "capabilities") {
			die("Too many arguments for capabilities") unless (!defined($cmd[1]));
			mw_capabilities();
		} elsif ($cmd[0] eq "list") {
			die("Too many arguments for list") unless (!defined($cmd[2]));
			mw_list($cmd[1]);
		} elsif ($cmd[0] eq "import") {
			die("Invalid arguments for import") unless ($cmd[1] ne "" && !defined($cmd[2]));
			mw_import($cmd[1]);
		} elsif ($cmd[0] eq "option") {
			die("Too many arguments for option") unless ($cmd[1] ne "" && $cmd[2] ne "" && !defined($cmd[3]));
			mw_option($cmd[1], $cmd[2]);
		} elsif ($cmd[0] eq "push") {
			mw_push($cmd[1]);
		} else {
			print STDERR "Unknown command. Aborting...\n";
			last;
		}
	} else {
		# blank line: we should terminate
		last;
	}

	BEGIN { $| = 1 } # flush STDOUT, to make sure the previous
			 # command is fully processed.
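	# Note: the BEGIN block above simply enables autoflush on STDOUT,
	# so each reply is sent to Git as soon as it is printed instead of
	# waiting in the stdio buffer.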
}

########################## Functions ##############################

## credential API management (generic functions)

sub credential_read {
	my %credential;
	my $reader = shift;
	my $op = shift;
	while (<$reader>) {
		my ($key, $value) = /([^=]*)=(.*)/;
		if (not defined $key) {
			die "ERROR receiving response from git credential $op:\n$_\n";
		}
		$credential{$key} = $value;
	}
	return %credential;
}

sub credential_write {
	my $credential = shift;
	my $writer = shift;
	# url overwrites other fields, so it must come first
	print $writer "url=$credential->{url}\n" if exists $credential->{url};
	while (my ($key, $value) = each(%$credential)) {
		if (length $value && $key ne 'url') {
			print $writer "$key=$value\n";
		}
	}
}

sub credential_run {
	my $op = shift;
	my $credential = shift;
	my $pid = open2(my $reader, my $writer, "git credential $op");
	credential_write($credential, $writer);
	print $writer "\n";
	close($writer);

	if ($op eq "fill") {
		%$credential = credential_read($reader, $op);
	} else {
		if (<$reader>) {
			die "ERROR while running git credential $op:\n$_";
		}
	}
	close($reader);
	waitpid($pid, 0);
	my $child_exit_status = $? >> 8;
	if ($child_exit_status != 0) {
		die "'git credential $op' failed with code $child_exit_status.";
	}
}

# MediaWiki API instance, created lazily.
my $mediawiki;

sub mw_connect_maybe {
	if ($mediawiki) {
		return;
	}
	$mediawiki = MediaWiki::API->new;
	$mediawiki->{config}->{api_url} = "$url/api.php";
	if ($wiki_login) {
		my %credential = (url => $url);
		$credential{username} = $wiki_login;
		$credential{password} = $wiki_passwd;
		credential_run("fill", \%credential);
		my $request = {lgname => $credential{username},
			       lgpassword => $credential{password},
			       lgdomain => $wiki_domain};
		if ($mediawiki->login($request)) {
			credential_run("approve", \%credential);
			print STDERR "Logged in mediawiki user \"$credential{username}\".\n";
		} else {
			print STDERR "Failed to log in mediawiki user \"$credential{username}\" on $url\n";
			print STDERR "  (error " .
				$mediawiki->{error}->{code} . ': ' .
				$mediawiki->{error}->{details} . ")\n";
			credential_run("reject", \%credential);
			exit 1;
		}
	}
}

sub fatal_mw_error {
	my $action = shift;
	print STDERR "fatal: could not $action.\n";
	print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
	if ($url =~ /^https/) {
		print STDERR "fatal: make sure '$url/api.php' is a valid page\n";
		print STDERR "fatal: and the SSL certificate is correct.\n";
	} else {
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
	}
	print STDERR "fatal: (error " .
		$mediawiki->{error}->{code} . ': ' .
		$mediawiki->{error}->{details} . ")\n";
	exit 1;
}

## Functions for listing pages on the remote wiki
sub get_mw_tracked_pages {
	my $pages = shift;
	get_mw_page_list(\@tracked_pages, $pages);
}

sub get_mw_page_list {
	my $page_list = shift;
	my $pages = shift;
	my @some_pages = @$page_list;
	while (@some_pages) {
		my $last = 50;
		if ($#some_pages < $last) {
			$last = $#some_pages;
		}
		my @slice = @some_pages[0..$last];
		get_mw_first_pages(\@slice, $pages);
		@some_pages = @some_pages[51..$#some_pages];
	}
}

sub get_mw_tracked_categories {
	my $pages = shift;
	foreach my $category (@tracked_categories) {
		if (index($category, ':') < 0) {
			# MediaWiki requires the Category
			# prefix, but let's not force the user
			# to specify it.
			$category = "Category:" . $category;
		}
		my $mw_pages = $mediawiki->list( {
			action => 'query',
			list => 'categorymembers',
			cmtitle => $category,
			cmlimit => 'max' } )
			|| die $mediawiki->{error}->{code} . ': '
				. $mediawiki->{error}->{details};
		foreach my $page (@{$mw_pages}) {
			$pages->{$page->{title}} = $page;
		}
	}
}

sub get_mw_all_pages {
	my $pages = shift;
	# No user-provided list, get the list of pages from the API.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		fatal_mw_error("get the list of wiki pages");
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
}

# Queries the wiki for a set of pages. Meant to be used within a loop
# querying the wiki for slices of the page list.
sub get_mw_first_pages {
	my $some_pages = shift;
	my @some_pages = @{$some_pages};

	my $pages = shift;

	# pattern 'page1|page2|...' required by the API
	my $titles = join('|', @some_pages);

	my $mw_pages = $mediawiki->api({
		action => 'query',
		titles => $titles,
	});
	if (!defined($mw_pages)) {
		fatal_mw_error("query the list of wiki pages");
	}
	while (my ($id, $page) = each(%{$mw_pages->{query}->{pages}})) {
		if ($id < 0) {
			print STDERR "Warning: page $page->{title} not found on wiki\n";
		} else {
			$pages->{$page->{title}} = $page;
		}
	}
}

# Get the list of pages to be fetched according to configuration.
sub get_mw_pages {
	mw_connect_maybe();

	print STDERR "Listing pages on remote wiki...\n";

	my %pages; # hash on page titles to avoid duplicates
	my $user_defined;
	if (@tracked_pages) {
		$user_defined = 1;
		# The user provided a list of page titles, but we
		# still need to query the API to get the page IDs.
		get_mw_tracked_pages(\%pages);
	}
	if (@tracked_categories) {
		$user_defined = 1;
		get_mw_tracked_categories(\%pages);
	}
	if (!$user_defined) {
		get_mw_all_pages(\%pages);
	}
	if ($import_media) {
		print STDERR "Getting media files for selected pages...\n";
		if ($user_defined) {
			get_linked_mediafiles(\%pages);
		} else {
			get_all_mediafiles(\%pages);
		}
	}
	print STDERR (scalar keys %pages) . " pages found.\n";
	return %pages;
}

# usage: $out = run_git("command args");
#        $out = run_git("command args", "raw"); # don't interpret output as UTF-8.
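# For example, run_git("rev-parse HEAD") returns the command's standard
# output as a single string (callers chomp() it themselves); passing "raw"
# as the second argument skips UTF-8 decoding, e.g. when reading blob contents.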
sub run_git {
	my $args = shift;
	my $encoding = (shift || "encoding(UTF-8)");
	open(my $git, "-|:$encoding", "git " . $args);
	my $res = do { local $/; <$git> };
	close($git);

	return $res;
}


sub get_all_mediafiles {
	my $pages = shift;
	# Attach the list of all pages for media files from the API.
	# They live in a different namespace, and only one namespace
	# can be queried at a time.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		apnamespace => get_mw_namespace_id("File"),
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		print STDERR "fatal: could not get the list of pages for media files.\n";
		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
}

sub get_linked_mediafiles {
	my $pages = shift;
	my @titles = map $_->{title}, values(%{$pages});

	# The query is split in small batches because of the MW API limit of
	# the number of links to be returned (500 links max).
	my $batch = 10;
	while (@titles) {
		if ($#titles < $batch) {
			$batch = $#titles;
		}
		my @slice = @titles[0..$batch];

		# pattern 'page1|page2|...' required by the API
		my $mw_titles = join('|', @slice);

		# Media files could be included or linked from
		# a page, get all related
		my $query = {
			action => 'query',
			prop => 'links|images',
			titles => $mw_titles,
			plnamespace => get_mw_namespace_id("File"),
			pllimit => 'max'
		};
		my $result = $mediawiki->api($query);

		while (my ($id, $page) = each(%{$result->{query}->{pages}})) {
			my @media_titles;
			if (defined($page->{links})) {
				my @link_titles = map $_->{title}, @{$page->{links}};
				push(@media_titles, @link_titles);
			}
			if (defined($page->{images})) {
				my @image_titles = map $_->{title}, @{$page->{images}};
				push(@media_titles, @image_titles);
			}
			if (@media_titles) {
				get_mw_page_list(\@media_titles, $pages);
			}
		}

		@titles = @titles[($batch+1)..$#titles];
	}
}

sub get_mw_mediafile_for_page_revision {
	# Name of the file on the wiki, with the prefix.
	my $filename = shift;
	my $timestamp = shift;
	my %mediafile;

	# Check whether a media file with the given timestamp exists on
	# MediaWiki. If so, download the file.
	my $query = {
		action => 'query',
		prop => 'imageinfo',
		titles => "File:" . $filename,
		iistart => $timestamp,
		iiend => $timestamp,
		iiprop => 'timestamp|archivename|url',
		iilimit => 1
	};
	my $result = $mediawiki->api($query);

	my ($fileid, $file) = each( %{$result->{query}->{pages}} );
	# If not defined, it means there is no revision of the file for
	# the given timestamp.
	if (defined($file->{imageinfo})) {
		$mediafile{title} = $filename;

		my $fileinfo = pop(@{$file->{imageinfo}});
		$mediafile{timestamp} = $fileinfo->{timestamp};
		# MediaWiki::API's download function doesn't support https URLs
		# and can't download old versions of files.
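		# Fetch the URL reported by imageinfo directly with the
		# underlying user agent instead (see download_mw_mediafile).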
		print STDERR "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n";
		$mediafile{content} = download_mw_mediafile($fileinfo->{url});
	}
	return %mediafile;
}

sub download_mw_mediafile {
	my $url = shift;

	my $response = $mediawiki->{ua}->get($url);
	if ($response->code == 200) {
		return $response->decoded_content;
	} else {
		print STDERR "Error downloading media file:\n";
		print STDERR "URL: $url\n";
		print STDERR "Server response: " . $response->code . " " . $response->message . "\n";
		exit 1;
	}
}

sub get_last_local_revision {
	# Get note regarding last mediawiki revision
	my $note = run_git("notes --ref=$remotename/mediawiki show refs/mediawiki/$remotename/master 2>/dev/null");
	my @note_info = split(/ /, $note);

	my $lastrevision_number;
	if (!(defined($note_info[0]) && $note_info[0] eq "mediawiki_revision:")) {
		print STDERR "No previous mediawiki revision found";
		$lastrevision_number = 0;
	} else {
		# Notes are formatted: mediawiki_revision: #number
		$lastrevision_number = $note_info[1];
		chomp($lastrevision_number);
		print STDERR "Last local mediawiki revision found is $lastrevision_number";
	}
	return $lastrevision_number;
}

# Remember the timestamp corresponding to a revision id.
my %basetimestamps;

# Get the last remote revision without taking into account which pages
# are tracked or not. This function makes a single request to the wiki
# and thus avoids looping over all tracked pages. This is useful for
# the fetch-by-rev option.
sub get_last_global_remote_rev {
	mw_connect_maybe();

	my $query = {
		action => 'query',
		list => 'recentchanges',
		prop => 'revisions',
		rclimit => '1',
		rcdir => 'older',
	};
	my $result = $mediawiki->api($query);
	return $result->{query}->{recentchanges}[0]->{revid};
}

# Get the last remote revision concerning the tracked pages and the tracked
# categories.
sub get_last_remote_revision {
	mw_connect_maybe();

	my %pages_hash = get_mw_pages();
	my @pages = values(%pages_hash);

	my $max_rev_num = 0;

	print STDERR "Getting last revision id on tracked pages...\n";

	foreach my $page (@pages) {
		my $id = $page->{pageid};

		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'ids|timestamp',
			pageids => $id,
		};

		my $result = $mediawiki->api($query);

		my $lastrev = pop(@{$result->{query}->{pages}->{$id}->{revisions}});

		$basetimestamps{$lastrev->{revid}} = $lastrev->{timestamp};

		$max_rev_num = ($lastrev->{revid} > $max_rev_num ? $lastrev->{revid} : $max_rev_num);
	}

	print STDERR "Last remote revision found is $max_rev_num.\n";
	return $max_rev_num;
}

# Clean content before sending it to MediaWiki
sub mediawiki_clean {
	my $string = shift;
	my $page_created = shift;
	# MediaWiki does not allow blank space at the end of a page and
	# ends every page with a single \n. This function right-trims the
	# string and adds a \n at the end to follow this rule.
	$string =~ s/\s+$//;
	if ($string eq "" && $page_created) {
		# Creating empty pages is forbidden.
		$string = EMPTY_CONTENT;
	}
	return $string . "\n";
}

# Filter applied on MediaWiki data before adding them to Git
sub mediawiki_smudge {
	my $string = shift;
	if ($string eq EMPTY_CONTENT) {
		$string = "";
	}
	# This \n is important. This is due to mediawiki's way to handle end of files.
	return $string . "\n";
}

sub mediawiki_clean_filename {
	my $filename = shift;
	$filename =~ s/@{[SLASH_REPLACEMENT]}/\//g;
	# [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
	# Do a variant of URL-encoding, i.e. looks like URL-encoding,
	# but with _ added to prevent MediaWiki from thinking this is
	# an actual special character.
	$filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
	# If we use the uri escape before
	# we should unescape here, before anything

	return $filename;
}

sub mediawiki_smudge_filename {
	my $filename = shift;
	$filename =~ s/\//@{[SLASH_REPLACEMENT]}/g;
	$filename =~ s/ /_/g;
	# Decode forbidden characters encoded in mediawiki_clean_filename
	$filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge;
	return $filename;
}

sub literal_data {
	my ($content) = @_;
	print STDOUT "data ", bytes::length($content), "\n", $content;
}

sub literal_data_raw {
	# Output possibly binary content.
	my ($content) = @_;
	# Avoid confusion between size in bytes and in characters
	utf8::downgrade($content);
	binmode STDOUT, ":raw";
	print STDOUT "data ", bytes::length($content), "\n", $content;
	binmode STDOUT, ":utf8";
}

sub mw_capabilities {
	# Revisions are imported to the private namespace
	# refs/mediawiki/$remotename/ by the helper and fetched into
	# refs/remotes/$remotename later by fetch.
	print STDOUT "refspec refs/heads/*:refs/mediawiki/$remotename/*\n";
	print STDOUT "import\n";
	print STDOUT "list\n";
	print STDOUT "push\n";
	print STDOUT "\n";
}

sub mw_list {
	# MediaWiki does not have branches; we consider one branch arbitrarily
	# called master, with HEAD pointing to it.
	print STDOUT "? refs/heads/master\n";
	print STDOUT "\@refs/heads/master HEAD\n";
	print STDOUT "\n";
}

sub mw_option {
	print STDERR "remote-helper command 'option $_[0]' not yet implemented\n";
	print STDOUT "unsupported\n";
}

sub fetch_mw_revisions_for_page {
	my $page = shift;
	my $id = shift;
	my $fetch_from = shift;
	my @page_revs = ();
	my $query = {
		action => 'query',
		prop => 'revisions',
		rvprop => 'ids',
		rvdir => 'newer',
		rvstartid => $fetch_from,
		rvlimit => 500,
		pageids => $id,
	};

	my $revnum = 0;
	# Get 500 revisions at a time due to the mediawiki api limit
	while (1) {
		my $result = $mediawiki->api($query);

		# Parse each of those 500 revisions
		foreach my $revision (@{$result->{query}->{pages}->{$id}->{revisions}}) {
			my $page_rev_ids;
			$page_rev_ids->{pageid} = $page->{pageid};
			$page_rev_ids->{revid} = $revision->{revid};
			push(@page_revs, $page_rev_ids);
			$revnum++;
		}
		last unless $result->{'query-continue'};
		$query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid};
	}
	if ($shallow_import && @page_revs) {
		print STDERR "  Found 1 revision (shallow import).\n";
		@page_revs = sort {$b->{revid} <=> $a->{revid}} (@page_revs);
		return $page_revs[0];
	}
	print STDERR "  Found ", $revnum, " revision(s).\n";
	return @page_revs;
}

sub fetch_mw_revisions {
	my $pages = shift; my @pages = @{$pages};
	my $fetch_from = shift;

	my @revisions = ();
	my $n = 1;
	foreach my $page (@pages) {
		my $id = $page->{pageid};

		print STDERR "page $n/", scalar(@pages), ": " . $page->{title} . "\n";
		$n++;
		my @page_revs = fetch_mw_revisions_for_page($page, $id, $fetch_from);
		@revisions = (@page_revs, @revisions);
	}

	return ($n, @revisions);
}

sub fe_escape_path {
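	# Quote a path for the fast-import stream: backslashes, double
	# quotes and newlines must be escaped inside the quoted string.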
	my $path = shift;
	$path =~ s/\\/\\\\/g;
	$path =~ s/"/\\"/g;
	$path =~ s/\n/\\n/g;
	return '"' . $path . '"';
}

sub import_file_revision {
	my $commit = shift;
	my %commit = %{$commit};
	my $full_import = shift;
	my $n = shift;
	my $mediafile = shift;
	my %mediafile;
	if ($mediafile) {
		%mediafile = %{$mediafile};
	}

	my $title = $commit{title};
	my $comment = $commit{comment};
	my $content = $commit{content};
	my $author = $commit{author};
	my $date = $commit{date};

	print STDOUT "commit refs/mediawiki/$remotename/master\n";
	print STDOUT "mark :$n\n";
	print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n";
	literal_data($comment);

	# If it's not a clone, we need to know where to start from
	if (!$full_import && $n == 1) {
		print STDOUT "from refs/mediawiki/$remotename/master^0\n";
	}
	if ($content ne DELETED_CONTENT) {
		print STDOUT "M 644 inline " .
			fe_escape_path($title . ".mw") . "\n";
		literal_data($content);
		if (%mediafile) {
			print STDOUT "M 644 inline "
				. fe_escape_path($mediafile{title}) . "\n";
			literal_data_raw($mediafile{content});
		}
		print STDOUT "\n\n";
	} else {
		print STDOUT "D " . fe_escape_path($title . ".mw") . "\n";
	}

	# mediawiki revision number in the git note
	if ($full_import && $n == 1) {
		print STDOUT "reset refs/notes/$remotename/mediawiki\n";
	}
	print STDOUT "commit refs/notes/$remotename/mediawiki\n";
	print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n";
	literal_data("Note added by git-mediawiki during import");
	if (!$full_import && $n == 1) {
		print STDOUT "from refs/notes/$remotename/mediawiki^0\n";
	}
	print STDOUT "N inline :$n\n";
	literal_data("mediawiki_revision: " . $commit{mw_revision});
	print STDOUT "\n\n";
}

# parse a sequence of
# <cmd> <arg1>
# <cmd> <arg2>
# \n
# (like batch sequence of import and sequence of push statements)
sub get_more_refs {
	my $cmd = shift;
	my @refs;
	while (1) {
		my $line = <STDIN>;
		if ($line =~ m/^$cmd (.*)$/) {
			push(@refs, $1);
		} elsif ($line eq "\n") {
			return @refs;
		} else {
			die("Invalid command in a '$cmd' batch: " . $line);
		}
	}
}

sub mw_import {
	# multiple import commands can follow each other.
	my @refs = (shift, get_more_refs("import"));
	foreach my $ref (@refs) {
		mw_import_ref($ref);
	}
	print STDOUT "done\n";
}

sub mw_import_ref {
	my $ref = shift;
	# The remote helper will call "import HEAD" and
	# "import refs/heads/master".
	# Since HEAD is a symbolic ref to master (by convention,
	# followed by the output of the command "list" that we gave),
	# we don't need to do anything in this case.
	if ($ref eq "HEAD") {
		return;
	}

	mw_connect_maybe();

	print STDERR "Searching revisions...\n";
	my $last_local = get_last_local_revision();
	my $fetch_from = $last_local + 1;
	if ($fetch_from == 1) {
		print STDERR ", fetching from beginning.\n";
	} else {
		print STDERR ", fetching from here.\n";
	}

	my $n = 0;
	if ($fetch_strategy eq "by_rev") {
		print STDERR "Fetching & writing export data by revs...\n";
		$n = mw_import_ref_by_revs($fetch_from);
	} elsif ($fetch_strategy eq "by_page") {
		print STDERR "Fetching & writing export data by pages...\n";
		$n = mw_import_ref_by_pages($fetch_from);
	} else {
		print STDERR "fatal: invalid fetch strategy \"$fetch_strategy\".\n";
		print STDERR "Check your configuration variables remote.$remotename.fetchStrategy and mediawiki.fetchStrategy\n";
		exit 1;
	}

	if ($fetch_from == 1 && $n == 0) {
		print STDERR "You appear to have cloned an empty MediaWiki.\n";
		# Something has to be done remote-helper side. If nothing is done, an error is
		# thrown saying that HEAD is referring to unknown object 0000000000000000000
		# and the clone fails.
	}
}

sub mw_import_ref_by_pages {

	my $fetch_from = shift;
	my %pages_hash = get_mw_pages();
	my @pages = values(%pages_hash);

	my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from);

	@revisions = sort {$a->{revid} <=> $b->{revid}} @revisions;
	my @revision_ids = map $_->{revid}, @revisions;

	return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash);
}

sub mw_import_ref_by_revs {

	my $fetch_from = shift;
	my %pages_hash = get_mw_pages();

	my $last_remote = get_last_global_remote_rev();
	my @revision_ids = $fetch_from..$last_remote;
	return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash);
}

# Import revisions given in second argument (array of integers).
# Only pages appearing in the third argument (hash indexed by page titles)
# will be imported.
sub mw_import_revids {
	my $fetch_from = shift;
	my $revision_ids = shift;
	my $pages = shift;

	my $n = 0;
	my $n_actual = 0;
	my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined

	foreach my $pagerevid (@$revision_ids) {
		# Count page even if we skip it, since we display
		# $n/$total and $total includes skipped pages.
		$n++;

		# fetch the content of the pages
		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'content|timestamp|comment|user|ids',
			revids => $pagerevid,
		};

		my $result = $mediawiki->api($query);

		if (!$result) {
			die "Failed to retrieve modified page for revision $pagerevid";
		}

		if (defined($result->{query}->{badrevids}->{$pagerevid})) {
			# The revision id does not exist on the remote wiki.
			next;
		}

		if (!defined($result->{query}->{pages})) {
			die "Invalid revision $pagerevid.";
		}

		my @result_pages = values(%{$result->{query}->{pages}});
		my $result_page = $result_pages[0];
		my $rev = $result_pages[0]->{revisions}->[0];

		my $page_title = $result_page->{title};

		if (!exists($pages->{$page_title})) {
			print STDERR "$n/", scalar(@$revision_ids),
				": Skipping revision #$rev->{revid} of $page_title\n";
			next;
		}

		$n_actual++;

		my %commit;
		$commit{author} = $rev->{user} || 'Anonymous';
		$commit{comment} = $rev->{comment} || EMPTY_MESSAGE;
		$commit{title} = mediawiki_smudge_filename($page_title);
		$commit{mw_revision} = $rev->{revid};
		$commit{content} = mediawiki_smudge($rev->{'*'});

		if (!defined($rev->{timestamp})) {
			$last_timestamp++;
		} else {
			$last_timestamp = $rev->{timestamp};
		}
		$commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp);

		# Differentiates classic pages and media files.
		my ($namespace, $filename) = $page_title =~ /^([^:]*):(.*)$/;
		my %mediafile;
		if ($namespace) {
			my $id = get_mw_namespace_id($namespace);
			if ($id && $id == get_mw_namespace_id("File")) {
				%mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
			}
		}
		# If this is a revision of the media page for a new version
		# of a file, do one common commit for both the file and the
		# media page. Otherwise, commit only that page.
		print STDERR "$n/", scalar(@$revision_ids), ": Revision #$rev->{revid} of $commit{title}\n";
		import_file_revision(\%commit, ($fetch_from == 1), $n_actual, \%mediafile);
	}

	return $n_actual;
}

sub error_non_fast_forward {
	my $advice = run_git("config --bool advice.pushNonFastForward");
	chomp($advice);
	if ($advice ne "false") {
		# Native git-push would show this after the summary.
		# We can't ask it to display it cleanly, so print it
		# ourselves before.
		print STDERR "To prevent you from losing history, non-fast-forward updates were rejected\n";
		print STDERR "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n";
		print STDERR "'Note about fast-forwards' section of 'git push --help' for details.\n";
	}
	print STDOUT "error $_[0] \"non-fast-forward\"\n";
	return 0;
}

sub mw_upload_file {
	my $complete_file_name = shift;
	my $new_sha1 = shift;
	my $extension = shift;
	my $file_deleted = shift;
	my $summary = shift;
	my $newrevid;
	my $path = "File:" . $complete_file_name;
	my %hashFiles = get_allowed_file_extensions();
	if (!exists($hashFiles{$extension})) {
		print STDERR "$complete_file_name is not a permitted file on this wiki.\n";
		print STDERR "Check the configuration of file uploads in your mediawiki.\n";
		return $newrevid;
	}
	# Deleting and uploading a file requires a privileged user
	if ($file_deleted) {
		mw_connect_maybe();
		my $query = {
			action => 'delete',
			title => $path,
			reason => $summary
		};
		if (!$mediawiki->edit($query)) {
			print STDERR "Failed to delete file on remote wiki\n";
			print STDERR "Check your permissions on the remote site. Error code:\n";
			print STDERR $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details};
			exit 1;
		}
	} else {
		# Don't let perl try to interpret file content as UTF-8 => use "raw"
		my $content = run_git("cat-file blob $new_sha1", "raw");
		if ($content ne "") {
			mw_connect_maybe();
			$mediawiki->{config}->{upload_url} =
				"$url/index.php/Special:Upload";
			$mediawiki->edit({
				action => 'upload',
				filename => $complete_file_name,
				comment => $summary,
				file => [undef,
					 $complete_file_name,
					 Content => $content],
				ignorewarnings => 1,
			}, {
				skip_encoding => 1
			} ) || die $mediawiki->{error}->{code} . ':'
				 . $mediawiki->{error}->{details};
			my $last_file_page = $mediawiki->get_page({title => $path});
			$newrevid = $last_file_page->{revid};
			print STDERR "Pushed file: $new_sha1 - $complete_file_name.\n";
		} else {
			print STDERR "Empty file $complete_file_name not pushed.\n";
		}
	}
	return $newrevid;
}

sub mw_push_file {
	my $diff_info = shift;
	# $diff_info contains a string in this format:
	# 100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status>
	my @diff_info_split = split(/[ \t]/, $diff_info);

	# Filename, including .mw extension
	my $complete_file_name = shift;
	# Commit message
	my $summary = shift;
	# MediaWiki revision number. Keep the previous one by default,
	# in case there's no edit to perform.
	my $oldrevid = shift;
	my $newrevid;

	if ($summary eq EMPTY_MESSAGE) {
		$summary = '';
	}

	my $new_sha1 = $diff_info_split[3];
	my $old_sha1 = $diff_info_split[2];
	my $page_created = ($old_sha1 eq NULL_SHA1);
	my $page_deleted = ($new_sha1 eq NULL_SHA1);
	$complete_file_name = mediawiki_clean_filename($complete_file_name);

	my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/;
	if (!defined($extension)) {
		$extension = "";
	}
	if ($extension eq "mw") {
		my $ns = get_mw_namespace_id_for_page($complete_file_name);
		if ($ns && $ns == get_mw_namespace_id("File") && (!$export_media)) {
			print STDERR "Ignoring media file related page: $complete_file_name\n";
			return ($oldrevid, "ok");
		}
		my $file_content;
		if ($page_deleted) {
			# Deleting a page usually requires
			# special privileges. A common
			# convention is to replace the page
			# with this content instead:
			$file_content = DELETED_CONTENT;
		} else {
			$file_content = run_git("cat-file blob $new_sha1");
		}

		mw_connect_maybe();

		my $result = $mediawiki->edit( {
			action => 'edit',
			summary => $summary,
			title => $title,
			basetimestamp => $basetimestamps{$oldrevid},
			text => mediawiki_clean($file_content, $page_created),
		}, {
			skip_encoding => 1 # Helps with names with accented characters
		});
		if (!$result) {
			if ($mediawiki->{error}->{code} == 3) {
				# edit conflicts, considered as non-fast-forward
				print STDERR 'Warning: Error ' .
					$mediawiki->{error}->{code} .
					' from mediawiki: ' . $mediawiki->{error}->{details} .
					".\n";
				return ($oldrevid, "non-fast-forward");
			} else {
				# Other errors. Shouldn't happen => just die()
				die 'Fatal: Error ' .
					$mediawiki->{error}->{code} .
					' from mediawiki: ' . $mediawiki->{error}->{details};
			}
		}
		$newrevid = $result->{edit}->{newrevid};
		print STDERR "Pushed file: $new_sha1 - $title\n";
	} elsif ($export_media) {
		$newrevid = mw_upload_file($complete_file_name, $new_sha1,
					   $extension, $page_deleted,
					   $summary);
	} else {
		print STDERR "Ignoring media file $title\n";
	}
	$newrevid = ($newrevid or $oldrevid);
	return ($newrevid, "ok");
}

sub mw_push {
	# multiple push statements can follow each other
	my @refsspecs = (shift, get_more_refs("push"));
	my $pushed;
	for my $refspec (@refsspecs) {
		my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/
			or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>");
		if ($force) {
			print STDERR "Warning: forced push not allowed on a MediaWiki.\n";
		}
		if ($local eq "") {
			print STDERR "Cannot delete remote branch on a MediaWiki\n";
			print STDOUT "error $remote cannot delete\n";
			next;
		}
		if ($remote ne "refs/heads/master") {
			print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n";
			print STDOUT "error $remote only master allowed\n";
			next;
		}
		if (mw_push_revision($local, $remote)) {
			$pushed = 1;
		}
	}

	# Notify Git that the push is done
	print STDOUT "\n";

	if ($pushed && $dumb_push) {
		print STDERR "Just pushed some revisions to MediaWiki.\n";
		print STDERR "The pushed revisions now have to be re-imported, and your current branch\n";
		print STDERR "needs to be updated with these re-imported commits. You can do this with\n";
		print STDERR "\n";
		print STDERR "  git pull --rebase\n";
		print STDERR "\n";
	}
}

sub mw_push_revision {
	my $local = shift;
	my $remote = shift; # actually, this has to be "refs/heads/master" at this point.
	my $last_local_revid = get_last_local_revision();
	print STDERR ".\n"; # Finish sentence started by get_last_local_revision()
	my $last_remote_revid = get_last_remote_revision();
	my $mw_revision = $last_remote_revid;

	# Get sha1 of commit pointed by local HEAD
	my $HEAD_sha1 = run_git("rev-parse $local 2>/dev/null"); chomp($HEAD_sha1);
	# Get sha1 of commit pointed by remotes/$remotename/master
	my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/$remotename/master 2>/dev/null");
	chomp($remoteorigin_sha1);

	if ($last_local_revid > 0 &&
	    $last_local_revid < $last_remote_revid) {
		return error_non_fast_forward($remote);
	}

	if ($HEAD_sha1 eq $remoteorigin_sha1) {
		# nothing to push
		return 0;
	}

	# Get every commit in between HEAD and refs/remotes/origin/master,
	# including HEAD and refs/remotes/origin/master
	my @commit_pairs = ();
	if ($last_local_revid > 0) {
		my $parsed_sha1 = $remoteorigin_sha1;
		# Find a path from last MediaWiki commit to pushed commit
		print STDERR "Computing path from local to remote ...\n";
		my @local_ancestry = split(/\n/, run_git("rev-list --boundary --parents $local ^$parsed_sha1"));
		my %local_ancestry;
		foreach my $line (@local_ancestry) {
			if (my ($child, $parents) = $line =~ m/^-?([a-f0-9]+) ([a-f0-9 ]+)/) {
				foreach my $parent (split(' ', $parents)) {
					$local_ancestry{$parent} = $child;
				}
			} elsif ($line !~ m/^([a-f0-9]+)/) {
				die "Unexpected output from git rev-list: $line";
			}
		}
		while ($parsed_sha1 ne $HEAD_sha1) {
			my $child = $local_ancestry{$parsed_sha1};
			if (!$child) {
				print STDERR "Cannot find a path in history from remote commit to last commit\n";
				return error_non_fast_forward($remote);
			}
			push(@commit_pairs, [$parsed_sha1, $child]);
			$parsed_sha1 = $child;
		}
	} else {
		# No remote mediawiki revision. Export the whole
		# history (linearized with --first-parent)
		print STDERR "Warning: no common ancestor, pushing complete history\n";
		my $history = run_git("rev-list --first-parent --children $local");
		my @history = split('\n', $history);
		@history = @history[1..$#history];
		foreach my $line (reverse @history) {
			my @commit_info_split = split(/ |\n/, $line);
			push(@commit_pairs, \@commit_info_split);
		}
	}

	foreach my $commit_info_split (@commit_pairs) {
		my $sha1_child = @{$commit_info_split}[0];
		my $sha1_commit = @{$commit_info_split}[1];
		my $diff_infos = run_git("diff-tree -r --raw -z $sha1_child $sha1_commit");
		# TODO: we could detect renames, and encode them with a #redirect on the wiki.
		# TODO: for now, it's just a delete+add
		my @diff_info_list = split(/\0/, $diff_infos);
		# Keep the subject line of the commit message as mediawiki comment for the revision
		my $commit_msg = run_git("log --no-walk --format=\"%s\" $sha1_commit");
		chomp($commit_msg);
		# Push every blob
		while (@diff_info_list) {
			my $status;
			# git diff-tree -z gives an output like
			# <metadata>\0<filename1>\0
			# <metadata>\0<filename2>\0
			# and we've split on \0.
			my $info = shift(@diff_info_list);
			my $file = shift(@diff_info_list);
			($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision);
			if ($status eq "non-fast-forward") {
				# we may already have sent part of the
				# commit to MediaWiki, but it's too
				# late to cancel it. Stop the push in
				# the middle, but still give an
				# accurate error message.
				return error_non_fast_forward($remote);
			}
			if ($status ne "ok") {
				die("Unknown error from mw_push_file()");
			}
		}
		unless ($dumb_push) {
			run_git("notes --ref=$remotename/mediawiki add -f -m \"mediawiki_revision: $mw_revision\" $sha1_commit");
			run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child");
		}
	}

	print STDOUT "ok $remote\n";
	return 1;
}

sub get_allowed_file_extensions {
	mw_connect_maybe();

	my $query = {
		action => 'query',
		meta => 'siteinfo',
		siprop => 'fileextensions'
	};
	my $result = $mediawiki->api($query);
	my @file_extensions = map $_->{ext}, @{$result->{query}->{fileextensions}};
	my %hashFile = map {$_ => 1} @file_extensions;

	return %hashFile;
}

# In-memory cache for MediaWiki namespace ids.
my %namespace_id;

# Namespaces whose id is cached in the configuration file
# (to avoid duplicates)
my %cached_mw_namespace_id;

# Return MediaWiki id for a canonical namespace name.
# Ex.: "File", "Project".
sub get_mw_namespace_id {
	mw_connect_maybe();
	my $name = shift;

	if (!exists $namespace_id{$name}) {
		# Look at the configuration file to see if the record for
		# that namespace is already cached.
		# Namespaces are stored in the form
		# "Name_of_namespace:Id_namespace", e.g. "File:6".
		my @temp = split(/[\n]/, run_git("config --get-all remote."
						 . $remotename . ".namespaceCache"));
		chomp(@temp);
		foreach my $ns (@temp) {
			my ($n, $id) = split(/:/, $ns);
			if ($id eq 'notANameSpace') {
				$namespace_id{$n} = {is_namespace => 0};
			} else {
				$namespace_id{$n} = {is_namespace => 1, id => $id};
			}
			$cached_mw_namespace_id{$n} = 1;
		}
	}

	if (!exists $namespace_id{$name}) {
		print STDERR "Namespace $name not found in cache, querying the wiki ...\n";
		# NS not found => get namespace id from MW and store it in
		# configuration file.
		my $query = {
			action => 'query',
			meta => 'siteinfo',
			siprop => 'namespaces'
		};
		my $result = $mediawiki->api($query);

		while (my ($id, $ns) = each(%{$result->{query}->{namespaces}})) {
			if (defined($ns->{id}) && defined($ns->{canonical})) {
				$namespace_id{$ns->{canonical}} = {is_namespace => 1, id => $ns->{id}};
				if ($ns->{'*'}) {
					# alias (e.g. French Fichier: as alias for canonical File:)
					$namespace_id{$ns->{'*'}} = {is_namespace => 1, id => $ns->{id}};
				}
			}
		}
	}

	my $ns = $namespace_id{$name};
	my $id;

	unless (defined $ns) {
		print STDERR "No such namespace $name on MediaWiki.\n";
		$ns = {is_namespace => 0};
		$namespace_id{$name} = $ns;
	}

	if ($ns->{is_namespace}) {
		$id = $ns->{id};
	}

	# Store "notANameSpace" as a special value for nonexistent namespaces
	my $store_id = ($id || 'notANameSpace');

	# Store explicitly requested namespaces on disk
	if (!exists $cached_mw_namespace_id{$name}) {
		run_git("config --add remote." . $remotename
			. ".namespaceCache \"" . $name . ":" . $store_id . "\"");
		$cached_mw_namespace_id{$name} = 1;
	}
	return $id;
}

sub get_mw_namespace_id_for_page {
	if (my ($namespace) = $_[0] =~ /^([^:]*):/) {
		return get_mw_namespace_id($namespace);
	} else {
		return;
	}
}