#! /usr/bin/perl

# Copyright (C) 2011
# Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr>
# Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr>
# Claire Fousse <claire.fousse@ensimag.imag.fr>
# David Amouyal <david.amouyal@ensimag.imag.fr>
# Matthieu Moy <matthieu.moy@grenoble-inp.fr>
# License: GPL v2 or later

# Gateway between Git and MediaWiki.
# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/

use strict;
use MediaWiki::API;
use Git;
use DateTime::Format::ISO8601;
use warnings;

# By default, use UTF-8 to communicate with Git and the user
binmode STDERR, ':encoding(UTF-8)';
binmode STDOUT, ':encoding(UTF-8)';

use URI::Escape;

# MediaWiki filenames can contain forward slashes. This variable decides
# by which pattern they should be replaced.
use constant SLASH_REPLACEMENT => '%2F';

# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";

# It's not possible to create empty pages. New empty files in Git are
# sent with this content instead.
use constant EMPTY_CONTENT => "<!-- empty page -->\n";

# Used to reflect file creation or deletion in diff.
use constant NULL_SHA1 => '0000000000000000000000000000000000000000';

# Used on Git's side to reflect empty edit messages on the wiki
use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*';

use constant EMPTY => q{};

# Number of pages taken into account at once in the subroutine get_mw_page_list
use constant SLICE_SIZE => 50;

# Number of linked media files to get at once in get_linked_mediafiles.
# The query is split into small batches because of the MW API limit on
# the number of links to be returned (500 links max).
use constant BATCH_SIZE => 10;

use constant HTTP_CODE_OK => 200;

if (@ARGV != 2) {
	exit_error_usage();
}

my $remotename = $ARGV[0];
my $url = $ARGV[1];

# Accept both space-separated and multiple keys in config file.
# Spaces should be written as _ anyway because we'll use chomp.
my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote.${remotename}.pages"));
chomp(@tracked_pages);

# Just like @tracked_pages, but for MediaWiki categories.
my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.${remotename}.categories"));
chomp(@tracked_categories);

# Import media files on pull
my $import_media = run_git("config --get --bool remote.${remotename}.mediaimport");
chomp($import_media);
$import_media = ($import_media eq 'true');

# Export media files on push
my $export_media = run_git("config --get --bool remote.${remotename}.mediaexport");
chomp($export_media);
$export_media = !($export_media eq 'false');

my $wiki_login = run_git("config --get remote.${remotename}.mwLogin");
# Note: mwPassword is discouraged. Use the credential system instead.
my $wiki_passwd = run_git("config --get remote.${remotename}.mwPassword");
my $wiki_domain = run_git("config --get remote.${remotename}.mwDomain");
chomp($wiki_login);
chomp($wiki_passwd);
chomp($wiki_domain);

# Import only last revisions (both for clone and fetch)
my $shallow_import = run_git("config --get --bool remote.${remotename}.shallow");
chomp($shallow_import);
$shallow_import = ($shallow_import eq 'true');
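
# For reference, the per-remote options read above can be set like this
# (illustrative values, assuming a remote named "origin"):
#   git config remote.origin.pages 'Main_Page Some_Other_Page'
#   git config remote.origin.categories 'Some_Category'
#   git config remote.origin.mediaimport true
#   git config remote.origin.shallow true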

# Fetch (clone and pull) by revisions instead of by pages. This behavior
# is more efficient when we have a wiki with lots of pages and we fetch
# the revisions quite often, so that they concern only a few pages.
# Possible values:
# - by_rev: perform one query per new revision on the remote wiki
# - by_page: query each tracked page for new revisions
my $fetch_strategy = run_git("config --get remote.${remotename}.fetchStrategy");
if (!$fetch_strategy) {
	$fetch_strategy = run_git('config --get mediawiki.fetchStrategy');
}
chomp($fetch_strategy);
if (!$fetch_strategy) {
	$fetch_strategy = 'by_page';
}

# Remember the timestamp corresponding to a revision id.
my %basetimestamps;

# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
# remote.<remotename>.dumbPush.
#
# This means the user will have to re-import the just-pushed
# revisions. On the other hand, this means that the Git revisions
# corresponding to MediaWiki revisions are all imported from the wiki,
# regardless of whether they were initially created in Git or from the
# web interface, hence all users will get the same history (i.e. if
# the push from Git to MediaWiki loses some information, everybody
# will get the history with information lost). If the import is
# deterministic, this means everybody gets the same sha1 for each
# MediaWiki revision.
my $dumb_push = run_git("config --get --bool remote.${remotename}.dumbPush");
if (!$dumb_push) {
	$dumb_push = run_git('config --get --bool mediawiki.dumbPush');
}
chomp($dumb_push);
$dumb_push = ($dumb_push eq 'true');

my $wiki_name = $url;
$wiki_name =~ s{[^/]*://}{};
# If URL is like http://user:password@example.com/, we clearly don't
# want the password in $wiki_name. While we're there, also remove user
# and '@' sign, to avoid an author like MWUser@HTTPUser@host.com
$wiki_name =~ s/^.*@//;

# Commands parser
while (<STDIN>) {
	chomp;

	if (!parse_command($_)) {
		last;
	}

	BEGIN { $| = 1 } # flush STDOUT, to make sure the previous
			 # command is fully processed.
}

########################## Functions ##############################

## error handling
sub exit_error_usage {
	die "ERROR: git-remote-mediawiki module was not called with a correct number of\n" .
	    "parameters\n" .
	    "You may have obtained this error because you attempted to run the\n" .
	    "git-remote-mediawiki module directly.\n" .
	    "This module can be used the following way:\n" .
	    "\tgit clone mediawiki://<address of a mediawiki>\n" .
	    "Then, use git commit, push and pull as with every normal git repository.\n";
}
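
# For reference, a session with Git typically arrives on STDIN as a
# command stream along these lines (illustrative):
#   capabilities
#   list
#   import refs/heads/master
#   <blank line>
# Each line is dispatched by parse_command() below; a blank line ends a
# batch of import or push commands (see get_more_refs()).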

sub parse_command {
	my ($line) = @_;
	my @cmd = split(/ /, $line);
	if (!defined $cmd[0]) {
		return 0;
	}
	if ($cmd[0] eq 'capabilities') {
		die("Too many arguments for capabilities\n")
		    if (defined($cmd[1]));
		mw_capabilities();
	} elsif ($cmd[0] eq 'list') {
		die("Too many arguments for list\n") if (defined($cmd[2]));
		mw_list($cmd[1]);
	} elsif ($cmd[0] eq 'import') {
		die("Invalid argument for import\n")
		    if ($cmd[1] eq EMPTY);
		die("Too many arguments for import\n")
		    if (defined($cmd[2]));
		mw_import($cmd[1]);
	} elsif ($cmd[0] eq 'option') {
		die("Invalid arguments for option\n")
		    if ($cmd[1] eq EMPTY || $cmd[2] eq EMPTY);
		die("Too many arguments for option\n")
		    if (defined($cmd[3]));
		mw_option($cmd[1], $cmd[2]);
	} elsif ($cmd[0] eq 'push') {
		mw_push($cmd[1]);
	} else {
		print {*STDERR} "Unknown command. Aborting...\n";
		return 0;
	}
	return 1;
}

# MediaWiki API instance, created lazily.
my $mediawiki;

sub mw_connect_maybe {
	if ($mediawiki) {
		return;
	}
	$mediawiki = MediaWiki::API->new;
	$mediawiki->{config}->{api_url} = "${url}/api.php";
	if ($wiki_login) {
		my %credential = (
			'url' => $url,
			'username' => $wiki_login,
			'password' => $wiki_passwd
		);
		Git::credential(\%credential);
		my $request = {lgname => $credential{username},
			       lgpassword => $credential{password},
			       lgdomain => $wiki_domain};
		if ($mediawiki->login($request)) {
			Git::credential(\%credential, 'approve');
			print {*STDERR} qq(Logged in mediawiki user "$credential{username}".\n);
		} else {
			print {*STDERR} qq(Failed to log in mediawiki user "$credential{username}" on ${url}\n);
			print {*STDERR} ' (error ' .
				$mediawiki->{error}->{code} . ': ' .
				$mediawiki->{error}->{details} . ")\n";
			Git::credential(\%credential, 'reject');
			exit 1;
		}
	}
	return;
}

sub fatal_mw_error {
	my $action = shift;
	print STDERR "fatal: could not $action.\n";
	print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
	if ($url =~ /^https/) {
		print STDERR "fatal: make sure '$url/api.php' is a valid page\n";
		print STDERR "fatal: and the SSL certificate is correct.\n";
	} else {
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
	}
	print STDERR "fatal: (error " .
		$mediawiki->{error}->{code} . ': ' .
		$mediawiki->{error}->{details} . ")\n";
	exit 1;
}

## Functions for listing pages on the remote wiki
sub get_mw_tracked_pages {
	my $pages = shift;
	get_mw_page_list(\@tracked_pages, $pages);
	return;
}

sub get_mw_page_list {
	my $page_list = shift;
	my $pages = shift;
	my @some_pages = @{$page_list};
	while (@some_pages) {
		my $last_page = SLICE_SIZE;
		if ($#some_pages < $last_page) {
			$last_page = $#some_pages;
		}
		my @slice = @some_pages[0..$last_page];
		get_mw_first_pages(\@slice, $pages);
		@some_pages = @some_pages[(SLICE_SIZE + 1)..$#some_pages];
	}
	return;
}

sub get_mw_tracked_categories {
	my $pages = shift;
	foreach my $category (@tracked_categories) {
		if (index($category, ':') < 0) {
			# Mediawiki requires the Category
			# prefix, but let's not force the user
			# to specify it.
			$category = "Category:${category}";
		}
		my $mw_pages = $mediawiki->list( {
			action => 'query',
			list => 'categorymembers',
			cmtitle => $category,
			cmlimit => 'max' } )
			|| die $mediawiki->{error}->{code} . ': '
				. $mediawiki->{error}->{details} . "\n";
		foreach my $page (@{$mw_pages}) {
			$pages->{$page->{title}} = $page;
		}
	}
	return;
}

sub get_mw_all_pages {
	my $pages = shift;
	# No user-provided list, get the list of pages from the API.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		fatal_mw_error("get the list of wiki pages");
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
	return;
}

# Queries the wiki for a set of pages. Meant to be used within a loop
# querying the wiki for slices of page list.
sub get_mw_first_pages {
	my $some_pages = shift;
	my @some_pages = @{$some_pages};

	my $pages = shift;

	# pattern 'page1|page2|...' required by the API
	my $titles = join('|', @some_pages);

	my $mw_pages = $mediawiki->api({
		action => 'query',
		titles => $titles,
	});
	if (!defined($mw_pages)) {
		fatal_mw_error("query the list of wiki pages");
	}
	while (my ($id, $page) = each(%{$mw_pages->{query}->{pages}})) {
		if ($id < 0) {
			print {*STDERR} "Warning: page $page->{title} not found on wiki\n";
		} else {
			$pages->{$page->{title}} = $page;
		}
	}
	return;
}

# Get the list of pages to be fetched according to configuration.
sub get_mw_pages {
	mw_connect_maybe();

	print {*STDERR} "Listing pages on remote wiki...\n";

	my %pages; # hash on page titles to avoid duplicates
	my $user_defined;
	if (@tracked_pages) {
		$user_defined = 1;
		# The user provided a list of page titles, but we
		# still need to query the API to get the page IDs.
		get_mw_tracked_pages(\%pages);
	}
	if (@tracked_categories) {
		$user_defined = 1;
		get_mw_tracked_categories(\%pages);
	}
	if (!$user_defined) {
		get_mw_all_pages(\%pages);
	}
	if ($import_media) {
		print {*STDERR} "Getting media files for selected pages...\n";
		if ($user_defined) {
			get_linked_mediafiles(\%pages);
		} else {
			get_all_mediafiles(\%pages);
		}
	}
	print {*STDERR} (scalar keys %pages) . " pages found.\n";
	return %pages;
}

# usage: $out = run_git("command args");
#        $out = run_git("command args", "raw"); # don't interpret output as UTF-8.
sub run_git {
	my $args = shift;
	my $encoding = (shift || 'encoding(UTF-8)');
	open(my $git, "-|:${encoding}", "git ${args}")
	    or die "Unable to fork: $!\n";
	my $res = do {
		local $/ = undef;
		<$git>
	};
	close($git);

	return $res;
}


sub get_all_mediafiles {
	my $pages = shift;
	# Attach the list of all pages for media files from the API;
	# they are in a different namespace, and only one namespace
	# can be queried at a time.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		apnamespace => get_mw_namespace_id('File'),
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		print {*STDERR} "fatal: could not get the list of pages for media files.\n";
		print {*STDERR} "fatal: '$url' does not appear to be a mediawiki\n";
		print {*STDERR} "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
	return;
}

sub get_linked_mediafiles {
	my $pages = shift;
	my @titles = map { $_->{title} } values(%{$pages});

	my $batch = BATCH_SIZE;
	while (@titles) {
		if ($#titles < $batch) {
			$batch = $#titles;
		}
		my @slice = @titles[0..$batch];

		# pattern 'page1|page2|...' required by the API
		my $mw_titles = join('|', @slice);

		# Media files could be included or linked from
		# a page, get all related
		my $query = {
			action => 'query',
			prop => 'links|images',
			titles => $mw_titles,
			plnamespace => get_mw_namespace_id('File'),
			pllimit => 'max'
		};
		my $result = $mediawiki->api($query);

		while (my ($id, $page) = each(%{$result->{query}->{pages}})) {
			my @media_titles;
			if (defined($page->{links})) {
				my @link_titles
				    = map { $_->{title} } @{$page->{links}};
				push(@media_titles, @link_titles);
			}
			if (defined($page->{images})) {
				my @image_titles
				    = map { $_->{title} } @{$page->{images}};
				push(@media_titles, @image_titles);
			}
			if (@media_titles) {
				get_mw_page_list(\@media_titles, $pages);
			}
		}

		@titles = @titles[($batch + 1)..$#titles];
	}
	return;
}

sub get_mw_mediafile_for_page_revision {
	# Name of the file on the wiki, with the prefix.
	my $filename = shift;
	my $timestamp = shift;
	my %mediafile;

	# Check whether a media file with the given timestamp exists on
	# MediaWiki. In that case, download the file.
	my $query = {
		action => 'query',
		prop => 'imageinfo',
		titles => "File:${filename}",
		iistart => $timestamp,
		iiend => $timestamp,
		iiprop => 'timestamp|archivename|url',
		iilimit => 1
	};
	my $result = $mediawiki->api($query);

	my ($fileid, $file) = each( %{$result->{query}->{pages}} );
	# If not defined, it means there is no revision of the file for
	# the given timestamp.
	if (defined($file->{imageinfo})) {
		$mediafile{title} = $filename;

		my $fileinfo = pop(@{$file->{imageinfo}});
		$mediafile{timestamp} = $fileinfo->{timestamp};
		# Mediawiki::API's download function doesn't support https URLs
		# and can't download old versions of files.
		print {*STDERR} "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n";
		$mediafile{content} = download_mw_mediafile($fileinfo->{url});
	}
	return %mediafile;
}

sub download_mw_mediafile {
	my $download_url = shift;

	my $response = $mediawiki->{ua}->get($download_url);
	if ($response->code == HTTP_CODE_OK) {
		return $response->decoded_content;
	} else {
		print {*STDERR} "Error downloading mediafile from:\n";
		print {*STDERR} "URL: ${download_url}\n";
		print {*STDERR} 'Server response: ' . $response->code . q{ } . $response->message . "\n";
		exit 1;
	}
}

sub get_last_local_revision {
	# Get note regarding last mediawiki revision
	my $note = run_git("notes --ref=${remotename}/mediawiki show refs/mediawiki/${remotename}/master 2>/dev/null");
	my @note_info = split(/ /, $note);

	my $lastrevision_number;
	if (!(defined($note_info[0]) && $note_info[0] eq 'mediawiki_revision:')) {
		print {*STDERR} 'No previous mediawiki revision found';
		$lastrevision_number = 0;
	} else {
		# Notes are formatted: mediawiki_revision: #number
		$lastrevision_number = $note_info[1];
		chomp($lastrevision_number);
		print {*STDERR} "Last local mediawiki revision found is ${lastrevision_number}";
	}
	return $lastrevision_number;
}
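
# For reference, the note read by get_last_local_revision() above is
# expected to hold a single line of the form (illustrative revision id):
#   mediawiki_revision: 1234
# It is written by import_file_revision() and mw_push_revision() below.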

# Get the last remote revision without taking into account which pages
# are tracked or not. This function makes a single request to the wiki,
# thus avoiding a loop over all tracked pages. This is useful for the
# fetch-by-rev option.
sub get_last_global_remote_rev {
	mw_connect_maybe();

	my $query = {
		action => 'query',
		list => 'recentchanges',
		prop => 'revisions',
		rclimit => '1',
		rcdir => 'older',
	};
	my $result = $mediawiki->api($query);
	return $result->{query}->{recentchanges}[0]->{revid};
}

# Get the last remote revision concerning the tracked pages and the tracked
# categories.
sub get_last_remote_revision {
	mw_connect_maybe();

	my %pages_hash = get_mw_pages();
	my @pages = values(%pages_hash);

	my $max_rev_num = 0;

	print {*STDERR} "Getting last revision id on tracked pages...\n";

	foreach my $page (@pages) {
		my $id = $page->{pageid};

		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'ids|timestamp',
			pageids => $id,
		};

		my $result = $mediawiki->api($query);

		my $lastrev = pop(@{$result->{query}->{pages}->{$id}->{revisions}});

		$basetimestamps{$lastrev->{revid}} = $lastrev->{timestamp};

		$max_rev_num = ($lastrev->{revid} > $max_rev_num ? $lastrev->{revid} : $max_rev_num);
	}

	print {*STDERR} "Last remote revision found is $max_rev_num.\n";
	return $max_rev_num;
}

# Clean content before sending it to MediaWiki
sub mediawiki_clean {
	my $string = shift;
	my $page_created = shift;
	# MediaWiki does not allow blank space at the end of a page and ends with a single \n.
	# This function right-trims a string and adds a \n at the end to follow this rule.
	$string =~ s/\s+$//;
	if ($string eq EMPTY && $page_created) {
		# Creating empty pages is forbidden.
		$string = EMPTY_CONTENT;
	}
	return $string."\n";
}

# Filter applied on MediaWiki data before adding them to Git
sub mediawiki_smudge {
	my $string = shift;
	if ($string eq EMPTY_CONTENT) {
		$string = EMPTY;
	}
	# This \n is important. This is due to mediawiki's way to handle end of files.
	return "${string}\n";
}

sub mediawiki_clean_filename {
	my $filename = shift;
	$filename =~ s{@{[SLASH_REPLACEMENT]}}{/}g;
	# [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
	# Do a variant of URL-encoding, i.e. looks like URL-encoding,
	# but with _ added to prevent MediaWiki from thinking this is
	# an actual special character.
	$filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
	# If we use the uri escape before
	# we should unescape here, before anything

	return $filename;
}

sub mediawiki_smudge_filename {
	my $filename = shift;
	$filename =~ s{/}{@{[SLASH_REPLACEMENT]}}g;
	$filename =~ s/ /_/g;
	# Decode forbidden characters encoded in mediawiki_clean_filename
	$filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf('%c', hex($1))/ge;
	return $filename;
}

sub literal_data {
	my ($content) = @_;
	print {*STDOUT} 'data ', bytes::length($content), "\n", $content;
	return;
}

sub literal_data_raw {
	# Output possibly binary content.
	my ($content) = @_;
	# Avoid confusion between size in bytes and in characters
	utf8::downgrade($content);
	binmode STDOUT, ':raw';
	print {*STDOUT} 'data ', bytes::length($content), "\n", $content;
	binmode STDOUT, ':encoding(UTF-8)';
	return;
}
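
# Illustrative example of the framing produced above: literal_data("foo\n")
# prints
#   data 4
#   foo
# where the length is a byte count, hence the utf8::downgrade() and the
# temporary ':raw' layer in literal_data_raw().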

sub mw_capabilities {
	# Revisions are imported to the private namespace
	# refs/mediawiki/$remotename/ by the helper and fetched into
	# refs/remotes/$remotename later by fetch.
	print {*STDOUT} "refspec refs/heads/*:refs/mediawiki/${remotename}/*\n";
	print {*STDOUT} "import\n";
	print {*STDOUT} "list\n";
	print {*STDOUT} "push\n";
	print {*STDOUT} "\n";
	return;
}

sub mw_list {
	# MediaWiki does not have branches; we consider one branch arbitrarily
	# called master, and HEAD pointing to it.
	print {*STDOUT} "? refs/heads/master\n";
	print {*STDOUT} "\@refs/heads/master HEAD\n";
	print {*STDOUT} "\n";
	return;
}

sub mw_option {
	print {*STDERR} "remote-helper command 'option $_[0]' not yet implemented\n";
	print {*STDOUT} "unsupported\n";
	return;
}

sub fetch_mw_revisions_for_page {
	my $page = shift;
	my $id = shift;
	my $fetch_from = shift;
	my @page_revs = ();
	my $query = {
		action => 'query',
		prop => 'revisions',
		rvprop => 'ids',
		rvdir => 'newer',
		rvstartid => $fetch_from,
		rvlimit => 500,
		pageids => $id,
	};

	my $revnum = 0;
	# Get 500 revisions at a time due to the MediaWiki API limit
	while (1) {
		my $result = $mediawiki->api($query);

		# Parse each of those 500 revisions
		foreach my $revision (@{$result->{query}->{pages}->{$id}->{revisions}}) {
			my $page_rev_ids;
			$page_rev_ids->{pageid} = $page->{pageid};
			$page_rev_ids->{revid} = $revision->{revid};
			push(@page_revs, $page_rev_ids);
			$revnum++;
		}
		last if (!$result->{'query-continue'});
		$query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid};
	}
	if ($shallow_import && @page_revs) {
		print {*STDERR} "  Found 1 revision (shallow import).\n";
		@page_revs = sort {$b->{revid} <=> $a->{revid}} (@page_revs);
		return $page_revs[0];
	}
	print {*STDERR} "  Found ${revnum} revision(s).\n";
	return @page_revs;
}

sub fetch_mw_revisions {
	my $pages = shift; my @pages = @{$pages};
	my $fetch_from = shift;

	my @revisions = ();
	my $n = 1;
	foreach my $page (@pages) {
		my $id = $page->{pageid};
		print {*STDERR} "page ${n}/", scalar(@pages), ': ', $page->{title}, "\n";
		$n++;
		my @page_revs = fetch_mw_revisions_for_page($page, $id, $fetch_from);
		@revisions = (@page_revs, @revisions);
	}

	return ($n, @revisions);
}

sub fe_escape_path {
	my $path = shift;
	$path =~ s/\\/\\\\/g;
	$path =~ s/"/\\"/g;
	$path =~ s/\n/\\n/g;
	return qq("${path}");
}

sub import_file_revision {
	my $commit = shift;
	my %commit = %{$commit};
	my $full_import = shift;
	my $n = shift;
	my $mediafile = shift;
	my %mediafile;
	if ($mediafile) {
		%mediafile = %{$mediafile};
	}

	my $title = $commit{title};
	my $comment = $commit{comment};
	my $content = $commit{content};
	my $author = $commit{author};
	my $date = $commit{date};

	print {*STDOUT} "commit refs/mediawiki/${remotename}/master\n";
	print {*STDOUT} "mark :${n}\n";
	print {*STDOUT} "committer ${author} <${author}\@${wiki_name}> " . $date->epoch . " +0000\n";
	literal_data($comment);

	# If it's not a clone, we need to know where to start from
	if (!$full_import && $n == 1) {
		print {*STDOUT} "from refs/mediawiki/${remotename}/master^0\n";
	}
	if ($content ne DELETED_CONTENT) {
		print {*STDOUT} 'M 644 inline ' .
		    fe_escape_path("${title}.mw") . "\n";
		literal_data($content);
		if (%mediafile) {
			print {*STDOUT} 'M 644 inline '
			    . fe_escape_path($mediafile{title}) . "\n";
			literal_data_raw($mediafile{content});
		}
		print {*STDOUT} "\n\n";
	} else {
		print {*STDOUT} 'D ' . fe_escape_path("${title}.mw") . "\n";
	}

	# mediawiki revision number in the git note
	if ($full_import && $n == 1) {
		print {*STDOUT} "reset refs/notes/${remotename}/mediawiki\n";
	}
	print {*STDOUT} "commit refs/notes/${remotename}/mediawiki\n";
	print {*STDOUT} "committer ${author} <${author}\@${wiki_name}> " . $date->epoch . " +0000\n";
	literal_data('Note added by git-mediawiki during import');
	if (!$full_import && $n == 1) {
		print {*STDOUT} "from refs/notes/${remotename}/mediawiki^0\n";
	}
	print {*STDOUT} "N inline :${n}\n";
	literal_data("mediawiki_revision: $commit{mw_revision}");
	print {*STDOUT} "\n\n";
	return;
}

# parse a sequence of
# <cmd> <arg1>
# <cmd> <arg2>
# \n
# (like batch sequence of import and sequence of push statements)
sub get_more_refs {
	my $cmd = shift;
	my @refs;
	while (1) {
		my $line = <STDIN>;
		if ($line =~ /^$cmd (.*)$/) {
			push(@refs, $1);
		} elsif ($line eq "\n") {
			return @refs;
		} else {
			die("Invalid command in a '$cmd' batch: $_\n");
		}
	}
	return;
}

sub mw_import {
	# multiple import commands can follow each other.
	my @refs = (shift, get_more_refs('import'));
	foreach my $ref (@refs) {
		mw_import_ref($ref);
	}
	print {*STDOUT} "done\n";
	return;
}

sub mw_import_ref {
	my $ref = shift;
	# The remote helper will call "import HEAD" and
	# "import refs/heads/master".
	# Since HEAD is a symbolic ref to master (by convention,
	# followed by the output of the command "list" that we gave),
	# we don't need to do anything in this case.
	if ($ref eq 'HEAD') {
		return;
	}

	mw_connect_maybe();

	print {*STDERR} "Searching revisions...\n";
	my $last_local = get_last_local_revision();
	my $fetch_from = $last_local + 1;
	if ($fetch_from == 1) {
		print {*STDERR} ", fetching from beginning.\n";
	} else {
		print {*STDERR} ", fetching from here.\n";
	}

	my $n = 0;
	if ($fetch_strategy eq 'by_rev') {
		print {*STDERR} "Fetching & writing export data by revs...\n";
		$n = mw_import_ref_by_revs($fetch_from);
	} elsif ($fetch_strategy eq 'by_page') {
		print {*STDERR} "Fetching & writing export data by pages...\n";
		$n = mw_import_ref_by_pages($fetch_from);
	} else {
		print {*STDERR} qq(fatal: invalid fetch strategy "${fetch_strategy}".\n);
		print {*STDERR} "Check your configuration variables remote.${remotename}.fetchStrategy and mediawiki.fetchStrategy\n";
		exit 1;
	}

	if ($fetch_from == 1 && $n == 0) {
		print {*STDERR} "You appear to have cloned an empty MediaWiki.\n";
		# Something has to be done remote-helper side. If nothing is done, an error is
		# thrown saying that HEAD is referring to unknown object 0000000000000000000
		# and the clone fails.
	}
	return;
}

sub mw_import_ref_by_pages {

	my $fetch_from = shift;
	my %pages_hash = get_mw_pages();
	my @pages = values(%pages_hash);

	my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from);

	@revisions = sort {$a->{revid} <=> $b->{revid}} @revisions;
	my @revision_ids = map { $_->{revid} } @revisions;

	return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash);
}

sub mw_import_ref_by_revs {

	my $fetch_from = shift;
	my %pages_hash = get_mw_pages();

	my $last_remote = get_last_global_remote_rev();
	my @revision_ids = $fetch_from..$last_remote;
	return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash);
}
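
# For reference, each revision imported by mw_import_revids() below ends up
# in the fast-import stream via import_file_revision() above, roughly as
# follows (illustrative values, data lengths elided):
#   commit refs/mediawiki/origin/master
#   mark :1
#   committer WikiUser <WikiUser@wiki.example.com> 1303838733 +0000
#   data <length of the edit summary>
#   M 644 inline "Main_Page.mw"
#   data <length of the page content>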

# Import revisions given in the second argument (array of integers).
# Only pages appearing in the third argument (hash indexed by page titles)
# will be imported.
sub mw_import_revids {
	my $fetch_from = shift;
	my $revision_ids = shift;
	my $pages = shift;

	my $n = 0;
	my $n_actual = 0;
	my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined

	foreach my $pagerevid (@{$revision_ids}) {
		# Count page even if we skip it, since we display
		# $n/$total and $total includes skipped pages.
		$n++;

		# fetch the content of the pages
		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'content|timestamp|comment|user|ids',
			revids => $pagerevid,
		};

		my $result = $mediawiki->api($query);

		if (!$result) {
			die "Failed to retrieve modified page for revision $pagerevid\n";
		}

		if (defined($result->{query}->{badrevids}->{$pagerevid})) {
			# The revision id does not exist on the remote wiki.
			next;
		}

		if (!defined($result->{query}->{pages})) {
			die "Invalid revision ${pagerevid}.\n";
		}

		my @result_pages = values(%{$result->{query}->{pages}});
		my $result_page = $result_pages[0];
		my $rev = $result_pages[0]->{revisions}->[0];

		my $page_title = $result_page->{title};

		if (!exists($pages->{$page_title})) {
			print {*STDERR} "${n}/", scalar(@{$revision_ids}),
				": Skipping revision #$rev->{revid} of ${page_title}\n";
			next;
		}

		$n_actual++;

		my %commit;
		$commit{author} = $rev->{user} || 'Anonymous';
		$commit{comment} = $rev->{comment} || EMPTY_MESSAGE;
		$commit{title} = mediawiki_smudge_filename($page_title);
		$commit{mw_revision} = $rev->{revid};
		$commit{content} = mediawiki_smudge($rev->{'*'});

		if (!defined($rev->{timestamp})) {
			$last_timestamp++;
		} else {
			$last_timestamp = $rev->{timestamp};
		}
		$commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp);

		# Differentiate classic pages and media files.
		my ($namespace, $filename) = $page_title =~ /^([^:]*):(.*)$/;
		my %mediafile;
		if ($namespace) {
			my $id = get_mw_namespace_id($namespace);
			if ($id && $id == get_mw_namespace_id('File')) {
				%mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
			}
		}
		# If this is a revision of the media page for a new version
		# of a file, do one common commit for both the file and the
		# media page. Else do a commit only for that page.
		print {*STDERR} "${n}/", scalar(@{$revision_ids}), ": Revision #$rev->{revid} of $commit{title}\n";
		import_file_revision(\%commit, ($fetch_from == 1), $n_actual, \%mediafile);
	}

	return $n_actual;
}
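
# Note: the advice printed by error_non_fast_forward() below can be
# silenced with (illustrative):
#   git config advice.pushNonFastForward false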

sub error_non_fast_forward {
	my $advice = run_git('config --bool advice.pushNonFastForward');
	chomp($advice);
	if ($advice ne 'false') {
		# Native git-push would show this after the summary.
		# We can't ask it to display it cleanly, so print it
		# ourselves before.
		print {*STDERR} "To prevent you from losing history, non-fast-forward updates were rejected\n";
		print {*STDERR} "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n";
		print {*STDERR} "'Note about fast-forwards' section of 'git push --help' for details.\n";
	}
	print {*STDOUT} qq(error $_[0] "non-fast-forward"\n);
	return 0;
}

sub mw_upload_file {
	my $complete_file_name = shift;
	my $new_sha1 = shift;
	my $extension = shift;
	my $file_deleted = shift;
	my $summary = shift;
	my $newrevid;
	my $path = "File:${complete_file_name}";
	my %hashFiles = get_allowed_file_extensions();
	if (!exists($hashFiles{$extension})) {
		print {*STDERR} "${complete_file_name} is not a permitted file on this wiki.\n";
		print {*STDERR} "Check the configuration of file uploads in your mediawiki.\n";
		return $newrevid;
	}
	# Deleting and uploading a file requires a privileged user
	if ($file_deleted) {
		mw_connect_maybe();
		my $query = {
			action => 'delete',
			title => $path,
			reason => $summary
		};
		if (!$mediawiki->edit($query)) {
			print {*STDERR} "Failed to delete file on remote wiki\n";
			print {*STDERR} "Check your permissions on the remote site. Error code:\n";
			print {*STDERR} $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details};
			exit 1;
		}
	} else {
		# Don't let perl try to interpret file content as UTF-8 => use "raw"
		my $content = run_git("cat-file blob ${new_sha1}", 'raw');
		if ($content ne EMPTY) {
			mw_connect_maybe();
			$mediawiki->{config}->{upload_url} =
				"${url}/index.php/Special:Upload";
			$mediawiki->edit({
				action => 'upload',
				filename => $complete_file_name,
				comment => $summary,
				file => [undef,
					 $complete_file_name,
					 Content => $content],
				ignorewarnings => 1,
			}, {
				skip_encoding => 1
			} ) || die $mediawiki->{error}->{code} . ':'
				 . $mediawiki->{error}->{details} . "\n";
			my $last_file_page = $mediawiki->get_page({title => $path});
			$newrevid = $last_file_page->{revid};
			print {*STDERR} "Pushed file: ${new_sha1} - ${complete_file_name}.\n";
		} else {
			print {*STDERR} "Empty file ${complete_file_name} not pushed.\n";
		}
	}
	return $newrevid;
}

sub mw_push_file {
	my $diff_info = shift;
	# $diff_info contains a string in this format:
	# 100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status>
	my @diff_info_split = split(/[ \t]/, $diff_info);

	# Filename, including .mw extension
	my $complete_file_name = shift;
	# Commit message
	my $summary = shift;
	# MediaWiki revision number. Keep the previous one by default,
	# in case there's no edit to perform.
	my $oldrevid = shift;
	my $newrevid;

	if ($summary eq EMPTY_MESSAGE) {
		$summary = EMPTY;
	}

	my $new_sha1 = $diff_info_split[3];
	my $old_sha1 = $diff_info_split[2];
	my $page_created = ($old_sha1 eq NULL_SHA1);
	my $page_deleted = ($new_sha1 eq NULL_SHA1);
	$complete_file_name = mediawiki_clean_filename($complete_file_name);

	my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/;
	if (!defined($extension)) {
		$extension = EMPTY;
	}
	if ($extension eq 'mw') {
		my $ns = get_mw_namespace_id_for_page($complete_file_name);
		if ($ns && $ns == get_mw_namespace_id('File') && (!$export_media)) {
			print {*STDERR} "Ignoring media file related page: ${complete_file_name}\n";
			return ($oldrevid, 'ok');
		}
		my $file_content;
		if ($page_deleted) {
			# Deleting a page usually requires
			# special privileges. A common
			# convention is to replace the page
			# with this content instead:
			$file_content = DELETED_CONTENT;
		} else {
			$file_content = run_git("cat-file blob ${new_sha1}");
		}

		mw_connect_maybe();

		my $result = $mediawiki->edit( {
			action => 'edit',
			summary => $summary,
			title => $title,
			basetimestamp => $basetimestamps{$oldrevid},
			text => mediawiki_clean($file_content, $page_created),
		}, {
			skip_encoding => 1 # Helps with names containing accented characters
		});
		if (!$result) {
			if ($mediawiki->{error}->{code} == 3) {
				# edit conflict, considered as non-fast-forward
				print {*STDERR} 'Warning: Error ' .
				    $mediawiki->{error}->{code} .
				    ' from mediawiki: ' . $mediawiki->{error}->{details} .
				    ".\n";
				return ($oldrevid, 'non-fast-forward');
			} else {
				# Other errors. Shouldn't happen => just die()
				die 'Fatal: Error ' .
				    $mediawiki->{error}->{code} .
				    ' from mediawiki: ' . $mediawiki->{error}->{details} . "\n";
			}
		}
		$newrevid = $result->{edit}->{newrevid};
		print {*STDERR} "Pushed file: ${new_sha1} - ${title}\n";
	} elsif ($export_media) {
		$newrevid = mw_upload_file($complete_file_name, $new_sha1,
					   $extension, $page_deleted,
					   $summary);
	} else {
		print {*STDERR} "Ignoring media file ${title}\n";
	}
	$newrevid = ($newrevid or $oldrevid);
	return ($newrevid, 'ok');
}

sub mw_push {
	# multiple push statements can follow each other
	my @refsspecs = (shift, get_more_refs('push'));
	my $pushed;
	for my $refspec (@refsspecs) {
		my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/
		    or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>\n");
		if ($force) {
			print {*STDERR} "Warning: forced push not allowed on a MediaWiki.\n";
		}
		if ($local eq EMPTY) {
			print {*STDERR} "Cannot delete remote branch on a MediaWiki\n";
			print {*STDOUT} "error ${remote} cannot delete\n";
			next;
		}
		if ($remote ne 'refs/heads/master') {
			print {*STDERR} "Only push to the branch 'master' is supported on a MediaWiki\n";
			print {*STDOUT} "error ${remote} only master allowed\n";
			next;
		}
		if (mw_push_revision($local, $remote)) {
			$pushed = 1;
		}
	}

	# Notify Git that the push is done
	print {*STDOUT} "\n";

	if ($pushed && $dumb_push) {
		print {*STDERR} "Just pushed some revisions to MediaWiki.\n";
		print {*STDERR} "The pushed revisions now have to be re-imported, and your current branch\n";
		print {*STDERR} "needs to be updated with these re-imported commits. You can do this with\n";
		print {*STDERR} "\n";
		print {*STDERR} "  git pull --rebase\n";
		print {*STDERR} "\n";
	}
	return;
}

sub mw_push_revision {
	my $local = shift;
	my $remote = shift; # actually, this has to be "refs/heads/master" at this point.
	my $last_local_revid = get_last_local_revision();
	print {*STDERR} ".\n"; # Finish sentence started by get_last_local_revision()
	my $last_remote_revid = get_last_remote_revision();
	my $mw_revision = $last_remote_revid;

	# Get sha1 of commit pointed by local HEAD
	my $HEAD_sha1 = run_git("rev-parse ${local} 2>/dev/null");
	chomp($HEAD_sha1);
	# Get sha1 of commit pointed by remotes/$remotename/master
	my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/${remotename}/master 2>/dev/null");
	chomp($remoteorigin_sha1);

	if ($last_local_revid > 0 &&
	    $last_local_revid < $last_remote_revid) {
		return error_non_fast_forward($remote);
	}

	if ($HEAD_sha1 eq $remoteorigin_sha1) {
		# nothing to push
		return 0;
	}

	# Get every commit in between HEAD and refs/remotes/origin/master,
	# including HEAD and refs/remotes/origin/master
	my @commit_pairs = ();
	if ($last_local_revid > 0) {
		my $parsed_sha1 = $remoteorigin_sha1;
		# Find a path from last MediaWiki commit to pushed commit
		print {*STDERR} "Computing path from local to remote ...\n";
		my @local_ancestry = split(/\n/, run_git("rev-list --boundary --parents ${local} ^${parsed_sha1}"));
		my %local_ancestry;
		foreach my $line (@local_ancestry) {
			if (my ($child, $parents) = $line =~ /^-?([a-f0-9]+) ([a-f0-9 ]+)/) {
				foreach my $parent (split(/ /, $parents)) {
					$local_ancestry{$parent} = $child;
				}
			} elsif ($line !~ /^([a-f0-9]+)/) {
				die "Unexpected output from git rev-list: ${line}\n";
			}
		}
		while ($parsed_sha1 ne $HEAD_sha1) {
			my $child = $local_ancestry{$parsed_sha1};
			if (!$child) {
				print {*STDERR} "Cannot find a path in history from remote commit to last commit\n";
				return error_non_fast_forward($remote);
			}
			push(@commit_pairs, [$parsed_sha1, $child]);
			$parsed_sha1 = $child;
		}
	} else {
		# No remote mediawiki revision. Export the whole
		# history (linearized with --first-parent)
		print {*STDERR} "Warning: no common ancestor, pushing complete history\n";
		my $history = run_git("rev-list --first-parent --children ${local}");
		my @history = split(/\n/, $history);
		@history = @history[1..$#history];
		foreach my $line (reverse @history) {
			my @commit_info_split = split(/[ \n]/, $line);
			push(@commit_pairs, \@commit_info_split);
		}
	}

	foreach my $commit_info_split (@commit_pairs) {
		my $sha1_child = @{$commit_info_split}[0];
		my $sha1_commit = @{$commit_info_split}[1];
		my $diff_infos = run_git("diff-tree -r --raw -z ${sha1_child} ${sha1_commit}");
		# TODO: we could detect renames and encode them with a #redirect on the wiki.
		# TODO: for now, it's just a delete+add
		my @diff_info_list = split(/\0/, $diff_infos);
		# Keep the subject line of the commit message as the mediawiki comment for the revision
		my $commit_msg = run_git(qq(log --no-walk --format="%s" ${sha1_commit}));
		chomp($commit_msg);
		# Push every blob
		while (@diff_info_list) {
			my $status;
			# git diff-tree -z gives an output like
			# <metadata>\0<filename1>\0
			# <metadata>\0<filename2>\0
			# and we've split on \0.
			my $info = shift(@diff_info_list);
			my $file = shift(@diff_info_list);
			($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision);
			if ($status eq 'non-fast-forward') {
				# we may already have sent part of the
				# commit to MediaWiki, but it's too
				# late to cancel it. Stop the push in
				# the middle, but still give an
				# accurate error message.
				return error_non_fast_forward($remote);
			}
			if ($status ne 'ok') {
				die("Unknown error from mw_push_file()\n");
			}
		}
		if (!$dumb_push) {
			run_git(qq(notes --ref=${remotename}/mediawiki add -f -m "mediawiki_revision: ${mw_revision}" ${sha1_commit}));
			run_git(qq(update-ref -m "Git-MediaWiki push" refs/mediawiki/${remotename}/master ${sha1_commit} ${sha1_child}));
		}
	}

	print {*STDOUT} "ok ${remote}\n";
	return 1;
}

sub get_allowed_file_extensions {
	mw_connect_maybe();

	my $query = {
		action => 'query',
		meta => 'siteinfo',
		siprop => 'fileextensions'
	};
	my $result = $mediawiki->api($query);
	my @file_extensions = map { $_->{ext} } @{$result->{query}->{fileextensions}};
	my %hashFile = map { $_ => 1 } @file_extensions;

	return %hashFile;
}

# In-memory cache for MediaWiki namespace ids.
my %namespace_id;

# Namespaces whose id is cached in the configuration file
# (to avoid duplicates)
my %cached_mw_namespace_id;

# Return MediaWiki id for a canonical namespace name.
# Ex.: "File", "Project".
sub get_mw_namespace_id {
	mw_connect_maybe();
	my $name = shift;

	if (!exists $namespace_id{$name}) {
		# Look at the configuration file to see if the record for that
		# namespace is already cached. Namespaces are stored in the form
		# "Name_of_namespace:Id_namespace", ex.: "File:6".
		my @temp = split(/\n/,
		    run_git("config --get-all remote.${remotename}.namespaceCache"));
		chomp(@temp);
		foreach my $ns (@temp) {
			my ($n, $id) = split(/:/, $ns);
			if ($id eq 'notANameSpace') {
				$namespace_id{$n} = {is_namespace => 0};
			} else {
				$namespace_id{$n} = {is_namespace => 1, id => $id};
			}
			$cached_mw_namespace_id{$n} = 1;
		}
	}

	if (!exists $namespace_id{$name}) {
		print {*STDERR} "Namespace ${name} not found in cache, querying the wiki ...\n";
		# NS not found => get namespace id from MW and store it in
		# the configuration file.
		my $query = {
			action => 'query',
			meta => 'siteinfo',
			siprop => 'namespaces'
		};
		my $result = $mediawiki->api($query);

		while (my ($id, $ns) = each(%{$result->{query}->{namespaces}})) {
			if (defined($ns->{id}) && defined($ns->{canonical})) {
				$namespace_id{$ns->{canonical}} = {is_namespace => 1, id => $ns->{id}};
				if ($ns->{'*'}) {
					# alias (e.g. French Fichier: as alias for canonical File:)
					$namespace_id{$ns->{'*'}} = {is_namespace => 1, id => $ns->{id}};
				}
			}
		}
	}

	my $ns = $namespace_id{$name};
	my $id;

	if (!defined $ns) {
		print {*STDERR} "No such namespace ${name} on MediaWiki.\n";
		$ns = {is_namespace => 0};
		$namespace_id{$name} = $ns;
	}

	if ($ns->{is_namespace}) {
		$id = $ns->{id};
	}

	# Store "notANameSpace" as a special value for nonexistent namespaces
	my $store_id = ($id || 'notANameSpace');

	# Store explicitly requested namespaces on disk
	if (!exists $cached_mw_namespace_id{$name}) {
		run_git(qq(config --add remote.${remotename}.namespaceCache "${name}:${store_id}"));
		$cached_mw_namespace_id{$name} = 1;
	}
	return $id;
}

sub get_mw_namespace_id_for_page {
	my $namespace = shift;
	if ($namespace =~ /^([^:]*):/) {
		return get_mw_namespace_id($namespace);
	} else {
		return;
	}
}