#! /usr/bin/perl

# Copyright (C) 2011
# Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr>
# Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr>
# Claire Fousse <claire.fousse@ensimag.imag.fr>
# David Amouyal <david.amouyal@ensimag.imag.fr>
# Matthieu Moy <matthieu.moy@grenoble-inp.fr>
# License: GPL v2 or later

# Gateway between Git and MediaWiki.
# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/

use strict;
use MediaWiki::API;
use Git;
use DateTime::Format::ISO8601;
use warnings;

# By default, use UTF-8 to communicate with Git and the user
binmode STDERR, ":encoding(UTF-8)";
binmode STDOUT, ":encoding(UTF-8)";

use URI::Escape;

# MediaWiki filenames can contain forward slashes. This constant decides
# by which pattern they should be replaced.
use constant SLASH_REPLACEMENT => "%2F";

# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";

# It's not possible to create empty pages. New empty files in Git are
# sent with this content instead.
use constant EMPTY_CONTENT => "<!-- empty page -->\n";

# used to reflect file creation or deletion in diff.
use constant NULL_SHA1 => "0000000000000000000000000000000000000000";

# Used on Git's side to reflect empty edit messages on the wiki
use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*';

my $remotename = $ARGV[0];
my $url = $ARGV[1];

# Accept both space-separated and multiple keys in config file.
# Spaces should be written as _ anyway because we'll use chomp.
my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".pages"));
chomp(@tracked_pages);

# Just like @tracked_pages, but for MediaWiki categories.
my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".categories"));
chomp(@tracked_categories);

# Import media files on pull
my $import_media = run_git("config --get --bool remote." . $remotename . ".mediaimport");
chomp($import_media);
$import_media = ($import_media eq "true");

# Export media files on push
my $export_media = run_git("config --get --bool remote." . $remotename . ".mediaexport");
chomp($export_media);
$export_media = !($export_media eq "false");

my $wiki_login = run_git("config --get remote." . $remotename . ".mwLogin");
# Note: mwPassword is discouraged. Use the credential system instead.
my $wiki_passwd = run_git("config --get remote." . $remotename . ".mwPassword");
my $wiki_domain = run_git("config --get remote." . $remotename . ".mwDomain");
chomp($wiki_login);
chomp($wiki_passwd);
chomp($wiki_domain);

# Import only last revisions (both for clone and fetch)
my $shallow_import = run_git("config --get --bool remote." . $remotename . ".shallow");
chomp($shallow_import);
$shallow_import = ($shallow_import eq "true");
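
# Illustrative example (not part of the original script): assuming this
# helper is installed as git-remote-mediawiki, a remote is typically set up
# like this, with option names matching the config keys read above:
#
#   git clone mediawiki::http://example.com/wiki my-wiki   # hypothetical URL
#   cd my-wiki
#   git config remote.origin.pages "Main_Page Some_Other_Page"
#   git config remote.origin.categories "SomeCategory"
#   git config --bool remote.origin.mediaimport true
#   git config remote.origin.mwLogin "WikiUser"
#
# Page names are written with '_' instead of spaces (see the comment above).
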
# Fetch (clone and pull) by revisions instead of by pages. This behavior
# is more efficient when we have a wiki with lots of pages and we fetch
# the revisions quite often, so that they concern only a few pages.
# Possible values:
# - by_rev: perform one query per new revision on the remote wiki
# - by_page: query each tracked page for new revision
my $fetch_strategy = run_git("config --get remote.$remotename.fetchStrategy");
unless ($fetch_strategy) {
	$fetch_strategy = run_git("config --get mediawiki.fetchStrategy");
}
chomp($fetch_strategy);
unless ($fetch_strategy) {
	$fetch_strategy = "by_page";
}

# Remember the timestamp corresponding to a revision id.
my %basetimestamps;

# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
# remote.<remotename>.dumbPush.
#
# This means the user will have to re-import the just-pushed
# revisions. On the other hand, this means that the Git revisions
# corresponding to MediaWiki revisions are all imported from the wiki,
# regardless of whether they were initially created in Git or from the
# web interface, hence all users will get the same history (i.e. if
# the push from Git to MediaWiki loses some information, everybody
# will get the history with information lost). If the import is
# deterministic, this means everybody gets the same sha1 for each
# MediaWiki revision.
my $dumb_push = run_git("config --get --bool remote.$remotename.dumbPush");
unless ($dumb_push) {
	$dumb_push = run_git("config --get --bool mediawiki.dumbPush");
}
chomp($dumb_push);
$dumb_push = ($dumb_push eq "true");

my $wiki_name = $url;
$wiki_name =~ s{[^/]*://}{};
# If URL is like http://user:password@example.com/, we clearly don't
# want the password in $wiki_name. While we're there, also remove user
# and '@' sign, to avoid author like MWUser@HTTPUser@host.com
$wiki_name =~ s/^.*@//;

# Commands parser
my @cmd;
while (<STDIN>) {
	chomp;
	@cmd = split(/ /);
	if (defined($cmd[0])) {
		# Line not blank
		if ($cmd[0] eq "capabilities") {
			die("Too many arguments for capabilities\n") if (defined($cmd[1]));
			mw_capabilities();
		} elsif ($cmd[0] eq "list") {
			die("Too many arguments for list\n") if (defined($cmd[2]));
			mw_list($cmd[1]);
		} elsif ($cmd[0] eq "import") {
			die("Invalid arguments for import\n") if ($cmd[1] eq "" || defined($cmd[2]));
			mw_import($cmd[1]);
		} elsif ($cmd[0] eq "option") {
			die("Too many arguments for option\n") if ($cmd[1] eq "" || $cmd[2] eq "" || defined($cmd[3]));
			mw_option($cmd[1], $cmd[2]);
		} elsif ($cmd[0] eq "push") {
			mw_push($cmd[1]);
		} else {
			print STDERR "Unknown command. Aborting...\n";
			last;
		}
	} else {
		# blank line: we should terminate
		last;
	}

	BEGIN { $| = 1 } # flush STDOUT, to make sure the previous
			 # command is fully processed.
}
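
# Illustrative example (not part of the original script): during a fetch,
# Git typically feeds the loop above a command stream along these lines,
# one command per line and terminated by a blank line:
#
#   capabilities
#   list
#   import refs/heads/master
#
# Each command is dispatched to the corresponding mw_* handler defined below.
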
########################## Functions ##############################

# MediaWiki API instance, created lazily.
my $mediawiki;

sub mw_connect_maybe {
	if ($mediawiki) {
		return;
	}
	$mediawiki = MediaWiki::API->new;
	$mediawiki->{config}->{api_url} = "$url/api.php";
	if ($wiki_login) {
		my %credential = (
			'url' => $url,
			'username' => $wiki_login,
			'password' => $wiki_passwd
		);
		Git::credential(\%credential);
		my $request = {lgname => $credential{username},
			       lgpassword => $credential{password},
			       lgdomain => $wiki_domain};
		if ($mediawiki->login($request)) {
			Git::credential(\%credential, 'approve');
			print STDERR "Logged in mediawiki user \"$credential{username}\".\n";
		} else {
			print STDERR "Failed to log in mediawiki user \"$credential{username}\" on $url\n";
			print STDERR "  (error " .
				$mediawiki->{error}->{code} . ': ' .
				$mediawiki->{error}->{details} . ")\n";
			Git::credential(\%credential, 'reject');
			exit 1;
		}
	}
	return;
}

sub fatal_mw_error {
	my $action = shift;
	print STDERR "fatal: could not $action.\n";
	print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
	if ($url =~ /^https/) {
		print STDERR "fatal: make sure '$url/api.php' is a valid page\n";
		print STDERR "fatal: and the SSL certificate is correct.\n";
	} else {
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
	}
	print STDERR "fatal: (error " .
		$mediawiki->{error}->{code} . ': ' .
		$mediawiki->{error}->{details} . ")\n";
	exit 1;
}

## Functions for listing pages on the remote wiki
sub get_mw_tracked_pages {
	my $pages = shift;
	get_mw_page_list(\@tracked_pages, $pages);
	return;
}

sub get_mw_page_list {
	my $page_list = shift;
	my $pages = shift;
	my @some_pages = @$page_list;
	while (@some_pages) {
		my $last_page = 50;
		if ($#some_pages < $last_page) {
			$last_page = $#some_pages;
		}
		my @slice = @some_pages[0..$last_page];
		get_mw_first_pages(\@slice, $pages);
		@some_pages = @some_pages[51..$#some_pages];
	}
	return;
}

sub get_mw_tracked_categories {
	my $pages = shift;
	foreach my $category (@tracked_categories) {
		if (index($category, ':') < 0) {
			# Mediawiki requires the Category
			# prefix, but let's not force the user
			# to specify it.
			$category = "Category:" . $category;
		}
		my $mw_pages = $mediawiki->list( {
			action => 'query',
			list => 'categorymembers',
			cmtitle => $category,
			cmlimit => 'max'} )
			|| die $mediawiki->{error}->{code} . ': '
				. $mediawiki->{error}->{details} . "\n";
		foreach my $page (@{$mw_pages}) {
			$pages->{$page->{title}} = $page;
		}
	}
	return;
}

sub get_mw_all_pages {
	my $pages = shift;
	# No user-provided list, get the list of pages from the API.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		fatal_mw_error("get the list of wiki pages");
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
	return;
}
# Queries the wiki for a set of pages. Meant to be used within a loop
# querying the wiki for slices of the page list.
sub get_mw_first_pages {
	my $some_pages = shift;
	my @some_pages = @{$some_pages};

	my $pages = shift;

	# pattern 'page1|page2|...' required by the API
	my $titles = join('|', @some_pages);

	my $mw_pages = $mediawiki->api({
		action => 'query',
		titles => $titles,
	});
	if (!defined($mw_pages)) {
		fatal_mw_error("query the list of wiki pages");
	}
	while (my ($id, $page) = each(%{$mw_pages->{query}->{pages}})) {
		if ($id < 0) {
			print STDERR "Warning: page $page->{title} not found on wiki\n";
		} else {
			$pages->{$page->{title}} = $page;
		}
	}
	return;
}

# Get the list of pages to be fetched according to configuration.
sub get_mw_pages {
	mw_connect_maybe();

	print STDERR "Listing pages on remote wiki...\n";

	my %pages; # hash on page titles to avoid duplicates
	my $user_defined;
	if (@tracked_pages) {
		$user_defined = 1;
		# The user provided a list of page titles, but we
		# still need to query the API to get the page IDs.
		get_mw_tracked_pages(\%pages);
	}
	if (@tracked_categories) {
		$user_defined = 1;
		get_mw_tracked_categories(\%pages);
	}
	if (!$user_defined) {
		get_mw_all_pages(\%pages);
	}
	if ($import_media) {
		print STDERR "Getting media files for selected pages...\n";
		if ($user_defined) {
			get_linked_mediafiles(\%pages);
		} else {
			get_all_mediafiles(\%pages);
		}
	}
	print STDERR (scalar keys %pages) . " pages found.\n";
	return %pages;
}

# usage: $out = run_git("command args");
#        $out = run_git("command args", "raw"); # don't interpret output as UTF-8.
sub run_git {
	my $args = shift;
	my $encoding = (shift || "encoding(UTF-8)");
	open(my $git, "-|:$encoding", "git " . $args)
	    or die "Unable to open: $!\n";
	my $res = do {
		local $/ = undef;
		<$git>
	};
	close($git);

	return $res;
}


sub get_all_mediafiles {
	my $pages = shift;
	# Get the list of media file pages from the API: they live in a
	# separate namespace, and only one namespace can be queried at a
	# time.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		apnamespace => get_mw_namespace_id("File"),
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		print STDERR "fatal: could not get the list of pages for media files.\n";
		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
	return;
}
sub get_linked_mediafiles {
	my $pages = shift;
	my @titles = map { $_->{title} } values(%{$pages});

	# The query is split in small batches because of the MW API limit on
	# the number of links to be returned (500 links max).
	my $batch = 10;
	while (@titles) {
		if ($#titles < $batch) {
			$batch = $#titles;
		}
		my @slice = @titles[0..$batch];

		# pattern 'page1|page2|...' required by the API
		my $mw_titles = join('|', @slice);

		# Media files could be included in or linked from
		# a page; get all related titles.
		my $query = {
			action => 'query',
			prop => 'links|images',
			titles => $mw_titles,
			plnamespace => get_mw_namespace_id("File"),
			pllimit => 'max'
		};
		my $result = $mediawiki->api($query);

		while (my ($id, $page) = each(%{$result->{query}->{pages}})) {
			my @media_titles;
			if (defined($page->{links})) {
				my @link_titles
				    = map { $_->{title} } @{$page->{links}};
				push(@media_titles, @link_titles);
			}
			if (defined($page->{images})) {
				my @image_titles
				    = map { $_->{title} } @{$page->{images}};
				push(@media_titles, @image_titles);
			}
			if (@media_titles) {
				get_mw_page_list(\@media_titles, $pages);
			}
		}

		@titles = @titles[($batch + 1)..$#titles];
	}
	return;
}

sub get_mw_mediafile_for_page_revision {
	# Name of the file on the wiki, with the prefix.
	my $filename = shift;
	my $timestamp = shift;
	my %mediafile;

	# Check whether a media file with the given timestamp exists on
	# MediaWiki. In that case, download the file.
	my $query = {
		action => 'query',
		prop => 'imageinfo',
		titles => "File:" . $filename,
		iistart => $timestamp,
		iiend => $timestamp,
		iiprop => 'timestamp|archivename|url',
		iilimit => 1
	};
	my $result = $mediawiki->api($query);

	my ($fileid, $file) = each( %{$result->{query}->{pages}} );
	# If not defined, it means there is no revision of the file for
	# the given timestamp.
	if (defined($file->{imageinfo})) {
		$mediafile{title} = $filename;

		my $fileinfo = pop(@{$file->{imageinfo}});
		$mediafile{timestamp} = $fileinfo->{timestamp};
		# Mediawiki::API's download function doesn't support https URLs
		# and can't download old versions of files.
		print STDERR "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n";
		$mediafile{content} = download_mw_mediafile($fileinfo->{url});
	}
	return %mediafile;
}

sub download_mw_mediafile {
	my $download_url = shift;

	my $response = $mediawiki->{ua}->get($download_url);
	if ($response->code == 200) {
		return $response->decoded_content;
	} else {
		print STDERR "Error downloading media file from:\n";
		print STDERR "URL: $download_url\n";
		print STDERR "Server response: " . $response->code . " " . $response->message . "\n";
		exit 1;
	}
}

sub get_last_local_revision {
	# Get note regarding last mediawiki revision
	my $note = run_git("notes --ref=$remotename/mediawiki show refs/mediawiki/$remotename/master 2>/dev/null");
	my @note_info = split(/ /, $note);

	my $lastrevision_number;
	if (!(defined($note_info[0]) && $note_info[0] eq "mediawiki_revision:")) {
		print STDERR "No previous mediawiki revision found";
		$lastrevision_number = 0;
	} else {
		# Notes are formatted: mediawiki_revision: #number
		$lastrevision_number = $note_info[1];
		chomp($lastrevision_number);
		print STDERR "Last local mediawiki revision found is $lastrevision_number";
	}
	return $lastrevision_number;
}
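
# Illustrative example (not part of the original script): the note read
# above is the one written by import_file_revision() further down, so its
# content looks like:
#
#   mediawiki_revision: 312
#
# where 312 is a hypothetical MediaWiki revision id.
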
# Get the last remote revision without taking into account which pages
# are tracked or not. This function makes a single request to the wiki,
# thus avoiding a loop over all tracked pages. This is useful for the
# fetch-by-rev option.
sub get_last_global_remote_rev {
	mw_connect_maybe();

	my $query = {
		action => 'query',
		list => 'recentchanges',
		prop => 'revisions',
		rclimit => '1',
		rcdir => 'older',
	};
	my $result = $mediawiki->api($query);
	return $result->{query}->{recentchanges}[0]->{revid};
}

# Get the last remote revision concerning the tracked pages and the tracked
# categories.
sub get_last_remote_revision {
	mw_connect_maybe();

	my %pages_hash = get_mw_pages();
	my @pages = values(%pages_hash);

	my $max_rev_num = 0;

	print STDERR "Getting last revision id on tracked pages...\n";

	foreach my $page (@pages) {
		my $id = $page->{pageid};

		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'ids|timestamp',
			pageids => $id,
		};

		my $result = $mediawiki->api($query);

		my $lastrev = pop(@{$result->{query}->{pages}->{$id}->{revisions}});

		$basetimestamps{$lastrev->{revid}} = $lastrev->{timestamp};

		$max_rev_num = ($lastrev->{revid} > $max_rev_num ? $lastrev->{revid} : $max_rev_num);
	}

	print STDERR "Last remote revision found is $max_rev_num.\n";
	return $max_rev_num;
}

# Clean content before sending it to MediaWiki
sub mediawiki_clean {
	my $string = shift;
	my $page_created = shift;
	# MediaWiki does not allow blank space at the end of a page, and each
	# page ends with a single \n. This function right-trims the string and
	# adds a \n at the end to follow this rule.
	$string =~ s/\s+$//;
	if ($string eq "" && $page_created) {
		# Creating empty pages is forbidden.
		$string = EMPTY_CONTENT;
	}
	return $string . "\n";
}

# Filter applied on MediaWiki data before adding them to Git
sub mediawiki_smudge {
	my $string = shift;
	if ($string eq EMPTY_CONTENT) {
		$string = "";
	}
	# This \n is important. This is due to MediaWiki's way of handling the end of files.
	return $string . "\n";
}

sub mediawiki_clean_filename {
	my $filename = shift;
	$filename =~ s{@{[SLASH_REPLACEMENT]}}{/}g;
	# [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
	# Do a variant of URL-encoding, i.e. it looks like URL-encoding,
	# but with _ added to prevent MediaWiki from thinking this is
	# an actual special character.
	$filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
	# If we use the uri escape before
	# we should unescape here, before anything

	return $filename;
}

sub mediawiki_smudge_filename {
	my $filename = shift;
	$filename =~ s{/}{@{[SLASH_REPLACEMENT]}}g;
	$filename =~ s/ /_/g;
	# Decode forbidden characters encoded in mediawiki_clean_filename
	$filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge;
	return $filename;
}

sub literal_data {
	my ($content) = @_;
	print STDOUT "data ", bytes::length($content), "\n", $content;
	return;
}

sub literal_data_raw {
	# Output possibly binary content.
	my ($content) = @_;
	# Avoid confusion between size in bytes and in characters
	utf8::downgrade($content);
	binmode STDOUT, ":raw";
	print STDOUT "data ", bytes::length($content), "\n", $content;
	binmode STDOUT, ":encoding(UTF-8)";
	return;
}
sub mw_capabilities {
	# Revisions are imported to the private namespace
	# refs/mediawiki/$remotename/ by the helper and fetched into
	# refs/remotes/$remotename later by fetch.
	print STDOUT "refspec refs/heads/*:refs/mediawiki/$remotename/*\n";
	print STDOUT "import\n";
	print STDOUT "list\n";
	print STDOUT "push\n";
	print STDOUT "\n";
	return;
}

sub mw_list {
	# MediaWiki does not have branches; we arbitrarily consider one
	# branch called master, with HEAD pointing to it.
	print STDOUT "? refs/heads/master\n";
	print STDOUT "\@refs/heads/master HEAD\n";
	print STDOUT "\n";
	return;
}

sub mw_option {
	print STDERR "remote-helper command 'option $_[0]' not yet implemented\n";
	print STDOUT "unsupported\n";
	return;
}

sub fetch_mw_revisions_for_page {
	my $page = shift;
	my $id = shift;
	my $fetch_from = shift;
	my @page_revs = ();
	my $query = {
		action => 'query',
		prop => 'revisions',
		rvprop => 'ids',
		rvdir => 'newer',
		rvstartid => $fetch_from,
		rvlimit => 500,
		pageids => $id,
	};

	my $revnum = 0;
	# Get 500 revisions at a time due to the mediawiki api limit
	while (1) {
		my $result = $mediawiki->api($query);

		# Parse each of those 500 revisions
		foreach my $revision (@{$result->{query}->{pages}->{$id}->{revisions}}) {
			my $page_rev_ids;
			$page_rev_ids->{pageid} = $page->{pageid};
			$page_rev_ids->{revid} = $revision->{revid};
			push(@page_revs, $page_rev_ids);
			$revnum++;
		}
		last unless $result->{'query-continue'};
		$query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid};
	}
	if ($shallow_import && @page_revs) {
		print STDERR "  Found 1 revision (shallow import).\n";
		@page_revs = sort {$b->{revid} <=> $a->{revid}} (@page_revs);
		return $page_revs[0];
	}
	print STDERR "  Found ", $revnum, " revision(s).\n";
	return @page_revs;
}

sub fetch_mw_revisions {
	my $pages = shift; my @pages = @{$pages};
	my $fetch_from = shift;

	my @revisions = ();
	my $n = 1;
	foreach my $page (@pages) {
		my $id = $page->{pageid};

		print STDERR "page $n/", scalar(@pages), ": " . $page->{title} . "\n";
		$n++;
		my @page_revs = fetch_mw_revisions_for_page($page, $id, $fetch_from);
		@revisions = (@page_revs, @revisions);
	}

	return ($n, @revisions);
}

sub fe_escape_path {
	my $path = shift;
	$path =~ s/\\/\\\\/g;
	$path =~ s/"/\\"/g;
	$path =~ s/\n/\\n/g;
	return '"' . $path . '"';
}
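
# Illustrative example (not part of the original script): for each wiki
# revision, import_file_revision() below emits a git fast-import stream
# roughly shaped like this (values are hypothetical):
#
#   commit refs/mediawiki/origin/master
#   mark :1
#   committer WikiUser <WikiUser@wiki.example.com> 1234567890 +0000
#   data <length of the edit comment>
#   ...
#   M 644 inline "Main_Page.mw"
#   data <length of the page content>
#   ...
#
# followed by a second commit on refs/notes/origin/mediawiki carrying the
# "mediawiki_revision:" note.
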
sub import_file_revision {
	my $commit = shift;
	my %commit = %{$commit};
	my $full_import = shift;
	my $n = shift;
	my $mediafile = shift;
	my %mediafile;
	if ($mediafile) {
		%mediafile = %{$mediafile};
	}

	my $title = $commit{title};
	my $comment = $commit{comment};
	my $content = $commit{content};
	my $author = $commit{author};
	my $date = $commit{date};

	print STDOUT "commit refs/mediawiki/$remotename/master\n";
	print STDOUT "mark :$n\n";
	print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n";
	literal_data($comment);

	# If it's not a clone, we need to know where to start from
	if (!$full_import && $n == 1) {
		print STDOUT "from refs/mediawiki/$remotename/master^0\n";
	}
	if ($content ne DELETED_CONTENT) {
		print STDOUT "M 644 inline " .
		    fe_escape_path($title . ".mw") . "\n";
		literal_data($content);
		if (%mediafile) {
			print STDOUT "M 644 inline "
			    . fe_escape_path($mediafile{title}) . "\n";
			literal_data_raw($mediafile{content});
		}
		print STDOUT "\n\n";
	} else {
		print STDOUT "D " . fe_escape_path($title . ".mw") . "\n";
	}

	# mediawiki revision number in the git note
	if ($full_import && $n == 1) {
		print STDOUT "reset refs/notes/$remotename/mediawiki\n";
	}
	print STDOUT "commit refs/notes/$remotename/mediawiki\n";
	print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n";
	literal_data("Note added by git-mediawiki during import");
	if (!$full_import && $n == 1) {
		print STDOUT "from refs/notes/$remotename/mediawiki^0\n";
	}
	print STDOUT "N inline :$n\n";
	literal_data("mediawiki_revision: " . $commit{mw_revision});
	print STDOUT "\n\n";
	return;
}

# Parse a sequence of
#    <cmd> <arg1>
#    <cmd> <arg2>
#    \n
# (like a batch sequence of 'import' or a sequence of 'push' statements)
sub get_more_refs {
	my $cmd = shift;
	my @refs;
	while (1) {
		my $line = <STDIN>;
		if ($line =~ /^$cmd (.*)$/) {
			push(@refs, $1);
		} elsif ($line eq "\n") {
			return @refs;
		} else {
			die("Invalid command in a '$cmd' batch: $_\n");
		}
	}
	return;
}

sub mw_import {
	# multiple import commands can follow each other.
	my @refs = (shift, get_more_refs("import"));
	foreach my $ref (@refs) {
		mw_import_ref($ref);
	}
	print STDOUT "done\n";
	return;
}

sub mw_import_ref {
	my $ref = shift;
	# The remote helper will call "import HEAD" and
	# "import refs/heads/master".
	# Since HEAD is a symbolic ref to master (by convention,
	# followed by the output of the command "list" that we gave),
	# we don't need to do anything in this case.
	if ($ref eq "HEAD") {
		return;
	}

	mw_connect_maybe();

	print STDERR "Searching revisions...\n";
	my $last_local = get_last_local_revision();
	my $fetch_from = $last_local + 1;
	if ($fetch_from == 1) {
		print STDERR ", fetching from beginning.\n";
	} else {
		print STDERR ", fetching from here.\n";
	}

	my $n = 0;
	if ($fetch_strategy eq "by_rev") {
		print STDERR "Fetching & writing export data by revs...\n";
		$n = mw_import_ref_by_revs($fetch_from);
	} elsif ($fetch_strategy eq "by_page") {
		print STDERR "Fetching & writing export data by pages...\n";
		$n = mw_import_ref_by_pages($fetch_from);
	} else {
		print STDERR "fatal: invalid fetch strategy \"$fetch_strategy\".\n";
		print STDERR "Check your configuration variables remote.$remotename.fetchStrategy and mediawiki.fetchStrategy\n";
		exit 1;
	}

	if ($fetch_from == 1 && $n == 0) {
		print STDERR "You appear to have cloned an empty MediaWiki.\n";
		# Something has to be done remote-helper side. If nothing is done, an error is
		# thrown saying that HEAD is referring to unknown object 0000000000000000000
		# and the clone fails.
	}
	return;
}

sub mw_import_ref_by_pages {

	my $fetch_from = shift;
	my %pages_hash = get_mw_pages();
	my @pages = values(%pages_hash);

	my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from);

	@revisions = sort {$a->{revid} <=> $b->{revid}} @revisions;
	my @revision_ids = map { $_->{revid} } @revisions;

	return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash);
}

sub mw_import_ref_by_revs {

	my $fetch_from = shift;
	my %pages_hash = get_mw_pages();

	my $last_remote = get_last_global_remote_rev();
	my @revision_ids = $fetch_from..$last_remote;
	return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash);
}
# Import revisions given in second argument (array of integers).
# Only pages appearing in the third argument (hash indexed by page titles)
# will be imported.
sub mw_import_revids {
	my $fetch_from = shift;
	my $revision_ids = shift;
	my $pages = shift;

	my $n = 0;
	my $n_actual = 0;
	my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined

	foreach my $pagerevid (@$revision_ids) {
		# Count page even if we skip it, since we display
		# $n/$total and $total includes skipped pages.
		$n++;

		# fetch the content of the pages
		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'content|timestamp|comment|user|ids',
			revids => $pagerevid,
		};

		my $result = $mediawiki->api($query);

		if (!$result) {
			die "Failed to retrieve modified page for revision $pagerevid\n";
		}

		if (defined($result->{query}->{badrevids}->{$pagerevid})) {
			# The revision id does not exist on the remote wiki.
			next;
		}

		if (!defined($result->{query}->{pages})) {
			die "Invalid revision $pagerevid.\n";
		}

		my @result_pages = values(%{$result->{query}->{pages}});
		my $result_page = $result_pages[0];
		my $rev = $result_pages[0]->{revisions}->[0];

		my $page_title = $result_page->{title};

		if (!exists($pages->{$page_title})) {
			print STDERR "$n/", scalar(@$revision_ids),
				": Skipping revision #$rev->{revid} of $page_title\n";
			next;
		}

		$n_actual++;

		my %commit;
		$commit{author} = $rev->{user} || 'Anonymous';
		$commit{comment} = $rev->{comment} || EMPTY_MESSAGE;
		$commit{title} = mediawiki_smudge_filename($page_title);
		$commit{mw_revision} = $rev->{revid};
		$commit{content} = mediawiki_smudge($rev->{'*'});

		if (!defined($rev->{timestamp})) {
			$last_timestamp++;
		} else {
			$last_timestamp = $rev->{timestamp};
		}
		$commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp);

		# Differentiate regular pages from media files.
		my ($namespace, $filename) = $page_title =~ /^([^:]*):(.*)$/;
		my %mediafile;
		if ($namespace) {
			my $id = get_mw_namespace_id($namespace);
			if ($id && $id == get_mw_namespace_id("File")) {
				%mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
			}
		}
		# If this is a revision of the media page for a new version
		# of a file, do one common commit for both the file and the
		# media page. Otherwise, commit only that page.
		print STDERR "$n/", scalar(@$revision_ids), ": Revision #$rev->{revid} of $commit{title}\n";
		import_file_revision(\%commit, ($fetch_from == 1), $n_actual, \%mediafile);
	}

	return $n_actual;
}
sub error_non_fast_forward {
	my $advice = run_git("config --bool advice.pushNonFastForward");
	chomp($advice);
	if ($advice ne "false") {
		# Native git-push would show this after the summary.
		# We can't ask it to display it cleanly, so print it
		# ourselves before.
		print STDERR "To prevent you from losing history, non-fast-forward updates were rejected\n";
		print STDERR "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n";
		print STDERR "'Note about fast-forwards' section of 'git push --help' for details.\n";
	}
	print STDOUT "error $_[0] \"non-fast-forward\"\n";
	return 0;
}

sub mw_upload_file {
	my $complete_file_name = shift;
	my $new_sha1 = shift;
	my $extension = shift;
	my $file_deleted = shift;
	my $summary = shift;
	my $newrevid;
	my $path = "File:" . $complete_file_name;
	my %hashFiles = get_allowed_file_extensions();
	if (!exists($hashFiles{$extension})) {
		print STDERR "$complete_file_name is not a permitted file on this wiki.\n";
		print STDERR "Check the configuration of file uploads in your mediawiki.\n";
		return $newrevid;
	}
	# Deleting and uploading a file requires a privileged user
	if ($file_deleted) {
		mw_connect_maybe();
		my $query = {
			action => 'delete',
			title => $path,
			reason => $summary
		};
		if (!$mediawiki->edit($query)) {
			print STDERR "Failed to delete file on remote wiki\n";
			print STDERR "Check your permissions on the remote site. Error code:\n";
			print STDERR $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details};
			exit 1;
		}
	} else {
		# Don't let perl try to interpret file content as UTF-8 => use "raw"
		my $content = run_git("cat-file blob $new_sha1", "raw");
		if ($content ne "") {
			mw_connect_maybe();
			$mediawiki->{config}->{upload_url} =
				"$url/index.php/Special:Upload";
			$mediawiki->edit({
				action => 'upload',
				filename => $complete_file_name,
				comment => $summary,
				file => [undef,
					 $complete_file_name,
					 Content => $content],
				ignorewarnings => 1,
			}, {
				skip_encoding => 1
			} ) || die $mediawiki->{error}->{code} . ':'
				 . $mediawiki->{error}->{details} . "\n";
			my $last_file_page = $mediawiki->get_page({title => $path});
			$newrevid = $last_file_page->{revid};
			print STDERR "Pushed file: $new_sha1 - $complete_file_name.\n";
		} else {
			print STDERR "Empty file $complete_file_name not pushed.\n";
		}
	}
	return $newrevid;
}
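
# Illustrative example (not part of the original script): the $diff_info
# argument parsed by mw_push_file() below is one record of
# `git diff-tree -r --raw -z`, which looks roughly like this (the file name
# follows in a separate NUL-separated field):
#
#   :100644 100644 <old_blob_sha1> <new_blob_sha1> M
#
# Fields 2 and 3 of that record are the old and new blob sha1s used below.
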
sub mw_push_file {
	my $diff_info = shift;
	# $diff_info contains a string in this format:
	# 100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status>
	my @diff_info_split = split(/[ \t]/, $diff_info);

	# Filename, including .mw extension
	my $complete_file_name = shift;
	# Commit message
	my $summary = shift;
	# MediaWiki revision number. Keep the previous one by default,
	# in case there's no edit to perform.
	my $oldrevid = shift;
	my $newrevid;

	if ($summary eq EMPTY_MESSAGE) {
		$summary = '';
	}

	my $new_sha1 = $diff_info_split[3];
	my $old_sha1 = $diff_info_split[2];
	my $page_created = ($old_sha1 eq NULL_SHA1);
	my $page_deleted = ($new_sha1 eq NULL_SHA1);
	$complete_file_name = mediawiki_clean_filename($complete_file_name);

	my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/;
	if (!defined($extension)) {
		$extension = "";
	}
	if ($extension eq "mw") {
		my $ns = get_mw_namespace_id_for_page($complete_file_name);
		if ($ns && $ns == get_mw_namespace_id("File") && (!$export_media)) {
			print STDERR "Ignoring media file related page: $complete_file_name\n";
			return ($oldrevid, "ok");
		}
		my $file_content;
		if ($page_deleted) {
			# Deleting a page usually requires
			# special privileges. A common
			# convention is to replace the page
			# with this content instead:
			$file_content = DELETED_CONTENT;
		} else {
			$file_content = run_git("cat-file blob $new_sha1");
		}

		mw_connect_maybe();

		my $result = $mediawiki->edit( {
			action => 'edit',
			summary => $summary,
			title => $title,
			basetimestamp => $basetimestamps{$oldrevid},
			text => mediawiki_clean($file_content, $page_created),
		}, {
			skip_encoding => 1 # Helps with names with accented characters
		});
		if (!$result) {
			if ($mediawiki->{error}->{code} == 3) {
				# edit conflicts, considered as non-fast-forward
				print STDERR 'Warning: Error ' .
					$mediawiki->{error}->{code} .
					' from mediawiki: ' . $mediawiki->{error}->{details} .
					".\n";
				return ($oldrevid, "non-fast-forward");
			} else {
				# Other errors. Shouldn't happen => just die()
				die 'Fatal: Error ' .
					$mediawiki->{error}->{code} .
					' from mediawiki: ' . $mediawiki->{error}->{details} . "\n";
			}
		}
		$newrevid = $result->{edit}->{newrevid};
		print STDERR "Pushed file: $new_sha1 - $title\n";
	} elsif ($export_media) {
		$newrevid = mw_upload_file($complete_file_name, $new_sha1,
					   $extension, $page_deleted,
					   $summary);
	} else {
		print STDERR "Ignoring media file $title\n";
	}
	$newrevid = ($newrevid or $oldrevid);
	return ($newrevid, "ok");
}

sub mw_push {
	# multiple push statements can follow each other
	my @refsspecs = (shift, get_more_refs("push"));
	my $pushed;
	for my $refspec (@refsspecs) {
		my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/
		    or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>\n");
		if ($force) {
			print STDERR "Warning: forced push not allowed on a MediaWiki.\n";
		}
		if ($local eq "") {
			print STDERR "Cannot delete remote branch on a MediaWiki\n";
			print STDOUT "error $remote cannot delete\n";
			next;
		}
		if ($remote ne "refs/heads/master") {
			print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n";
			print STDOUT "error $remote only master allowed\n";
			next;
		}
		if (mw_push_revision($local, $remote)) {
			$pushed = 1;
		}
	}

	# Notify Git that the push is done
	print STDOUT "\n";

	if ($pushed && $dumb_push) {
		print STDERR "Just pushed some revisions to MediaWiki.\n";
		print STDERR "The pushed revisions now have to be re-imported, and your current branch\n";
		print STDERR "needs to be updated with these re-imported commits. You can do this with\n";
		print STDERR "\n";
		print STDERR "  git pull --rebase\n";
		print STDERR "\n";
	}
	return;
}
sub mw_push_revision {
	my $local = shift;
	my $remote = shift; # actually, this has to be "refs/heads/master" at this point.
	my $last_local_revid = get_last_local_revision();
	print STDERR ".\n"; # Finish sentence started by get_last_local_revision()
	my $last_remote_revid = get_last_remote_revision();
	my $mw_revision = $last_remote_revid;

	# Get sha1 of commit pointed by local HEAD
	my $HEAD_sha1 = run_git("rev-parse $local 2>/dev/null");
	chomp($HEAD_sha1);
	# Get sha1 of commit pointed by remotes/$remotename/master
	my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/$remotename/master 2>/dev/null");
	chomp($remoteorigin_sha1);

	if ($last_local_revid > 0 &&
	    $last_local_revid < $last_remote_revid) {
		return error_non_fast_forward($remote);
	}

	if ($HEAD_sha1 eq $remoteorigin_sha1) {
		# nothing to push
		return 0;
	}

	# Get every commit in between HEAD and refs/remotes/origin/master,
	# including HEAD and refs/remotes/origin/master
	my @commit_pairs = ();
	if ($last_local_revid > 0) {
		my $parsed_sha1 = $remoteorigin_sha1;
		# Find a path from last MediaWiki commit to pushed commit
		print STDERR "Computing path from local to remote ...\n";
		my @local_ancestry = split(/\n/, run_git("rev-list --boundary --parents $local ^$parsed_sha1"));
		my %local_ancestry;
		foreach my $line (@local_ancestry) {
			if (my ($child, $parents) = $line =~ /^-?([a-f0-9]+) ([a-f0-9 ]+)/) {
				foreach my $parent (split(/ /, $parents)) {
					$local_ancestry{$parent} = $child;
				}
			} elsif (!$line =~ /^([a-f0-9]+)/) {
				die "Unexpected output from git rev-list: $line\n";
			}
		}
		while ($parsed_sha1 ne $HEAD_sha1) {
			my $child = $local_ancestry{$parsed_sha1};
			if (!$child) {
				printf STDERR "Cannot find a path in history from remote commit to last commit\n";
				return error_non_fast_forward($remote);
			}
			push(@commit_pairs, [$parsed_sha1, $child]);
			$parsed_sha1 = $child;
		}
	} else {
		# No remote mediawiki revision. Export the whole
		# history (linearized with --first-parent)
		print STDERR "Warning: no common ancestor, pushing complete history\n";
		my $history = run_git("rev-list --first-parent --children $local");
		my @history = split(/\n/, $history);
		@history = @history[1..$#history];
		foreach my $line (reverse @history) {
			my @commit_info_split = split(/[ \n]/, $line);
			push(@commit_pairs, \@commit_info_split);
		}
	}

	foreach my $commit_info_split (@commit_pairs) {
		my $sha1_child = @{$commit_info_split}[0];
		my $sha1_commit = @{$commit_info_split}[1];
		my $diff_infos = run_git("diff-tree -r --raw -z $sha1_child $sha1_commit");
		# TODO: we could detect rename, and encode them with a #redirect on the wiki.
		# TODO: for now, it's just a delete+add
		my @diff_info_list = split(/\0/, $diff_infos);
		# Keep the subject line of the commit message as mediawiki comment for the revision
		my $commit_msg = run_git("log --no-walk --format=\"%s\" $sha1_commit");
		chomp($commit_msg);
		# Push every blob
		while (@diff_info_list) {
			my $status;
			# git diff-tree -z gives an output like
			# <metadata>\0<filename1>\0
			# <metadata>\0<filename2>\0
			# and we've split on \0.
			my $info = shift(@diff_info_list);
			my $file = shift(@diff_info_list);
			($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision);
			if ($status eq "non-fast-forward") {
				# we may already have sent part of the
				# commit to MediaWiki, but it's too
				# late to cancel it. Stop the push in
				# the middle, but still give an
				# accurate error message.
				return error_non_fast_forward($remote);
			}
			if ($status ne "ok") {
				die("Unknown error from mw_push_file()\n");
			}
		}
		unless ($dumb_push) {
			run_git("notes --ref=$remotename/mediawiki add -f -m \"mediawiki_revision: $mw_revision\" $sha1_commit");
			run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child");
		}
	}

	print STDOUT "ok $remote\n";
	return 1;
}
Export the whole1191# history (linearized with --first-parent)1192print STDERR "Warning: no common ancestor, pushing complete history\n";1193my$history= run_git("rev-list --first-parent --children$local");1194my@history=split(/\n/,$history);1195@history=@history[1..$#history];1196foreachmy$line(reverse@history) {1197my@commit_info_split=split(/[ \n]/,$line);1198push(@commit_pairs, \@commit_info_split);1199}1200}12011202foreachmy$commit_info_split(@commit_pairs) {1203my$sha1_child= @{$commit_info_split}[0];1204my$sha1_commit= @{$commit_info_split}[1];1205my$diff_infos= run_git("diff-tree -r --raw -z$sha1_child$sha1_commit");1206# TODO: we could detect rename, and encode them with a #redirect on the wiki.1207# TODO: for now, it's just a delete+add1208my@diff_info_list=split(/\0/,$diff_infos);1209# Keep the subject line of the commit message as mediawiki comment for the revision1210my$commit_msg= run_git("log --no-walk --format=\"%s\"$sha1_commit");1211chomp($commit_msg);1212# Push every blob1213while(@diff_info_list) {1214my$status;1215# git diff-tree -z gives an output like1216# <metadata>\0<filename1>\01217# <metadata>\0<filename2>\01218# and we've split on \0.1219my$info=shift(@diff_info_list);1220my$file=shift(@diff_info_list);1221($mw_revision,$status) = mw_push_file($info,$file,$commit_msg,$mw_revision);1222if($statuseq"non-fast-forward") {1223# we may already have sent part of the1224# commit to MediaWiki, but it's too1225# late to cancel it. Stop the push in1226# the middle, but still give an1227# accurate error message.1228return error_non_fast_forward($remote);1229}1230if($statusne"ok") {1231die("Unknown error from mw_push_file()\n");1232}1233}1234unless($dumb_push) {1235 run_git("notes --ref=$remotename/mediawikiadd -f -m\"mediawiki_revision:$mw_revision\"$sha1_commit");1236 run_git("update-ref -m\"Git-MediaWiki push\"refs/mediawiki/$remotename/master$sha1_commit$sha1_child");1237}1238}12391240print STDOUT "ok$remote\n";1241return1;1242}12431244sub get_allowed_file_extensions {1245 mw_connect_maybe();12461247my$query= {1248 action =>'query',1249 meta =>'siteinfo',1250 siprop =>'fileextensions'1251};1252my$result=$mediawiki->api($query);1253my@file_extensions=map{$_->{ext}} @{$result->{query}->{fileextensions}};1254my%hashFile=map{$_=>1}@file_extensions;12551256return%hashFile;1257}12581259# In memory cache for MediaWiki namespace ids.1260my%namespace_id;12611262# Namespaces whose id is cached in the configuration file1263# (to avoid duplicates)1264my%cached_mw_namespace_id;12651266# Return MediaWiki id for a canonical namespace name.1267# Ex.: "File", "Project".1268sub get_mw_namespace_id {1269 mw_connect_maybe();1270my$name=shift;12711272if(!exists$namespace_id{$name}) {1273# Look at configuration file, if the record for that namespace is1274# already cached. 
# Return MediaWiki id for a canonical namespace name.
# Ex.: "File", "Project".
sub get_mw_namespace_id {
	mw_connect_maybe();
	my $name = shift;

	if (!exists $namespace_id{$name}) {
		# Look at configuration file, if the record for that namespace is
		# already cached. Namespaces are stored in the form
		# "Name_of_namespace:Id_namespace", ex.: "File:6".
		my @temp = split(/\n/, run_git("config --get-all remote."
						. $remotename . ".namespaceCache"));
		chomp(@temp);
		foreach my $ns (@temp) {
			my ($n, $id) = split(/:/, $ns);
			if ($id eq 'notANameSpace') {
				$namespace_id{$n} = {is_namespace => 0};
			} else {
				$namespace_id{$n} = {is_namespace => 1, id => $id};
			}
			$cached_mw_namespace_id{$n} = 1;
		}
	}

	if (!exists $namespace_id{$name}) {
		print STDERR "Namespace $name not found in cache, querying the wiki ...\n";
		# NS not found => get namespace id from MW and store it in
		# configuration file.
		my $query = {
			action => 'query',
			meta => 'siteinfo',
			siprop => 'namespaces'
		};
		my $result = $mediawiki->api($query);

		while (my ($id, $ns) = each(%{$result->{query}->{namespaces}})) {
			if (defined($ns->{id}) && defined($ns->{canonical})) {
				$namespace_id{$ns->{canonical}} = {is_namespace => 1, id => $ns->{id}};
				if ($ns->{'*'}) {
					# alias (e.g. french Fichier: as alias for canonical File:)
					$namespace_id{$ns->{'*'}} = {is_namespace => 1, id => $ns->{id}};
				}
			}
		}
	}

	my $ns = $namespace_id{$name};
	my $id;

	unless (defined $ns) {
		print STDERR "No such namespace $name on MediaWiki.\n";
		$ns = {is_namespace => 0};
		$namespace_id{$name} = $ns;
	}

	if ($ns->{is_namespace}) {
		$id = $ns->{id};
	}

	# Store "notANameSpace" as special value for nonexistent namespaces
	my $store_id = ($id || 'notANameSpace');

	# Store explicitly requested namespaces on disk
	if (!exists $cached_mw_namespace_id{$name}) {
		run_git("config --add remote." . $remotename
			. ".namespaceCache \"" . $name . ":" . $store_id . "\"");
		$cached_mw_namespace_id{$name} = 1;
	}
	return $id;
}

sub get_mw_namespace_id_for_page {
	my $namespace = shift;
	if ($namespace =~ /^([^:]*):/) {
		# Pass only the namespace part of the page title, as
		# expected by get_mw_namespace_id().
		return get_mw_namespace_id($1);
	} else {
		return;
	}
}