#! /usr/bin/perl

# Copyright (C) 2011
#     Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr>
#     Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr>
#     Claire Fousse <claire.fousse@ensimag.imag.fr>
#     David Amouyal <david.amouyal@ensimag.imag.fr>
#     Matthieu Moy <matthieu.moy@grenoble-inp.fr>
# License: GPL v2 or later

# Gateway between Git and MediaWiki.
#   https://github.com/Bibzball/Git-Mediawiki/wiki
#
# Known limitations:
#
# - Only wiki pages are managed, no support for [[File:...]]
#   attachments.
#
# - Poor performance in the best case: it takes forever to check
#   whether we're up-to-date (on fetch or push) or to fetch a few
#   revisions from a large wiki, because we use exclusively a
#   page-based synchronization. We could switch to a wiki-wide
#   synchronization when the synchronization involves few revisions
#   but the wiki is large.
#
# - Git renames could be turned into MediaWiki renames (see TODO
#   below)
#
# - login/password support requires the user to write the password
#   cleartext in a file (see TODO below).
#
# - No way to import "one page, and all pages included in it"
#
# - Multiple remote MediaWikis have not been very well tested.

use strict;
use MediaWiki::API;
use DateTime::Format::ISO8601;

# By default, use UTF-8 to communicate with Git and the user
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";

use URI::Escape;
use IPC::Open2;

use warnings;

# Mediawiki filenames can contain forward slashes. This variable decides by which pattern they should be replaced
use constant SLASH_REPLACEMENT => "%2F";

# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";

# It's not possible to create empty pages. New empty files in Git are
# sent with this content instead.
use constant EMPTY_CONTENT => "<!-- empty page -->\n";

# used to reflect file creation or deletion in diff.
use constant NULL_SHA1 => "0000000000000000000000000000000000000000";

# Positional arguments supplied by Git when invoking the remote helper:
# the remote name and the wiki URL.
my $remotename = $ARGV[0];
my $url = $ARGV[1];

# Accept both space-separated and multiple keys in config file.
# Spaces should be written as _ anyway because we'll use chomp.
my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".pages"));
chomp(@tracked_pages);

# Just like @tracked_pages, but for MediaWiki categories.
my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote." . $remotename . ".categories"));
chomp(@tracked_categories);

my $wiki_login = run_git("config --get remote." . $remotename . ".mwLogin");
# TODO: ideally, this should be able to read from keyboard, but we're
# inside a remote helper, so our stdin is connected to git, not to a
# terminal.
my $wiki_passwd = run_git("config --get remote." . $remotename . ".mwPassword");
my $wiki_domain = run_git("config --get remote." . $remotename . ".mwDomain");
chomp($wiki_login);
chomp($wiki_passwd);
chomp($wiki_domain);

# Import only last revisions (both for clone and fetch)
my $shallow_import = run_git("config --get --bool remote." . $remotename . ".shallow");
chomp($shallow_import);
$shallow_import = ($shallow_import eq "true");

# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
# remote.<remotename>.dumbPush.
#
# This means the user will have to re-import the just-pushed
# revisions. On the other hand, this means that the Git revisions
# corresponding to MediaWiki revisions are all imported from the wiki,
# regardless of whether they were initially created in Git or from the
# web interface, hence all users will get the same history (i.e. if
# the push from Git to MediaWiki loses some information, everybody
# will get the history with information lost).
# If the import is
# deterministic, this means everybody gets the same sha1 for each
# MediaWiki revision.
my $dumb_push = run_git("config --get --bool remote.$remotename.dumbPush");
unless ($dumb_push) {
	$dumb_push = run_git("config --get --bool mediawiki.dumbPush");
}
chomp($dumb_push);
$dumb_push = ($dumb_push eq "true");

# Human-readable wiki host name, used to forge commit author emails.
my $wiki_name = $url;
$wiki_name =~ s/[^\/]*:\/\///;
# If URL is like http://user:password@example.com/, we clearly don't
# want the password in $wiki_name. While we're there, also remove user
# and '@' sign, to avoid author like MWUser@HTTPUser@host.com
$wiki_name =~ s/^.*@//;

# Commands parser: read the remote-helper protocol commands from Git
# on stdin, one per line, and dispatch to the mw_* handlers.
my $entry;
my @cmd;
while (<STDIN>) {
	chomp;
	@cmd = split(/ /);
	if (defined($cmd[0])) {
		# Line not blank
		if ($cmd[0] eq "capabilities") {
			die("Too many arguments for capabilities") unless (!defined($cmd[1]));
			mw_capabilities();
		} elsif ($cmd[0] eq "list") {
			die("Too many arguments for list") unless (!defined($cmd[2]));
			mw_list($cmd[1]);
		} elsif ($cmd[0] eq "import") {
			die("Invalid arguments for import") unless ($cmd[1] ne "" && !defined($cmd[2]));
			mw_import($cmd[1]);
		} elsif ($cmd[0] eq "option") {
			die("Too many arguments for option") unless ($cmd[1] ne "" && $cmd[2] ne "" && !defined($cmd[3]));
			mw_option($cmd[1], $cmd[2]);
		} elsif ($cmd[0] eq "push") {
			mw_push($cmd[1]);
		} else {
			print STDERR "Unknown command. Aborting...\n";
			last;
		}
	} else {
		# blank line: we should terminate
		last;
	}

	BEGIN { $| = 1 } # flush STDOUT, to make sure the previous
			 # command is fully processed.
}

########################## Functions ##############################

## credential API management (generic functions)

# Break an URL down into the fields of a git-credential request
# (protocol, host, path, username, password).
sub credential_from_url {
	my $url = shift;
	my $parsed = URI->new($url);
	my %credential;

	if ($parsed->scheme) {
		$credential{protocol} = $parsed->scheme;
	}
	if ($parsed->host) {
		$credential{host} = $parsed->host;
	}
	if ($parsed->path) {
		$credential{path} = $parsed->path;
	}
	if ($parsed->userinfo) {
		if ($parsed->userinfo =~ /([^:]*):(.*)/) {
			$credential{username} = $1;
			$credential{password} = $2;
		} else {
			$credential{username} = $parsed->userinfo;
		}
	}

	return %credential;
}

# Parse "key=value" lines produced by "git credential" on $reader
# into a credential hash. Dies on malformed output.
sub credential_read {
	my %credential;
	my $reader = shift;
	my $op = shift;
	while (<$reader>) {
		my ($key, $value) = /([^=]*)=(.*)/;
		if (not defined $key) {
			die "ERROR receiving response from git credential $op:\n$_\n";
		}
		$credential{$key} = $value;
	}
	return %credential;
}

# Write a credential hash as "key=value" lines to $writer, in the
# format expected by "git credential". Empty values are skipped.
sub credential_write {
	my $credential = shift;
	my $writer = shift;
	while (my ($key, $value) = each(%$credential)) {
		if ($value) {
			print $writer "$key=$value\n";
		}
	}
}

# Run "git credential <op>" (fill/approve/reject), feeding it the
# given credential hash. For "fill", the hash is updated in place with
# the helper's answer. Dies if the child exits non-zero.
sub credential_run {
	my $op = shift;
	my $credential = shift;
	my $pid = open2(my $reader, my $writer, "git credential $op");
	credential_write($credential, $writer);
	print $writer "\n";
	close($writer);

	if ($op eq "fill") {
		%$credential = credential_read($reader, $op);
	} else {
		if (<$reader>) {
			die "ERROR while running git credential $op:\n$_";
		}
	}
	close($reader);
	waitpid($pid, 0);
	my $child_exit_status = $? >> 8;
	if ($child_exit_status != 0) {
		die "'git credential $op' failed with code $child_exit_status.";
	}
}

# MediaWiki API instance, created lazily.
my $mediawiki;

# Create the MediaWiki::API object on first use and, if a login is
# configured, authenticate through the git-credential API.
sub mw_connect_maybe {
	if ($mediawiki) {
		return;
	}
	$mediawiki = MediaWiki::API->new;
	$mediawiki->{config}->{api_url} = "$url/api.php";
	if ($wiki_login) {
		my %credential = credential_from_url($url);
		$credential{username} = $wiki_login;
		$credential{password} = $wiki_passwd;
		credential_run("fill", \%credential);
		my $request = {lgname => $credential{username},
			       lgpassword => $credential{password},
			       lgdomain => $wiki_domain};
		if ($mediawiki->login($request)) {
			credential_run("approve", \%credential);
			print STDERR "Logged in mediawiki user \"$credential{username}\".\n";
		} else {
			print STDERR "Failed to log in mediawiki user \"$credential{username}\" on $url\n";
			print STDERR "  (error " .
				$mediawiki->{error}->{code} . ': ' .
				$mediawiki->{error}->{details} . ")\n";
			credential_run("reject", \%credential);
			exit 1;
		}
	}
}

## Functions for listing pages on the remote wiki

# Query the wiki for each explicitly tracked page, in slices of 50
# titles (the API limit for a single "titles=" query).
sub get_mw_tracked_pages {
	my $pages = shift;
	my @some_pages = @tracked_pages;
	while (@some_pages) {
		my $last = 50;
		if ($#some_pages < $last) {
			$last = $#some_pages;
		}
		my @slice = @some_pages[0..$last];
		get_mw_first_pages(\@slice, $pages);
		@some_pages = @some_pages[51..$#some_pages];
	}
}

# Add all members of the tracked categories to the page hash.
sub get_mw_tracked_categories {
	my $pages = shift;
	foreach my $category (@tracked_categories) {
		if (index($category, ':') < 0) {
			# Mediawiki requires the Category
			# prefix, but let's not force the user
			# to specify it.
			$category = "Category:" . $category;
		}
		my $mw_pages = $mediawiki->list( {
			action => 'query',
			list => 'categorymembers',
			cmtitle => $category,
			cmlimit => 'max'} )
			|| die $mediawiki->{error}->{code} . ': '
				. $mediawiki->{error}->{details};
		foreach my $page (@{$mw_pages}) {
			$pages->{$page->{title}} = $page;
		}
	}
}

# Fetch every page of the wiki into the page hash.
sub get_mw_all_pages {
	my $pages = shift;
	# No user-provided list, get the list of pages from the API.
	my $mw_pages = $mediawiki->list({
		action => 'query',
		list => 'allpages',
		aplimit => 'max'
	});
	if (!defined($mw_pages)) {
		print STDERR "fatal: could not get the list of wiki pages.\n";
		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}
	foreach my $page (@{$mw_pages}) {
		$pages->{$page->{title}} = $page;
	}
}

# queries the wiki for a set of pages. Meant to be used within a loop
# querying the wiki for slices of page list.
sub get_mw_first_pages {
	my $some_pages = shift;
	my @some_pages = @{$some_pages};

	my $pages = shift;

	# pattern 'page1|page2|...' required by the API
	my $titles = join('|', @some_pages);

	my $mw_pages = $mediawiki->api({
		action => 'query',
		titles => $titles,
	});
	if (!defined($mw_pages)) {
		print STDERR "fatal: could not query the list of wiki pages.\n";
		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}
	# Negative ids flag titles that do not exist on the wiki.
	while (my ($id, $page) = each(%{$mw_pages->{query}->{pages}})) {
		if ($id < 0) {
			print STDERR "Warning: page $page->{title} not found on wiki\n";
		} else {
			$pages->{$page->{title}} = $page;
		}
	}
}

# Get the list of pages to be fetched according to configuration.
sub get_mw_pages {
	mw_connect_maybe();

	my %pages; # hash on page titles to avoid duplicates
	my $user_defined;
	if (@tracked_pages) {
		$user_defined = 1;
		# The user provided a list of page titles, but we
		# still need to query the API to get the page IDs.
		get_mw_tracked_pages(\%pages);
	}
	if (@tracked_categories) {
		$user_defined = 1;
		get_mw_tracked_categories(\%pages);
	}
	if (!$user_defined) {
		get_mw_all_pages(\%pages);
	}
	return values(%pages);
}

# usage: $out = run_git("command args");
#        $out = run_git("command args", "raw"); # don't interpret output as UTF-8.
sub run_git {
	my $args = shift;
	my $encoding = (shift || "encoding(UTF-8)");
	open(my $git, "-|:$encoding", "git " . $args);
	my $res = do { local $/; <$git> };
	close($git);

	return $res;
}


# Return the MediaWiki revision number recorded by the last import
# (stored in a git note), or 0 if none was found.
sub get_last_local_revision {
	# Get note regarding last mediawiki revision
	my $note = run_git("notes --ref=$remotename/mediawiki show refs/mediawiki/$remotename/master 2>/dev/null");
	my @note_info = split(/ /, $note);

	my $lastrevision_number;
	if (!(defined($note_info[0]) && $note_info[0] eq "mediawiki_revision:")) {
		print STDERR "No previous mediawiki revision found";
		$lastrevision_number = 0;
	} else {
		# Notes are formatted : mediawiki_revision: #number
		$lastrevision_number = $note_info[1];
		chomp($lastrevision_number);
		print STDERR "Last local mediawiki revision found is $lastrevision_number";
	}
	return $lastrevision_number;
}

# Remember the timestamp corresponding to a revision id.
my %basetimestamps;

# Ask the wiki for the newest revision id of every tracked page,
# recording each page's last timestamp in %basetimestamps (used later
# as basetimestamp for conflict detection on push).
sub get_last_remote_revision {
	mw_connect_maybe();

	my @pages = get_mw_pages();

	my $max_rev_num = 0;

	foreach my $page (@pages) {
		my $id = $page->{pageid};

		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'ids|timestamp',
			pageids => $id,
		};

		my $result = $mediawiki->api($query);

		my $lastrev = pop(@{$result->{query}->{pages}->{$id}->{revisions}});

		$basetimestamps{$lastrev->{revid}} = $lastrev->{timestamp};

		$max_rev_num = ($lastrev->{revid} > $max_rev_num ? $lastrev->{revid} : $max_rev_num);
	}

	print STDERR "Last remote revision found is $max_rev_num.\n";
	return $max_rev_num;
}

# Clean content before sending it to MediaWiki
sub mediawiki_clean {
	my $string = shift;
	my $page_created = shift;
	# Mediawiki does not allow blank space at the end of a page and ends with a single \n.
	# This function right trims a string and adds a \n at the end to follow this rule
	$string =~ s/\s+$//;
	if ($string eq "" && $page_created) {
		# Creating empty pages is forbidden.
		$string = EMPTY_CONTENT;
	}
	return $string."\n";
}

# Filter applied on MediaWiki data before adding them to Git
sub mediawiki_smudge {
	my $string = shift;
	if ($string eq EMPTY_CONTENT) {
		$string = "";
	}
	# This \n is important. This is due to mediawiki's way to handle end of files.
	return $string."\n";
}

# Turn a Git file name back into a MediaWiki page title.
sub mediawiki_clean_filename {
	my $filename = shift;
	$filename =~ s/@{[SLASH_REPLACEMENT]}/\//g;
	# [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
	# Do a variant of URL-encoding, i.e. looks like URL-encoding,
	# but with _ added to prevent MediaWiki from thinking this is
	# an actual special character.
	$filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
	# If we use the uri escape before
	# we should unescape here, before anything

	return $filename;
}

# Turn a MediaWiki page title into a file name safe for Git.
sub mediawiki_smudge_filename {
	my $filename = shift;
	$filename =~ s/\//@{[SLASH_REPLACEMENT]}/g;
	$filename =~ s/ /_/g;
	# Decode forbidden characters encoded in mediawiki_clean_filename
	$filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge;
	return $filename;
}

# Emit a fast-import "data" command for $content (length in bytes).
sub literal_data {
	my ($content) = @_;
	print STDOUT "data ", bytes::length($content), "\n", $content;
}

# Remote-helper "capabilities" command.
sub mw_capabilities {
	# Revisions are imported to the private namespace
	# refs/mediawiki/$remotename/ by the helper and fetched into
	# refs/remotes/$remotename later by fetch.
	print STDOUT "refspec refs/heads/*:refs/mediawiki/$remotename/*\n";
	print STDOUT "import\n";
	print STDOUT "list\n";
	print STDOUT "push\n";
	print STDOUT "\n";
}

# Remote-helper "list" command.
sub mw_list {
	# MediaWiki do not have branches, we consider one branch arbitrarily
	# called master, and HEAD pointing to it.
	print STDOUT "? refs/heads/master\n";
	print STDOUT "\@refs/heads/master HEAD\n";
	print STDOUT "\n";
}

# Remote-helper "option" command: nothing is supported yet.
sub mw_option {
	print STDERR "remote-helper command 'option $_[0]' not yet implemented\n";
	print STDOUT "unsupported\n";
}

# Collect the (pageid, revid) pairs of all revisions of one page newer
# than $fetch_from. With shallow import, only the newest one is kept.
sub fetch_mw_revisions_for_page {
	my $page = shift;
	my $id = shift;
	my $fetch_from = shift;
	my @page_revs = ();
	my $query = {
		action => 'query',
		prop => 'revisions',
		rvprop => 'ids',
		rvdir => 'newer',
		rvstartid => $fetch_from,
		rvlimit => 500,
		pageids => $id,
	};

	my $revnum = 0;
	# Get 500 revisions at a time due to the mediawiki api limit
	while (1) {
		my $result = $mediawiki->api($query);

		# Parse each of those 500 revisions
		foreach my $revision (@{$result->{query}->{pages}->{$id}->{revisions}}) {
			my $page_rev_ids;
			$page_rev_ids->{pageid} = $page->{pageid};
			$page_rev_ids->{revid} = $revision->{revid};
			push(@page_revs, $page_rev_ids);
			$revnum++;
		}
		last unless $result->{'query-continue'};
		$query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid};
	}
	if ($shallow_import && @page_revs) {
		print STDERR "  Found 1 revision (shallow import).\n";
		@page_revs = sort {$b->{revid} <=> $a->{revid}} (@page_revs);
		return $page_revs[0];
	}
	print STDERR "  Found ", $revnum, " revision(s).\n";
	return @page_revs;
}

# Collect the revisions to import for every page, newer than $fetch_from.
sub fetch_mw_revisions {
	my $pages = shift; my @pages = @{$pages};
	my $fetch_from = shift;

	my @revisions = ();
	my $n = 1;
	foreach my $page (@pages) {
		my $id = $page->{pageid};

		print STDERR "page $n/", scalar(@pages), ": " . $page->{title} . "\n";
		$n++;
		my @page_revs = fetch_mw_revisions_for_page($page, $id, $fetch_from);
		@revisions = (@page_revs, @revisions);
	}

	return ($n, @revisions);
}

# Write one wiki revision to the fast-import stream, as a commit on
# refs/mediawiki/$remotename/master plus a git note recording the
# MediaWiki revision number.
sub import_file_revision {
	my $commit = shift;
	my %commit = %{$commit};
	my $full_import = shift;
	my $n = shift;

	my $title = $commit{title};
	my $comment = $commit{comment};
	my $content = $commit{content};
	my $author = $commit{author};
	my $date = $commit{date};

	print STDOUT "commit refs/mediawiki/$remotename/master\n";
	print STDOUT "mark :$n\n";
	print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n";
	literal_data($comment);

	# If it's not a clone, we need to know where to start from
	if (!$full_import && $n == 1) {
		print STDOUT "from refs/mediawiki/$remotename/master^0\n";
	}
	if ($content ne DELETED_CONTENT) {
		print STDOUT "M 644 inline $title.mw\n";
		literal_data($content);
		print STDOUT "\n\n";
	} else {
		print STDOUT "D $title.mw\n";
	}

	# mediawiki revision number in the git note
	if ($full_import && $n == 1) {
		print STDOUT "reset refs/notes/$remotename/mediawiki\n";
	}
	print STDOUT "commit refs/notes/$remotename/mediawiki\n";
	print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n";
	literal_data("Note added by git-mediawiki during import");
	if (!$full_import && $n == 1) {
		print STDOUT "from refs/notes/$remotename/mediawiki^0\n";
	}
	print STDOUT "N inline :$n\n";
	literal_data("mediawiki_revision: " . $commit{mw_revision});
	print STDOUT "\n\n";
}

# parse a sequence of
# <cmd> <arg1>
# <cmd> <arg2>
# \n
# (like batch sequence of import and sequence of push statements)
sub get_more_refs {
	my $cmd = shift;
	my @refs;
	while (1) {
		my $line = <STDIN>;
		if ($line =~ m/^$cmd (.*)$/) {
			push(@refs, $1);
		} elsif ($line eq "\n") {
			return @refs;
		} else {
			die("Invalid command in a '$cmd' batch: " . $_);
		}
	}
}

# Remote-helper "import" command.
sub mw_import {
	# multiple import commands can follow each other.
	my @refs = (shift, get_more_refs("import"));
	foreach my $ref (@refs) {
		mw_import_ref($ref);
	}
	print STDOUT "done\n";
}

# Import all wiki revisions newer than the last imported one for a
# single ref, writing a fast-import stream on stdout.
sub mw_import_ref {
	my $ref = shift;
	# The remote helper will call "import HEAD" and
	# "import refs/heads/master".
	# Since HEAD is a symbolic ref to master (by convention,
	# followed by the output of the command "list" that we gave),
	# we don't need to do anything in this case.
	if ($ref eq "HEAD") {
		return;
	}

	mw_connect_maybe();

	my @pages = get_mw_pages();

	print STDERR "Searching revisions...\n";
	my $last_local = get_last_local_revision();
	my $fetch_from = $last_local + 1;
	if ($fetch_from == 1) {
		print STDERR ", fetching from beginning.\n";
	} else {
		print STDERR ", fetching from here.\n";
	}
	my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from);

	# Creation of the fast-import stream
	print STDERR "Fetching & writing export data...\n";

	$n = 0;
	my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined

	foreach my $pagerevid (sort {$a->{revid} <=> $b->{revid}} @revisions) {
		# fetch the content of the pages
		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'content|timestamp|comment|user|ids',
			revids => $pagerevid->{revid},
		};

		my $result = $mediawiki->api($query);

		my $rev = pop(@{$result->{query}->{pages}->{$pagerevid->{pageid}}->{revisions}});

		$n++;

		my %commit;
		$commit{author} = $rev->{user} || 'Anonymous';
		$commit{comment} = $rev->{comment} || '*Empty MediaWiki Message*';
		$commit{title} = mediawiki_smudge_filename(
			$result->{query}->{pages}->{$pagerevid->{pageid}}->{title}
			);
		$commit{mw_revision} = $pagerevid->{revid};
		$commit{content} = mediawiki_smudge($rev->{'*'});

		if (!defined($rev->{timestamp})) {
			$last_timestamp++;
		} else {
			$last_timestamp = $rev->{timestamp};
		}
		$commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp);

		print STDERR "$n/", scalar(@revisions), ": Revision #$pagerevid->{revid} of $commit{title}\n";

		import_file_revision(\%commit, ($fetch_from == 1), $n);
	}

	if ($fetch_from == 1 && $n == 0) {
		print STDERR "You appear to have cloned an empty MediaWiki.\n";
		# Something has to be done remote-helper side. If nothing is done, an error is
		# thrown saying that HEAD is referring to unknown object 0000000000000000000
		# and the clone fails.
	}
}

# Report a non-fast-forward rejection to Git (and advise the user,
# unless advice.pushNonFastForward is false). Always returns 0.
sub error_non_fast_forward {
	my $advice = run_git("config --bool advice.pushNonFastForward");
	chomp($advice);
	if ($advice ne "false") {
		# Native git-push would show this after the summary.
		# We can't ask it to display it cleanly, so print it
		# ourselves before.
		print STDERR "To prevent you from losing history, non-fast-forward updates were rejected\n";
		print STDERR "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n";
		print STDERR "'Note about fast-forwards' section of 'git push --help' for details.\n";
	}
	print STDOUT "error $_[0] \"non-fast-forward\"\n";
	return 0;
}

# Upload (or delete) a non-.mw file as a wiki attachment. Returns the
# new wiki revision id, or undef when nothing was uploaded.
sub mw_upload_file {
	my $complete_file_name = shift;
	my $new_sha1 = shift;
	my $extension = shift;
	my $file_deleted = shift;
	my $summary = shift;
	my $newrevid;
	my $path = "File:" . $complete_file_name;
	my %hashFiles = get_allowed_file_extensions();
	if (!exists($hashFiles{$extension})) {
		print STDERR "$complete_file_name is not a permitted file on this wiki.\n";
		print STDERR "Check the configuration of file uploads in your mediawiki.\n";
		return $newrevid;
	}
	# Deleting and uploading a file requires a privileged user
	if ($file_deleted) {
		mw_connect_maybe();
		my $query = {
			action => 'delete',
			title => $path,
			reason => $summary
		};
		if (!$mediawiki->edit($query)) {
			print STDERR "Failed to delete file on remote wiki\n";
			print STDERR "Check your permissions on the remote site. Error code:\n";
			print STDERR $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details};
			exit 1;
		}
	} else {
		# Don't let perl try to interpret file content as UTF-8 => use "raw"
		my $content = run_git("cat-file blob $new_sha1", "raw");
		if ($content ne "") {
			mw_connect_maybe();
			$mediawiki->{config}->{upload_url} =
				"$url/index.php/Special:Upload";
			$mediawiki->edit({
				action => 'upload',
				filename => $complete_file_name,
				comment => $summary,
				file => [undef,
					 $complete_file_name,
					 Content => $content],
				ignorewarnings => 1,
			}, {
				skip_encoding => 1
			} ) || die $mediawiki->{error}->{code} . ':'
				 . $mediawiki->{error}->{details};
			my $last_file_page = $mediawiki->get_page({title => $path});
			$newrevid = $last_file_page->{revid};
			print STDERR "Pushed file: $new_sha1 - $complete_file_name.\n";
		} else {
			print STDERR "Empty file $complete_file_name not pushed.\n";
		}
	}
	return $newrevid;
}

# Push one blob of a commit to the wiki: edit (or delete) the page for
# a .mw file, or upload an attachment otherwise. Returns the new wiki
# revision id plus a status of "ok" or "non-fast-forward".
sub mw_push_file {
	my $diff_info = shift;
	# $diff_info contains a string in this format:
	# 100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status>
	my @diff_info_split = split(/[ \t]/, $diff_info);

	# Filename, including .mw extension
	my $complete_file_name = shift;
	# Commit message
	my $summary = shift;
	# MediaWiki revision number. Keep the previous one by default,
	# in case there's no edit to perform.
	my $oldrevid = shift;
	my $newrevid;

	my $new_sha1 = $diff_info_split[3];
	my $old_sha1 = $diff_info_split[2];
	my $page_created = ($old_sha1 eq NULL_SHA1);
	my $page_deleted = ($new_sha1 eq NULL_SHA1);
	$complete_file_name = mediawiki_clean_filename($complete_file_name);

	my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/;
	if (!defined($extension)) {
		$extension = "";
	}
	if ($extension eq "mw") {
		my $file_content;
		if ($page_deleted) {
			# Deleting a page usually requires
			# special privileges. A common
			# convention is to replace the page
			# with this content instead:
			$file_content = DELETED_CONTENT;
		} else {
			$file_content = run_git("cat-file blob $new_sha1");
		}

		mw_connect_maybe();

		my $result = $mediawiki->edit( {
			action => 'edit',
			summary => $summary,
			title => $title,
			basetimestamp => $basetimestamps{$oldrevid},
			text => mediawiki_clean($file_content, $page_created),
		}, {
			skip_encoding => 1 # Helps with names with accentuated characters
		});
		if (!$result) {
			if ($mediawiki->{error}->{code} == 3) {
				# edit conflicts, considered as non-fast-forward
				print STDERR 'Warning: Error ' .
				    $mediawiki->{error}->{code} .
				    ' from mediawiki: ' . $mediawiki->{error}->{details} .
				    ".\n";
				return ($oldrevid, "non-fast-forward");
			} else {
				# Other errors. Shouldn't happen => just die()
				die 'Fatal: Error ' .
				    $mediawiki->{error}->{code} .
				    ' from mediawiki: ' . $mediawiki->{error}->{details};
			}
		}
		$newrevid = $result->{edit}->{newrevid};
		print STDERR "Pushed file: $new_sha1 - $title\n";
	} else {
		$newrevid = mw_upload_file($complete_file_name, $new_sha1,
					   $extension, $page_deleted,
					   $summary);
	}
	$newrevid = ($newrevid or $oldrevid);
	return ($newrevid, "ok");
}

# Remote-helper "push" command.
sub mw_push {
	# multiple push statements can follow each other
	my @refsspecs = (shift, get_more_refs("push"));
	my $pushed;
	for my $refspec (@refsspecs) {
		my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/
		    or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>");
		if ($force) {
			print STDERR "Warning: forced push not allowed on a MediaWiki.\n";
		}
		if ($local eq "") {
			print STDERR "Cannot delete remote branch on a MediaWiki\n";
			print STDOUT "error $remote cannot delete\n";
			next;
		}
		if ($remote ne "refs/heads/master") {
			print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n";
			print STDOUT "error $remote only master allowed\n";
			next;
		}
		if (mw_push_revision($local, $remote)) {
			$pushed = 1;
		}
	}

	# Notify Git that the push is done
	print STDOUT "\n";

	if ($pushed && $dumb_push) {
		print STDERR "Just pushed some revisions to MediaWiki.\n";
		print STDERR "The pushed revisions now have to be re-imported, and your current branch\n";
		print STDERR "needs to be updated with these re-imported commits. You can do this with\n";
		print STDERR "\n";
		print STDERR "  git pull --rebase\n";
		print STDERR "\n";
	}
}

# Push every commit between the last MediaWiki state and $local to the
# wiki, one blob at a time. Returns 1 on success, 0 otherwise.
sub mw_push_revision {
	my $local = shift;
	my $remote = shift; # actually, this has to be "refs/heads/master" at this point.
	my $last_local_revid = get_last_local_revision();
	print STDERR ".\n"; # Finish sentence started by get_last_local_revision()
	my $last_remote_revid = get_last_remote_revision();
	my $mw_revision = $last_remote_revid;

	# Get sha1 of commit pointed by local HEAD
	my $HEAD_sha1 = run_git("rev-parse $local 2>/dev/null"); chomp($HEAD_sha1);
	# Get sha1 of commit pointed by remotes/$remotename/master
	my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/$remotename/master 2>/dev/null");
	chomp($remoteorigin_sha1);

	if ($last_local_revid > 0 &&
	    $last_local_revid < $last_remote_revid) {
		return error_non_fast_forward($remote);
	}

	if ($HEAD_sha1 eq $remoteorigin_sha1) {
		# nothing to push
		return 0;
	}

	# Get every commit in between HEAD and refs/remotes/origin/master,
	# including HEAD and refs/remotes/origin/master
	my @commit_pairs = ();
	if ($last_local_revid > 0) {
		my $parsed_sha1 = $remoteorigin_sha1;
		# Find a path from last MediaWiki commit to pushed commit
		while ($parsed_sha1 ne $HEAD_sha1) {
			my @commit_info = grep(/^$parsed_sha1/, split(/\n/, run_git("rev-list --children $local")));
			if (!@commit_info) {
				return error_non_fast_forward($remote);
			}
			my @commit_info_split = split(/ |\n/, $commit_info[0]);
			# $commit_info_split[1] is the sha1 of the commit to export
			# $commit_info_split[0] is the sha1 of its direct child
			push(@commit_pairs, \@commit_info_split);
			$parsed_sha1 = $commit_info_split[1];
		}
	} else {
		# No remote mediawiki revision. Export the whole
		# history (linearized with --first-parent)
		print STDERR "Warning: no common ancestor, pushing complete history\n";
		my $history = run_git("rev-list --first-parent --children $local");
		my @history = split('\n', $history);
		@history = @history[1..$#history];
		foreach my $line (reverse @history) {
			my @commit_info_split = split(/ |\n/, $line);
			push(@commit_pairs, \@commit_info_split);
		}
	}

	foreach my $commit_info_split (@commit_pairs) {
		my $sha1_child = @{$commit_info_split}[0];
		my $sha1_commit = @{$commit_info_split}[1];
		my $diff_infos = run_git("diff-tree -r --raw -z $sha1_child $sha1_commit");
		# TODO: we could detect rename, and encode them with a #redirect on the wiki.
		# TODO: for now, it's just a delete+add
		my @diff_info_list = split(/\0/, $diff_infos);
		# Keep the subject line of the commit message as mediawiki comment for the revision
		my $commit_msg = run_git("log --no-walk --format=\"%s\" $sha1_commit");
		chomp($commit_msg);
		# Push every blob
		while (@diff_info_list) {
			my $status;
			# git diff-tree -z gives an output like
			# <metadata>\0<filename1>\0
			# <metadata>\0<filename2>\0
			# and we've split on \0.
			my $info = shift(@diff_info_list);
			my $file = shift(@diff_info_list);
			($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision);
			if ($status eq "non-fast-forward") {
				# we may already have sent part of the
				# commit to MediaWiki, but it's too
				# late to cancel it. Stop the push in
				# the middle, but still give an
				# accurate error message.
				return error_non_fast_forward($remote);
			}
			if ($status ne "ok") {
				die("Unknown error from mw_push_file()");
			}
		}
		unless ($dumb_push) {
			run_git("notes --ref=$remotename/mediawiki add -m \"mediawiki_revision: $mw_revision\" $sha1_commit");
			run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child");
		}
	}

	print STDOUT "ok $remote\n";
	return 1;
}

# Query the wiki for the set of file extensions allowed for upload.
sub get_allowed_file_extensions {
	mw_connect_maybe();

	my $query = {
		action => 'query',
		meta => 'siteinfo',
		siprop => 'fileextensions'
	};
	my $result = $mediawiki->api($query);
	my @file_extensions = map $_->{ext}, @{$result->{query}->{fileextensions}};
	my %hashFile = map {$_ => 1} @file_extensions;

	return %hashFile;
}