use strict;
use MediaWiki::API;
use Git;
+use Git::Mediawiki qw(clean_filename smudge_filename connect_maybe
+ EMPTY HTTP_CODE_OK);
use DateTime::Format::ISO8601;
use warnings;
use URI::Escape;
-# Mediawiki filenames can contain forward slashes. This variable decides by which pattern they should be replaced
-use constant SLASH_REPLACEMENT => '%2F';
-
# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";
# Used on Git's side to reflect empty edit messages on the wiki
use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*';
+# Number of pages taken into account at once in submodule get_mw_page_list
+use constant SLICE_SIZE => 50;
+
+# Number of linked mediafile to get at once in get_linked_mediafiles
+# The query is split in small batches because of the MW API limit of
+# the number of links to be returned (500 links max).
+use constant BATCH_SIZE => 10;
+
+if (@ARGV != 2) {
+ exit_error_usage();
+}
+
my $remotename = $ARGV[0];
my $url = $ARGV[1];
########################## Functions ##############################
+## error handling
+sub exit_error_usage {
+ die "ERROR: git-remote-mediawiki module was not called with a correct number of\n" .
+ "parameters\n" .
+ "You may obtain this error because you attempted to run the git-remote-mediawiki\n" .
+ "module directly.\n" .
+ "This module can be used the following way:\n" .
+ "\tgit clone mediawiki://<address of a mediawiki>\n" .
+ "Then, use git commit, push and pull as with every normal git repository.\n";
+}
+
sub parse_command {
my ($line) = @_;
my @cmd = split(/ /, $line);
die("Too many arguments for list\n") if (defined($cmd[2]));
mw_list($cmd[1]);
} elsif ($cmd[0] eq 'import') {
- die("Invalid arguments for import\n")
- if ($cmd[1] eq "" || defined($cmd[2]));
+ die("Invalid argument for import\n")
+ if ($cmd[1] eq EMPTY);
+ die("Too many arguments for import\n")
+ if (defined($cmd[2]));
mw_import($cmd[1]);
} elsif ($cmd[0] eq 'option') {
+ die("Invalid arguments for option\n")
+ if ($cmd[1] eq EMPTY || $cmd[2] eq EMPTY);
die("Too many arguments for option\n")
- if ($cmd[1] eq "" || $cmd[2] eq "" || defined($cmd[3]));
+ if (defined($cmd[3]));
mw_option($cmd[1],$cmd[2]);
} elsif ($cmd[0] eq 'push') {
mw_push($cmd[1]);
# MediaWiki API instance, created lazily.
my $mediawiki;
-sub mw_connect_maybe {
- if ($mediawiki) {
- return;
- }
- $mediawiki = MediaWiki::API->new;
- $mediawiki->{config}->{api_url} = "${url}/api.php";
- if ($wiki_login) {
- my %credential = (
- 'url' => $url,
- 'username' => $wiki_login,
- 'password' => $wiki_passwd
- );
- Git::credential(\%credential);
- my $request = {lgname => $credential{username},
- lgpassword => $credential{password},
- lgdomain => $wiki_domain};
- if ($mediawiki->login($request)) {
- Git::credential(\%credential, 'approve');
- print {*STDERR} qq(Logged in mediawiki user "$credential{username}".\n);
- } else {
- print {*STDERR} qq(Failed to log in mediawiki user "$credential{username}" on ${url}\n);
- print {*STDERR} ' (error ' .
- $mediawiki->{error}->{code} . ': ' .
- $mediawiki->{error}->{details} . ")\n";
- Git::credential(\%credential, 'reject');
- exit 1;
- }
- }
- return;
-}
-
sub fatal_mw_error {
my $action = shift;
print STDERR "fatal: could not $action.\n";
sub get_mw_page_list {
my $page_list = shift;
my $pages = shift;
- my @some_pages = @$page_list;
+ my @some_pages = @{$page_list};
while (@some_pages) {
- my $last_page = 50;
+ my $last_page = SLICE_SIZE;
if ($#some_pages < $last_page) {
$last_page = $#some_pages;
}
my @slice = @some_pages[0..$last_page];
get_mw_first_pages(\@slice, $pages);
- @some_pages = @some_pages[51..$#some_pages];
+ @some_pages = @some_pages[(SLICE_SIZE + 1)..$#some_pages];
}
return;
}
# Get the list of pages to be fetched according to configuration.
sub get_mw_pages {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
print {*STDERR} "Listing pages on remote wiki...\n";
my $pages = shift;
my @titles = map { $_->{title} } values(%{$pages});
- # The query is split in small batches because of the MW API limit of
- # the number of links to be returned (500 links max).
- my $batch = 10;
+ my $batch = BATCH_SIZE;
while (@titles) {
if ($#titles < $batch) {
$batch = $#titles;
my $download_url = shift;
my $response = $mediawiki->{ua}->get($download_url);
- if ($response->code == 200) {
+ if ($response->code == HTTP_CODE_OK) {
return $response->decoded_content;
} else {
print {*STDERR} "Error downloading mediafile from :\n";
# avoid a loop onto all tracked pages. This is useful for the fetch-by-rev
# option.
sub get_last_global_remote_rev {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $query = {
action => 'query',
# Get the last remote revision concerning the tracked pages and the tracked
# categories.
sub get_last_remote_revision {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my %pages_hash = get_mw_pages();
my @pages = values(%pages_hash);
# Mediawiki does not allow blank space at the end of a page and ends with a single \n.
# This function right trims a string and adds a \n at the end to follow this rule
$string =~ s/\s+$//;
- if ($string eq "" && $page_created) {
+ if ($string eq EMPTY && $page_created) {
# Creating empty pages is forbidden.
$string = EMPTY_CONTENT;
}
sub mediawiki_smudge {
my $string = shift;
if ($string eq EMPTY_CONTENT) {
- $string = "";
+ $string = EMPTY;
}
# This \n is important. This is due to mediawiki's way to handle end of files.
return "${string}\n";
}
-sub mediawiki_clean_filename {
- my $filename = shift;
- $filename =~ s{@{[SLASH_REPLACEMENT]}}{/}g;
- # [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
- # Do a variant of URL-encoding, i.e. looks like URL-encoding,
- # but with _ added to prevent MediaWiki from thinking this is
- # an actual special character.
- $filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
- # If we use the uri escape before
- # we should unescape here, before anything
-
- return $filename;
-}
-
-sub mediawiki_smudge_filename {
- my $filename = shift;
- $filename =~ s{/}{@{[SLASH_REPLACEMENT]}}g;
- $filename =~ s/ /_/g;
- # Decode forbidden characters encoded in mediawiki_clean_filename
- $filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf('%c', hex($1))/ge;
- return $filename;
-}
-
sub literal_data {
my ($content) = @_;
print {*STDOUT} 'data ', bytes::length($content), "\n", $content;
my ($content) = @_;
# Avoid confusion between size in bytes and in characters
utf8::downgrade($content);
- binmode {*STDOUT}, ':raw';
+ binmode STDOUT, ':raw';
print {*STDOUT} 'data ', bytes::length($content), "\n", $content;
- binmode {*STDOUT}, ':encoding(UTF-8)';
+ binmode STDOUT, ':encoding(UTF-8)';
return;
}
return;
}
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
print {*STDERR} "Searching revisions...\n";
my $last_local = get_last_local_revision();
my $n_actual = 0;
my $last_timestamp = 0; # Placeholer in case $rev->timestamp is undefined
- foreach my $pagerevid (@$revision_ids) {
+ foreach my $pagerevid (@{$revision_ids}) {
# Count page even if we skip it, since we display
# $n/$total and $total includes skipped pages.
$n++;
my $page_title = $result_page->{title};
if (!exists($pages->{$page_title})) {
- print {*STDERR} "${n}/", scalar(@$revision_ids),
+ print {*STDERR} "${n}/", scalar(@{$revision_ids}),
": Skipping revision #$rev->{revid} of ${page_title}\n";
next;
}
my %commit;
$commit{author} = $rev->{user} || 'Anonymous';
$commit{comment} = $rev->{comment} || EMPTY_MESSAGE;
- $commit{title} = mediawiki_smudge_filename($page_title);
+ $commit{title} = smudge_filename($page_title);
$commit{mw_revision} = $rev->{revid};
$commit{content} = mediawiki_smudge($rev->{'*'});
# If this is a revision of the media page for new version
# of a file do one common commit for both file and media page.
# Else do commit only for that page.
- print {*STDERR} "${n}/", scalar(@$revision_ids), ": Revision #$rev->{revid} of $commit{title}\n";
+ print {*STDERR} "${n}/", scalar(@{$revision_ids}), ": Revision #$rev->{revid} of $commit{title}\n";
import_file_revision(\%commit, ($fetch_from == 1), $n_actual, \%mediafile);
}
}
# Deleting and uploading a file requires a priviledged user
if ($file_deleted) {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $query = {
action => 'delete',
title => $path,
} else {
# Don't let perl try to interpret file content as UTF-8 => use "raw"
my $content = run_git("cat-file blob ${new_sha1}", 'raw');
- if ($content ne "") {
- mw_connect_maybe();
+ if ($content ne EMPTY) {
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
$mediawiki->{config}->{upload_url} =
"${url}/index.php/Special:Upload";
$mediawiki->edit({
my $newrevid;
if ($summary eq EMPTY_MESSAGE) {
- $summary = '';
+ $summary = EMPTY;
}
my $new_sha1 = $diff_info_split[3];
my $old_sha1 = $diff_info_split[2];
my $page_created = ($old_sha1 eq NULL_SHA1);
my $page_deleted = ($new_sha1 eq NULL_SHA1);
- $complete_file_name = mediawiki_clean_filename($complete_file_name);
+ $complete_file_name = clean_filename($complete_file_name);
my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/;
if (!defined($extension)) {
- $extension = "";
+ $extension = EMPTY;
}
if ($extension eq 'mw') {
my $ns = get_mw_namespace_id_for_page($complete_file_name);
$file_content = run_git("cat-file blob ${new_sha1}");
}
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $result = $mediawiki->edit( {
action => 'edit',
# edit conflicts, considered as non-fast-forward
print {*STDERR} 'Warning: Error ' .
$mediawiki->{error}->{code} .
- ' from mediwiki: ' . $mediawiki->{error}->{details} .
+ ' from mediawiki: ' . $mediawiki->{error}->{details} .
".\n";
return ($oldrevid, 'non-fast-forward');
} else {
# Other errors. Shouldn't happen => just die()
die 'Fatal: Error ' .
$mediawiki->{error}->{code} .
- ' from mediwiki: ' . $mediawiki->{error}->{details} . "\n";
+ ' from mediawiki: ' . $mediawiki->{error}->{details} . "\n";
}
}
$newrevid = $result->{edit}->{newrevid};
if ($force) {
print {*STDERR} "Warning: forced push not allowed on a MediaWiki.\n";
}
- if ($local eq "") {
+ if ($local eq EMPTY) {
print {*STDERR} "Cannot delete remote branch on a MediaWiki\n";
print {*STDOUT} "error ${remote} cannot delete\n";
next;
}
sub get_allowed_file_extensions {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $query = {
action => 'query',
# Return MediaWiki id for a canonical namespace name.
# Ex.: "File", "Project".
sub get_mw_namespace_id {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $name = shift;
if (!exists $namespace_id{$name}) {