From: Junio C Hamano
Date: Fri, 13 Jul 2012 22:37:46 +0000 (-0700)
Subject: Merge branch 'mm/mediawiki-tests'
X-Git-Tag: v1.7.12-rc0~40
X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/6a9aa0c9b224517db0549d9252fdbc5177e6c0e2?ds=inline;hp=-c

Merge branch 'mm/mediawiki-tests'

* mm/mediawiki-tests:
  git-remote-mediawiki: be more defensive when requests fail
  git-remote-mediawiki: more efficient 'pull' in the best case
  git-remote-mediawiki: extract revision-importing loop to a function
  git-remote-mediawiki: refactor loop over revision ids
  git-remote-mediawiki: change return type of get_mw_pages
  git-remote-mediawiki (t9363): test 'File:' import and export
  git-remote-mediawiki: support for uploading file in test environment
  git-remote-mediawiki (t9362): test git-remote-mediawiki with UTF8 characters
  git-remote-mediawiki (t9361): test git-remote-mediawiki pull and push
  git-remote-mediawiki (t9360): test git-remote-mediawiki clone
  git-remote-mediawiki: test environment of git-remote-mediawiki
  git-remote-mediawiki: scripts to install, delete and clear a MediaWiki
---

6a9aa0c9b224517db0549d9252fdbc5177e6c0e2
diff --combined contrib/mw-to-git/git-remote-mediawiki
index 6b128e88e8,ff9384e91b..accd70a94c
--- a/contrib/mw-to-git/git-remote-mediawiki
+++ b/contrib/mw-to-git/git-remote-mediawiki
@@@ -13,16 -13,16 +13,13 @@@
#
# Known limitations:
#
- # - Poor performance in the best case: it takes forever to check
- # whether we're up-to-date (on fetch or push) or to fetch a few
- # revisions from a large wiki, because we use exclusively a
- # page-based synchronization. We could switch to a wiki-wide
- # synchronization when the synchronization involves few revisions
- # but the wiki is large.
+ # - Several strategies are provided to fetch modifications from the
+ # wiki, but no automatic heuristic is provided; the user has
+ # to understand and choose which strategy is appropriate for them.
#
# - Git renames could be turned into MediaWiki renames (see TODO
# below)
#
-# - login/password support requires the user to write the password
-# cleartext in a file (see TODO below).
-#
# - No way to import "one page, and all pages included in it"
#
# - Multiple remote MediaWikis have not been very well tested.
@@@ -72,7 -72,9 +69,7 @@@ chomp($import_media)
$import_media = ($import_media eq "true");
my $wiki_login = run_git("config --get remote.". $remotename .".mwLogin");
-# TODO: ideally, this should be able to read from keyboard, but we're
-# inside a remote helper, so our stdin is connect to git, not to a
-# terminal.
+# Note: mwPassword is discouraged. Use the credential system instead.
my $wiki_passwd = run_git("config --get remote.". $remotename .".mwPassword");
my $wiki_domain = run_git("config --get remote.". $remotename .".mwDomain");
chomp($wiki_login);
@@@ -84,6 -86,21 +81,21 @@@ my $shallow_import = run_git("config --
chomp($shallow_import);
$shallow_import = ($shallow_import eq "true");
+ # Fetch (clone and pull) by revisions instead of by pages. This behavior
+ # is more efficient when we have a wiki with lots of pages and we fetch
+ # the revisions quite often so that they concern only a few pages.
+ # Possible values:
+ # - by_rev: perform one query per new revision on the remote wiki
+ # - by_page: query each tracked page for new revision
+ my $fetch_strategy = run_git("config --get remote.$remotename.fetchStrategy");
+ unless ($fetch_strategy) {
+ $fetch_strategy = run_git("config --get mediawiki.fetchStrategy");
+ }
+ chomp($fetch_strategy);
+ unless ($fetch_strategy) {
+ $fetch_strategy = "by_page";
+ }
+
# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
@@@ -374,7 -391,7 +386,7 @@@ sub get_mw_pages
get_all_mediafiles(\%pages);
}
}
- return values(%pages);
+ return %pages;
}
# usage: $out = run_git("command args");
@@@ -528,10 -545,31 +540,31 @@@ sub get_last_local_revision
# Remember the timestamp corresponding to a revision id.
my %basetimestamps;
+ # Get the last remote revision without taking into account which pages are
+ # tracked or not. This function makes a single request to the wiki, thus
+ # avoiding a loop over all tracked pages. This is useful for the fetch-by-rev
+ # option.
+ sub get_last_global_remote_rev {
+ mw_connect_maybe();
+
+ my $query = {
+ action => 'query',
+ list => 'recentchanges',
+ prop => 'revisions',
+ rclimit => '1',
+ rcdir => 'older',
+ };
+ my $result = $mediawiki->api($query);
+ return $result->{query}->{recentchanges}[0]->{revid};
+ }
+
+ # Get the last remote revision concerning the tracked pages and the tracked
+ # categories.
sub get_last_remote_revision {
mw_connect_maybe();
- my @pages = get_mw_pages();
+ my %pages_hash = get_mw_pages();
+ my @pages = values(%pages_hash);
my $max_rev_num = 0;
@@@ -797,8 -835,6 +830,6 @@@ sub mw_import_ref
mw_connect_maybe();
- my @pages = get_mw_pages();
-
print STDERR "Searching revisions...\n";
my $last_local = get_last_local_revision();
my $fetch_from = $last_local + 1;
@@@ -807,35 -843,106 +838,106 @@@
} else {
print STDERR ", fetching from here.\n";
}
+
+ my $n = 0;
+ if ($fetch_strategy eq "by_rev") {
+ print STDERR "Fetching & writing export data by revs...\n";
+ $n = mw_import_ref_by_revs($fetch_from);
+ } elsif ($fetch_strategy eq "by_page") {
+ print STDERR "Fetching & writing export data by pages...\n";
+ $n = mw_import_ref_by_pages($fetch_from);
+ } else {
+ print STDERR "fatal: invalid fetch strategy \"$fetch_strategy\".\n";
+ print STDERR "Check your configuration variables remote.$remotename.fetchStrategy and mediawiki.fetchStrategy\n";
+ exit 1;
+ }
+
+ if ($fetch_from == 1 && $n == 0) {
+ print STDERR "You appear to have cloned an empty MediaWiki.\n";
+ # Something has to be done remote-helper side. If nothing is done, an error is
+ # thrown saying that HEAD is referring to unknown object 0000000000000000000
+ # and the clone fails.
+ }
+ }
+
+ sub mw_import_ref_by_pages {
+
+ my $fetch_from = shift;
+ my %pages_hash = get_mw_pages();
+ my @pages = values(%pages_hash);
+ my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from);
- # Creation of the fast-import stream
- print STDERR "Fetching & writing export data...\n";
+ @revisions = sort {$a->{revid} <=> $b->{revid}} @revisions;
+ my @revision_ids = map $_->{revid}, @revisions;
+
+ return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash);
+ }
+
+ sub mw_import_ref_by_revs {
+
+ my $fetch_from = shift;
+ my %pages_hash = get_mw_pages();
- $n = 0;
+ my $last_remote = get_last_global_remote_rev();
+ my @revision_ids = $fetch_from..$last_remote;
+ return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash);
+ }
+
+ # Import revisions given in second argument (array of integers).
+ # Only pages appearing in the third argument (hash indexed by page titles)
+ # will be imported.
+ sub mw_import_revids {
+ my $fetch_from = shift;
+ my $revision_ids = shift;
+ my $pages = shift;
+
+ my $n = 0;
+ my $n_actual = 0;
my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined
- foreach my $pagerevid (sort {$a->{revid} <=> $b->{revid}} @revisions) {
+ foreach my $pagerevid (@$revision_ids) {
# fetch the content of the pages
my $query = {
action => 'query',
prop => 'revisions',
rvprop => 'content|timestamp|comment|user|ids',
- revids => $pagerevid->{revid},
+ revids => $pagerevid,
};
my $result = $mediawiki->api($query);
- my $rev = pop(@{$result->{query}->{pages}->{$pagerevid->{pageid}}->{revisions}});
+ if (!$result) {
+ die "Failed to retrieve modified page for revision $pagerevid";
+ }
+
+ if (!defined($result->{query}->{pages})) {
+ die "Invalid revision $pagerevid.";
+ }
+
+ my @result_pages = values(%{$result->{query}->{pages}});
+ my $result_page = $result_pages[0];
+ my $rev = $result_pages[0]->{revisions}->[0];
+ # Count page even if we skip it, since we display
+ # $n/$total and $total includes skipped pages.
$n++;
- my $page_title = $result->{query}->{pages}->{$pagerevid->{pageid}}->{title};
+ my $page_title = $result_page->{title};
+
+ if (!exists($pages->{$page_title})) {
+ print STDERR "$n/", scalar(@$revision_ids),
+ ": Skipping revision #$rev->{revid} of $page_title\n";
+ next;
+ }
+
+ $n_actual++;
+
my %commit;
$commit{author} = $rev->{user} || 'Anonymous';
$commit{comment} = $rev->{comment} || '*Empty MediaWiki Message*';
$commit{title} = mediawiki_smudge_filename($page_title);
- $commit{mw_revision} = $pagerevid->{revid};
+ $commit{mw_revision} = $rev->{revid};
$commit{content} = mediawiki_smudge($rev->{'*'});
if (!defined($rev->{timestamp})) {
@@@ -854,16 -961,11 +956,11 @@@
# If this is a revision of the media page for new version
# of a file do one common commit for both file and media page.
# Else do commit only for that page.
- print STDERR "$n/", scalar(@revisions), ": Revision #$pagerevid->{revid} of $commit{title}\n";
- import_file_revision(\%commit, ($fetch_from == 1), $n, \%mediafile);
+ print STDERR "$n/", scalar(@$revision_ids), ": Revision #$rev->{revid} of $commit{title}\n";
+ import_file_revision(\%commit, ($fetch_from == 1), $n_actual, \%mediafile);
}
- if ($fetch_from == 1 && $n == 0) {
- print STDERR "You appear to have cloned an empty MediaWiki.\n";
- # Something has to be done remote-helper side. If nothing is done, an error is
- # thrown saying that HEAD is refering to unknown object 0000000000000000000
- # and the clone fails.
- }
+ return $n_actual;
}
sub error_non_fast_forward {
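
The fetch strategy introduced in this merge is selected through the configuration
variables the helper reads: remote.<name>.fetchStrategy, falling back to
mediawiki.fetchStrategy, with "by_page" as the default. A minimal usage sketch,
assuming a MediaWiki remote named "origin" (the remote name is illustrative):

    # Ask the wiki for its recent changes and fetch revision by revision;
    # cheaper on a large wiki when only a few tracked pages have changed:
    git config remote.origin.fetchStrategy by_rev

    # Or query each tracked page for new revisions (the default, by_page):
    git config remote.origin.fetchStrategy by_page

    # The same choice can be made once for all MediaWiki remotes:
    git config mediawiki.fetchStrategy by_rev

Either variable is read at fetch time, so the strategy can be changed between
pulls without re-cloning.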