contrib / mw-to-git / git-mw.perlon commit Merge branch 'jk/push-scrub-url' into maint (71076e1)
   1#!/usr/bin/perl
   2
   3# Copyright (C) 2013
   4#     Benoit Person <benoit.person@ensimag.imag.fr>
   5#     Celestin Matte <celestin.matte@ensimag.imag.fr>
   6# License: GPL v2 or later
   7
   8# Set of tools for git repo with a mediawiki remote.
   9# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/
  10
  11use strict;
  12use warnings;
  13
  14use Getopt::Long;
  15use URI::URL qw(url);
  16use LWP::UserAgent;
  17use HTML::TreeBuilder;
  18
  19use Git;
  20use MediaWiki::API;
  21use Git::Mediawiki qw(clean_filename connect_maybe
  22                                        EMPTY HTTP_CODE_PAGE_NOT_FOUND);
  23
  24# By default, use UTF-8 to communicate with Git and the user
  25binmode STDERR, ':encoding(UTF-8)';
  26binmode STDOUT, ':encoding(UTF-8)';
  27
  28# Global parameters
  29my $verbose = 0;
  30sub v_print {
  31        if ($verbose) {
  32                return print {*STDERR} @_;
  33        }
  34        return;
  35}
  36
  37# Preview parameters
  38my $file_name = EMPTY;
  39my $remote_name = EMPTY;
  40my $preview_file_name = EMPTY;
  41my $autoload = 0;
  42sub file {
  43        $file_name = shift;
  44        return $file_name;
  45}
  46
  47my %commands = (
  48        'help' =>
  49                [\&help, {}, \&help],
  50        'preview' =>
  51                [\&preview, {
  52                        '<>' => \&file,
  53                        'output|o=s' => \$preview_file_name,
  54                        'remote|r=s' => \$remote_name,
  55                        'autoload|a' => \$autoload
  56                }, \&preview_help]
  57);
  58
  59# Search for sub-command
  60my $cmd = $commands{'help'};
  61for (0..@ARGV-1) {
  62        if (defined $commands{$ARGV[$_]}) {
  63                $cmd = $commands{$ARGV[$_]};
  64                splice @ARGV, $_, 1;
  65                last;
  66        }
  67};
  68GetOptions( %{$cmd->[1]},
  69        'help|h' => \&{$cmd->[2]},
  70        'verbose|v'  => \$verbose);
  71
  72# Launch command
  73&{$cmd->[0]};
  74
  75############################# Preview Functions ################################
  76
  77sub preview_help {
  78        print {*STDOUT} <<'END';
  79USAGE: git mw preview [--remote|-r <remote name>] [--autoload|-a]
  80                      [--output|-o <output filename>] [--verbose|-v]
  81                      <blob> | <filename>
  82
  83DESCRIPTION:
  84Preview is an utiliy to preview local content of a mediawiki repo as if it was
  85pushed on the remote.
  86
  87For that, preview searches for the remote name of the current branch's
  88upstream if --remote is not set. If that remote is not found or if it
  89is not a mediawiki, it lists all mediawiki remotes configured and asks
  90you to replay your command with the --remote option set properly.
  91
  92Then, it searches for a file named 'filename'. If it's not found in
  93the current dir, it will assume it's a blob.
  94
  95The content retrieved in the file (or in the blob) will then be parsed
  96by the remote mediawiki and combined with a template retrieved from
  97the mediawiki.
  98
  99Finally, preview will save the HTML result in a file. and autoload it
 100in your default web browser if the option --autoload is present.
 101
 102OPTIONS:
 103    -r <remote name>, --remote <remote name>
 104        If the remote is a mediawiki, the template and the parse engine
 105        used for the preview will be those of that remote.
 106        If not, a list of valid remotes will be shown.
 107
 108    -a, --autoload
 109        Try to load the HTML output in a new tab (or new window) of your
 110        default web browser.
 111
 112    -o <output filename>, --output <output filename>
 113        Change the HTML output filename. Default filename is based on the
 114        input filename with its extension replaced by '.html'.
 115
 116    -v, --verbose
 117        Show more information on what's going on under the hood.
 118END
 119        exit;
 120}
 121
 122sub preview {
 123        my $wiki;
 124        my ($remote_url, $wiki_page_name);
 125        my ($new_content, $template);
 126        my $file_content;
 127
 128        if ($file_name eq EMPTY) {
 129                die "Missing file argument, see `git mw help`\n";
 130        }
 131
 132        v_print("### Selecting remote\n");
 133        if ($remote_name eq EMPTY) {
 134                $remote_name = find_upstream_remote_name();
 135                if ($remote_name) {
 136                        $remote_url = mediawiki_remote_url_maybe($remote_name);
 137                }
 138
 139                if (! $remote_url) {
 140                        my @valid_remotes = find_mediawiki_remotes();
 141
 142                        if ($#valid_remotes == 0) {
 143                                print {*STDERR} "No mediawiki remote in this repo. \n";
 144                                exit 1;
 145                        } else {
 146                                my $remotes_list = join("\n\t", @valid_remotes);
 147                                print {*STDERR} <<"MESSAGE";
 148There are multiple mediawiki remotes, which of:
 149        ${remotes_list}
 150do you want ? Use the -r option to specify the remote.
 151MESSAGE
 152                        }
 153
 154                        exit 1;
 155                }
 156        } else {
 157                if (!is_valid_remote($remote_name)) {
 158                        die "${remote_name} is not a remote\n";
 159                }
 160
 161                $remote_url = mediawiki_remote_url_maybe($remote_name);
 162                if (! $remote_url) {
 163                        die "${remote_name} is not a mediawiki remote\n";
 164                }
 165        }
 166        v_print("selected remote:\n\tname: ${remote_name}\n\turl: ${remote_url}\n");
 167
 168        $wiki = connect_maybe($wiki, $remote_name, $remote_url);
 169
 170        # Read file content
 171        if (! -e $file_name) {
 172                $file_content = git_cmd_try {
 173                        Git::command('cat-file', 'blob', $file_name); }
 174                        "%s failed w/ code %d";
 175
 176                if ($file_name =~ /(.+):(.+)/) {
 177                        $file_name = $2;
 178                }
 179        } else {
 180                open my $read_fh, "<", $file_name
 181                        or die "could not open ${file_name}: $!\n";
 182                $file_content = do { local $/ = undef; <$read_fh> };
 183                close $read_fh
 184                        or die "unable to close: $!\n";
 185        }
 186
 187        v_print("### Retrieving template\n");
 188        ($wiki_page_name = clean_filename($file_name)) =~ s/\.[^.]+$//;
 189        $template = get_template($remote_url, $wiki_page_name);
 190
 191        v_print("### Parsing local content\n");
 192        $new_content = $wiki->api({
 193                action => 'parse',
 194                text => $file_content,
 195                title => $wiki_page_name
 196        }, {
 197                skip_encoding => 1
 198        }) or die "No response from remote mediawiki\n";
 199        $new_content = $new_content->{'parse'}->{'text'}->{'*'};
 200
 201        v_print("### Merging contents\n");
 202        if ($preview_file_name eq EMPTY) {
 203                ($preview_file_name = $file_name) =~ s/\.[^.]+$/.html/;
 204        }
 205        open(my $save_fh, '>:encoding(UTF-8)', $preview_file_name)
 206                or die "Could not open: $!\n";
 207        print {$save_fh} merge_contents($template, $new_content, $remote_url);
 208        close($save_fh)
 209                or die "Could not close: $!\n";
 210
 211        v_print("### Results\n");
 212        if ($autoload) {
 213                v_print("Launching browser w/ file: ${preview_file_name}");
 214                system('git', 'web--browse', $preview_file_name);
 215        } else {
 216                print {*STDERR} "Preview file saved as: ${preview_file_name}\n";
 217        }
 218
 219        exit;
 220}
 221
 222# uses global scope variable: $remote_name
 223sub merge_contents {
 224        my $template = shift;
 225        my $content = shift;
 226        my $remote_url = shift;
 227        my ($content_tree, $html_tree, $mw_content_text);
 228        my $template_content_id = 'bodyContent';
 229
 230        $html_tree = HTML::TreeBuilder->new;
 231        $html_tree->parse($template);
 232
 233        $content_tree = HTML::TreeBuilder->new;
 234        $content_tree->parse($content);
 235
 236        $template_content_id = Git::config("remote.${remote_name}.mwIDcontent")
 237                || $template_content_id;
 238        v_print("Using '${template_content_id}' as the content ID\n");
 239
 240        $mw_content_text = $html_tree->look_down('id', $template_content_id);
 241        if (!defined $mw_content_text) {
 242                print {*STDERR} <<"CONFIG";
 243Could not combine the new content with the template. You might want to
 244configure `mediawiki.IDContent` in your config:
 245        git config --add remote.${remote_name}.mwIDcontent <id>
 246and re-run the command afterward.
 247CONFIG
 248                exit 1;
 249        }
 250        $mw_content_text->delete_content();
 251        $mw_content_text->push_content($content_tree);
 252
 253        make_links_absolute($html_tree, $remote_url);
 254
 255        return $html_tree->as_HTML;
 256}
 257
 258sub make_links_absolute {
 259        my $html_tree = shift;
 260        my $remote_url = shift;
 261        for (@{ $html_tree->extract_links() }) {
 262                my ($link, $element, $attr) = @{ $_ };
 263                my $url = url($link)->canonical;
 264                if ($url !~ /#/) {
 265                        $element->attr($attr, URI->new_abs($url, $remote_url));
 266                }
 267        }
 268        return $html_tree;
 269}
 270
 271sub is_valid_remote {
 272        my $remote = shift;
 273        my @remotes = git_cmd_try {
 274                Git::command('remote') }
 275                "%s failed w/ code %d";
 276        my $found_remote = 0;
 277        foreach my $remote (@remotes) {
 278                if ($remote eq $remote) {
 279                        $found_remote = 1;
 280                        last;
 281                }
 282        }
 283        return $found_remote;
 284}
 285
 286sub find_mediawiki_remotes {
 287        my @remotes = git_cmd_try {
 288                Git::command('remote'); }
 289                "%s failed w/ code %d";
 290        my $remote_url;
 291        my @valid_remotes = ();
 292        foreach my $remote (@remotes) {
 293                $remote_url = mediawiki_remote_url_maybe($remote);
 294                if ($remote_url) {
 295                        push(@valid_remotes, $remote);
 296                }
 297        }
 298        return @valid_remotes;
 299}
 300
 301sub find_upstream_remote_name {
 302        my $current_branch = git_cmd_try {
 303                Git::command_oneline('symbolic-ref', '--short', 'HEAD') }
 304                "%s failed w/ code %d";
 305        return Git::config("branch.${current_branch}.remote");
 306}
 307
 308sub mediawiki_remote_url_maybe {
 309        my $remote = shift;
 310
 311        # Find remote url
 312        my $remote_url = Git::config("remote.${remote}.url");
 313        if ($remote_url =~ s/mediawiki::(.*)/$1/) {
 314                return url($remote_url)->canonical;
 315        }
 316
 317        return;
 318}
 319
 320sub get_template {
 321        my $url = shift;
 322        my $page_name = shift;
 323        my ($req, $res, $code, $url_after);
 324
 325        $req = LWP::UserAgent->new;
 326        if ($verbose) {
 327                $req->show_progress(1);
 328        }
 329
 330        $res = $req->get("${url}/index.php?title=${page_name}");
 331        if (!$res->is_success) {
 332                $code = $res->code;
 333                $url_after = $res->request()->uri(); # resolve all redirections
 334                if ($code == HTTP_CODE_PAGE_NOT_FOUND) {
 335                        if ($verbose) {
 336                                print {*STDERR} <<"WARNING";
 337Warning: Failed to retrieve '$page_name'. Create it on the mediawiki if you want
 338all the links to work properly.
 339Trying to use the mediawiki homepage as a fallback template ...
 340WARNING
 341                        }
 342
 343                        # LWP automatically redirects GET request
 344                        $res = $req->get("${url}/index.php");
 345                        if (!$res->is_success) {
 346                                $url_after = $res->request()->uri(); # resolve all redirections
 347                                die "Failed to get homepage @ ${url_after} w/ code ${code}\n";
 348                        }
 349                } else {
 350                        die "Failed to get '${page_name}' @ ${url_after} w/ code ${code}\n";
 351                }
 352        }
 353
 354        return $res->decoded_content;
 355}
 356
 357############################## Help Functions ##################################
 358
 359sub help {
 360        print {*STDOUT} <<'END';
 361usage: git mw <command> <args>
 362
 363git mw commands are:
 364    help        Display help information about git mw
 365    preview     Parse and render local file into HTML
 366END
 367        exit;
 368}