Uzanto:SistMesEspigRoboto/fontokodo

El Vikipedio, la libera enciklopedio

Fontokodo de la roboto

Atentigo: ĉi-sube aperas "</pre__FORIGU__>". Devus esti </pre>, sed ne facilas montri tion en la fontokodo, kiu mem estas inter <pre> kaj </pre>. Do, antaŭ ol uzi la programon, vi devos mem ŝanĝi ĝin al </pre>.

Por mem uzi la programon, vi devus ŝanĝi ankaŭ la uzantonomon (serĉu "userid") kaj la pasvorton (serĉu "password"). Ŝanĝu ankaŭ la prokuran servilon (angle "proxy") - ĝi estu en la formato http://host:port/ - aŭ, se vi ne bezonas ĝin, simple forigu la ĉi-suban linion pri ĝi ("$ua->proxy('http',$proxy)").

#!/usr/bin/perl

use strict;
use LWP;
use LWP::UserAgent;
use HTML::Form;
use Encode;

# --- connection settings ----------------------------------
# Proxy handed to the user agent for http requests, and the account the
# robot logs in with.  The proxy and password values are placeholders.
my $proxy    = '<DETALOJ KAŜITAJ>';
my $userid   = 'SistMesEspigRoboto';
my $password = '<DETALOJ KAŜITAJ>';

# --- wiki addresses ---------------------------------------
# Every page URL is this stem followed by the page title.
my $url_stem       = 'http://eo.wikipedia.org/w/index.php?title=';
my $login_form_url = "${url_stem}Speciala:Userlogin";

# Input page: the name (written with "x"s) goes into edit summaries,
# while the URL carries the same title percent-encoded.
my $input_page_name = 'Vikipedio:Sxangxendaj sistemaj mesagxoj';
my $input_page_url  = "${url_stem}Vikipedio:%C5%9Can%C4%9Dendaj_sistemaj_mesa%C4%9Doj";

# Report page: each run appends a summary of its edits here.
my $report_page_name = 'Vikipediisto:SistMesEspigRoboto/raporto';
my $report_page_url  = "${url_stem}Vikipediisto:SistMesEspigRoboto/raporto";

# Scratch variables reused by every step of the script below.
my ($response, $form, $content);

#-----------------------------------------------------------
# initialise

# Create the HTTP client used for every request below.  Failure is fatal.
# This runs at file level, not inside a subroutine, so the failure branch
# must die: a "return" here would itself abort with "Can't return outside
# a subroutine" instead of reporting the real problem.
my $ua = LWP::UserAgent->new() or die "no lwp";

# Give the agent a cookie jar so cookies set by the login response are
# sent again with the later edit requests.
$ua->cookie_jar({});

# Route http requests through the configured proxy.
$ua->proxy('http',$proxy);

#-----------------------------------------------------------
# log in

print "Logging in\n";

# Fetch the login page; report the HTTP status if that fails rather than
# letting the form lookup below fail with a less specific message.
$response = $ua->get($login_form_url);
die "can't fetch login form: " . $response->status_line
    unless $response->is_success;

# Fill in and submit the form whose name attribute is "userlogin".
$form = find_form ($response, 'name', 'userlogin');

$form->param('wpName',$userid);
$form->param('wpPassword',$password);

$response = $ua->request($form->click());

# The returned page is expected to name the logged-in user in its
# JavaScript.  \Q...\E makes the user name match literally, so a name
# containing regex metacharacters (".", "(", "+", ...) cannot produce a
# false match or a spurious failure.
die "login failed" unless $response->content =~ /var wgUserName = "\Q$userid\E"/;

#-----------------------------------------------------------
# read input page:

print "Reading input page\n";
$response = $ua->get("$input_page_url&action=edit");

# the wikitext sits in the text box of the edit form
$form    = find_form($response, 'name', 'editform');
$content = $form->param('wpTextbox1');

# split it into the requested pages and the text that stays on the page
my ($titles_ref, $contents_ref, $newcontent) = parse_input_page($content);
my @page_titles   = @{ $titles_ref };
my @page_contents = @{ $contents_ref };

# the edit page's JavaScript names the revision we have just read
my $id;
if ($response->content =~ /var wgCurRevisionId = "([0-9]+)";/) {
    $id = $1;
}
else {
    die "can't get revision ID";
}

# Fetch that same revision once more with action=raw and parse it too.
# REASON: the raw version has the "x"s etc substituted, so its titles are
# the ones to use when addressing the pages.
$response = $ua->get("$input_page_url&oldid=$id&action=raw");
$content  = $response->content;
my ($raw_titles_ref) = parse_input_page($content);
my @raw_page_titles  = @{ $raw_titles_ref };

# sanity check - both parses must have found the same number of requests
my $num_edits = @page_titles;
if ($num_edits != @raw_page_titles) {
    die "mismatch between raw and edit-box page titles";
}

#-----------------------------------------------------------

print "There are $num_edits edits to do.\n";

# nothing requested: leave every page untouched
exit unless $num_edits;

# Save the input page with the accepted requests taken out.  $form is
# still the edit form that was fetched when the input page was read.
print "Writing edited input page\n";

$form->param('wpSummary',"Roboto akceptis $num_edits petitajn redaktojn");
$form->param('wpTextbox1',$newcontent);

# uncomment if edit should be marked as minor - see also below
#$form->param('wpMinoredit',1);

# utf8_encode hands the same form back, so it can be submitted directly
$response = $ua->request(utf8_encode($form)->click());

#-----------------------------------------------------------

# now write each of the edited pages...

for my $idx (0 .. $#page_titles) {

    # The raw title addresses the page; the content comes from the
    # edit-box parse at the same position.
    my ($target, $wikitext) = ($raw_page_titles[$idx], $page_contents[$idx]);

    print "Editing $target:\n";

    # NOTE(review): the title is appended to the URL as-is; titles that
    # contain characters such as "&", "#" or "+" may need escaping - confirm.
    print "   getting edit form\n";
    $response = $ua->get("$url_stem$target&action=edit");

    # replace the whole page text and say where it came from
    $form = find_form($response, 'name', 'editform');
    $form->param('wpSummary',"Roboto kopias enhavon el [[$input_page_name]]");
    $form->param('wpTextbox1',$wikitext);

    # uncomment if the edit should be marked as minor
    #$form->param('wpMinoredit',1);

    print "   saving\n";
    $response = $ua->request(utf8_encode($form)->click());

    # uncomment to pause between edits
    #sleep(10);
}


# and now append to the report page...
print "Writing report:\n";

# UTC timestamp (YYYY-MM-DD hh:mm:ss) used in the report heading and in
# the edit summary.
my @date = gmtime(time);
my $date_string = sprintf("%04d-%02d-%02d %02d:%02d:%02d",
                          $date[5]+1900,$date[4]+1,$date[3],$date[2],$date[1],$date[0]);

print "   getting edit form\n";
$response = $ua->get($report_page_url.'&action=edit');
$form = find_form ($response, 'name', 'editform' );

# Start from the current report text; this run's section is appended to it.
my $report_page_contents = $form->param('wpTextbox1');

# Section heading and table header.
$report_page_contents .= <<EOF;
=== Redaktoj je $date_string ===

La roboto redaktis la jena(j)n pagxo(j)n je proksimume $date_string.

<table border="1" cellpadding="5" cellspacing="0">
<tr><th>Pagxo</th><th>Vikiteksto</th><th>Aperas kiel</th></tr>
EOF

# One table row per edited page: a link to the page, its wikitext shown
# literally, and the same wikitext left for the wiki to render.
#
# NOTE: the "pre__FORIGU__" tag in the row template is a placeholder for
# the real closing pre tag.  It is spelled that way only because this
# listing is itself published between pre tags on a wiki page; replace it
# before running the script.
for my $seq (0 .. $#page_titles) {
    # The title as written in the edit box (with "x"s) is used for the link.
    # The template previously read "\$page title", which refers to an
    # undeclared variable and does not compile under "use strict".
    my $page_title = $page_titles[$seq];
    $report_page_contents .= <<EOF;
<tr>
<td>
[[$page_title]]
</td>
<td>
<nowiki><pre></nowiki>
$page_contents[$seq]
<nowiki></pre__FORIGU__></nowiki>
</td>
<td>
$page_contents[$seq]
</td>
</tr>
EOF
}
$report_page_contents .= <<EOF;
</table>
EOF

$form->param('wpTextbox1',$report_page_contents);
$form->param('wpSummary',"Roboto aldonas raporton je $date_string");
# uncomment if the edit should be marked as minor
#$form->param('wpMinoredit',1);

print "   saving\n";
utf8_encode($form);
$response = $ua->request($form->click());

print "Done\n";

#================================================
# subroutines

sub utf8_encode
{
    # UTF-8 encode all parameters of a form, in place.
    #
    # Argument: a form object offering the param() interface used below
    #           (the script passes HTML::Form objects).
    # Returns:  the same form object, so calls can be chained.
    my $form = shift;

    for my $name ($form->param) {
        # Read in list context so that EVERY value stored under this name
        # is collected.  A scalar read sees only the first value, and
        # writing that single value back leaves the remaining inputs of
        # the same name without their values.
        my @values = $form->param($name);

        # No defined value under this name: nothing to encode.  (Calling
        # param() with the name alone would be a read, not a write.)
        next unless @values;

        $form->param($name, map { encode("utf8", $_) } @values);
    }
    return $form;
}

sub find_form
{
    # Return the one form in a fetched page whose attribute $name equals
    # $val.  $uaobj is whatever HTML::Form->parse accepts (the script
    # passes the response of a GET request).  Dies unless exactly one
    # form matches.
    my ($uaobj, $name, $val) = @_;

    my @hits;
    for my $candidate (HTML::Form->parse( $uaobj )) {
        push @hits, $candidate if $candidate->attr($name) eq $val;
    }

    unless (@hits == 1) {
        die "There is not exactly one form whose attribute $name is $val";
    }

    return $hits[0];
}

sub parse_input_page
{
    # Split the wikitext of the input page into the requested page edits
    # and the text that is to remain on the input page.
    #
    # A request is a block of lines of the form
    #     :@%[[Page title]]
    #     ...new page content...
    #     :@%fino
    # (trailing whitespace after either marker is tolerated).
    #
    # Argument: the page's wikitext as one string.
    # Returns a three-element list:
    #     reference to the array of requested page titles,
    #     reference to the array of their new contents (same order),
    #     the input page text with all complete requests removed.
    # The third element is always a defined string; it is '' when no line
    # lies outside a request (previously it was undef in that case).
    my $content = shift;
    $content = '' unless defined $content;

    my @lines = split("\n",$content);
    my @page_titles = ();
    my @page_contents = ();
    my $newcontent = '';     # for input page
    my $page_title = undef;  # defined only while inside a request block
    my $page_content;

    # parse the page into required pages
    for my $line (@lines) {
        if (defined ($page_title)) {
            if ($line =~ /^:\@%fino\s*$/) {
                # end of required page
                push @page_titles, $page_title;
                push @page_contents, $page_content;
                $page_title = undef;
            } else {
                # in required page
                $page_content .= "$line\n";
            }
        } elsif ($line =~ (/^:\@%\[\[(.*)\]\]\s*$/)) {
            # start of required page
            $page_title = $1;
            $page_content = '';
        } else {
            # outside required page
            $newcontent .= "$line\n";
        }
    }
    if (defined $page_title) {
        # unterminated output page content at end of processing gets
        # shoved back onto end of input page (its title marker line is
        # not restored)
        $newcontent .= $page_content;
    }

    return (\@page_titles, \@page_contents,$newcontent);
}