use MediaWiki::Bot;
use Encode;
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;#$
# Gently pruned from the standard exclusion code to hardcode $user and $opt
sub allowBots {
my($text) = @_;
my $user = "Joe's Null Bot";
return 0 if $text =~ /{{[nN]obots}}/;
return 1 if $text =~ /{{[bB]ots}}/;
if($text =~ /{{[bB]ots\s*\|\s*allow\s*=\s*(.*?)\s*}}/s){
return 1 if $1 eq 'all';
return 0 if $1 eq 'none';
my @bots = split(/\s*,\s*/, $1);
return (grep $_ eq $user, @bots)?1:0;
}
if($text =~ /{{[bB]ots\s*\|\s*deny\s*=\s*(.*?)\s*}}/s){
return 0 if $1 eq 'all';
return 1 if $1 eq 'none';
my @bots = split(/\s*,\s*/, $1);
return (grep $_ eq $user, @bots)?0:1;
}
return 1;
}
# Have the bot check in to see if it's run past it's "expiration date", typically of 86400 seconds
# (that is, one day). Mostly here to avoid ten copies of the bot running if nothing can run for
# ten days.
$epoch = time();
$listcount =0;
$purgecount = 0;
sub check_expirations() {
my $secs = time() - $epoch;
if ($secs > 86400) {
die "Bot expired of old age.\n";
}
if ($purgecount > 1500) {
die "This category is looking disturbingly large. Quitting.\n";
}
}
# Within a single MediaWiki call, we ask the API to make up to 5 attempts, 10 s apart, until
# the worst-case server lag is better than 5s.
my $mw = MediaWiki::Bot->new({
assert => 'bot',
host => 'en.wikipedia.org',
protocol => 'https',
operator => "Joe's Null Bot",
#debug => "2",
}) or die "new mwbot fail";
$mw->{config}->{api_url} = 'https://en.wikipedia.org/w/api.php';
# Delay/retry parameters
$mw->{config}->{max_lag} = 5; # Tell MediaWiki to put us off it there's a 5s+ db lag out there
$mw->{config}->{max_lag_delay} = 10; # ..and to wait 10s between retries
$mw->{config}->{max_lag_retries} = 4; # ..and to only make 4 retries before dropping back to our code
# Our own delay parameters
$standardelay = 2; # Wait 15s or more between purge calls....
$longdelay = 50; # ...if the API puts us off several times in a row, take a 15-minute break
my $articles = null;
# login
while (1) {
if ($mw->login( { username => "Joe's Null Bot", password => 'REDACTED' } )) {
last;
}
check_expirations();
if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
sleep $longdelay;
} elsif ($mw->{error}->{details} =~ /Bad Gateway/) {
print "bad gateway\n";
sleep $longdelay;
} else {
die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}
}
# Get list of articles
while (1) {
check_expirations();
$articles = $mw->list ( {
action => 'query',
list => 'categorymembers',
cmtitle => 'Category:Pending AfC submissions',
cmlimit => 'max'} );
if ($articles) { last; }
if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
sleep $longdelay;
} elsif ($mw->{error}->{details} =~ /Bad Gateway/) {
print "bad gateway\n";
sleep $longdelay;
} else {
die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}
}
# scan through the articles...
foreach (@{$articles}) {
my $thistitle = $_->{title};
$listcount++;
print "T: " . encode("iso-8859-1", $thistitle) . "\n";
while (1) {
check_expirations();
my $pagehash = $mw->get_page( { title => $thistitle } );
if ($pagehash) { last; }
if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
sleep $longdelay;
} elsif ($mw->{error}->{details} =~ /Bad Gateway/) {
print "bad gateway\n";
sleep $longdelay;
} else {
die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}
}
sleep $standardelay; # There's no hurry!
if (allowBots($pagehash->{'*'})) {
$purgecount++;
while (1) {
check_expirations();
# …and purge each one
my $apires = $mw->api( {
action => 'purge', titles => $thistitle, forcelinkupdate => 1} );
if ($apires) { last; }
if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
sleep $longdelay;
} elsif ($mw->{error}->{details} =~ /Bad Gateway/) {
print "bad gateway\n";
sleep $longdelay;
} else {
die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}
}
} else {
print "….DENIED\n";
}
}
my $apires = $mw->api( { action => 'purge', titles => "Category:AfC pending submissions by age"} );
if ($apires) {
print "AFCpsba: cat purged\n";
} else {
if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
die "AFCpsba: delayed for replag\n";
} else {
die "AFCpsba: " . $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}
}
print $purgecount . " from a total list of " . $listcount . " articles in " . (time()-$epoch) . " seconds.\n";