[Pdbwiki-devel] MediaWiki edit summary script (Perl, MediaWiki::API)

Dan Bolser dan.bolser at gmail.com
Tue Jan 18 21:56:52 EST 2011


Hi guys, I couldn't sleep so I wrote this (quite fun!):



#!/usr/bin/perl -w

## Prototype script to collect and summarise a month's worth of edits
## from a given MediaWiki site.

## For information, see:
## http://search.cpan.org/dist/MediaWiki-API/lib/MediaWiki/API.pm
## http://www.mediawiki.org/wiki/API:Query_-_Lists#recentchanges_.2F_rc

use strict;

use Data::Dumper;

use DateTime;

use MediaWiki::API;



## CONNECT TO AN API

## Endpoint of the wiki to summarise.  The first command-line argument,
## when given and non-empty, overrides the default below, e.g.:
##   http://en.wikipedia.org/w/api.php
##   http://seqanswers.com/w/api.php
my $default_api_url = 'http://pdbwiki.org/api.php';
my $api_url =
  ( defined $ARGV[0] && length $ARGV[0] ) ? shift @ARGV : $default_api_url;



## Get API object for the given URL
my $mw = MediaWiki::API->new({ api_url => $api_url });



## Configure a default error function (saves us checking for errors
## after every API call)
$mw->{config}->{on_error} = \&on_error;

## The error function: installed as the API object's 'on_error'
## callback.  Reports whatever error fields the API object recorded and
## then aborts the script.
##
## Diagnostics go to STDERR so they never mix with the report printed on
## STDOUT, and fields that are not set are skipped (or replaced by a
## placeholder) so that -w does not add 'uninitialized' noise.
sub on_error {
  my $error = $mw->{error} || {};

  print STDERR "Error code: ",
    ( defined $error->{code} ? $error->{code} : 'unknown' ), "\n";

  foreach my $field (qw(details stacktrace)) {
    print STDERR $error->{$field}, "\n" if defined $error->{$field};
  }

  die "err\n";
}



## Ask the wiki for its site information and report the site name
my $siteinfo = $mw->api({
  action => 'query',
  meta   => 'siteinfo',
});
my $sitename = $siteinfo->{query}->{general}->{sitename};
print "Sitename: '", $sitename, "'\n";



## Grab the recent changes list

## Start of the reporting window: one month before now, as epoch seconds
my $rcstart =
  DateTime->now->subtract(months => 1)->epoch;

## Debugging
#my $rcstart =
#  DateTime->now->subtract(hours => 7)->epoch;

warn "collecting changes since $rcstart\n";
my $rc_array = $mw->list(
  {
    action  => 'query',
    list    => 'recentchanges',

    ## Get changes since:
    rcdir   => 'newer',
    rcstart => $rcstart,

    ## Number of revisions to collect in each batch of results
    ## returned by the API
    rclimit => '500',

    ## Filters.  Both conditions must share ONE rcshow value, joined
    ## with '|': a hash cannot hold the same key twice, so giving
    ## rcshow on two separate lines silently keeps only the last one.
    rcshow  => '!minor|!bot',

    #rcexcludeuser => '',
    #rctype => edit / new / log,

    ## Properties
    rcprop  => 'user|comment|timestamp|title|sizes|flags',
  },
  {
    ## Config

    ## Process result as they come in with this function
    ## (responsible for returning something useful).
    #hook => \&look_hook,

    ## Max number of batches to collect (for debugging).
    ## NOTE: still set to the debugging value, so no more than one
    ## batch is collected; raise or remove it for a full month.
    max => 1,
  }
);

#sub look_hook{
#  print "hi\n";
#}

my $revision_count = @$rc_array;
warn 'found ', $revision_count, " revisions\n";

## Debugging
#print Dumper $rc_array;



## Compile edit statistics for the month

## Tallies of changes: %users is keyed by the 'user' field of each
## change, %pages by its 'title' field
my %users;
my %pages;

for my $change (@$rc_array) {

  ## Debugging
  #print Dumper $change;

  $users{ $change->{user}  } += 1;
  $pages{ $change->{title} } += 1;
}

warn "OK\n";



## Report the tallies, busiest first.  Entries with equal counts are
## ordered by name so that the output is the same on every run (hash
## key order alone is not stable between runs).
print "users:\n";
print "$_\t$users{$_}\n" for sort u keys %users;

print "pages:\n";
print "$_\t$pages{$_}\n" for sort p keys %pages;

## Sort comparators: descending count, then ascending name as tie-break
sub u { $users{$b} <=> $users{$a} or $a cmp $b }
sub p { $pages{$b} <=> $pages{$a} or $a cmp $b }



__END__

# 20:06 -!- dbolser [~dmb at bioinformatics.org] has joined #perl
# 20:06 < dbolser> how can I get the current date, minus one month,
# formatted like this "2011-01-18T21:31:02Z"?
# 20:07  * GumbyPAN CPAN Upload: SDL-Tutorial-3DWorld-0.33 by ADAMK
# 20:07 < tm604> dbolser: DateTime->now->subtract(months =>
# 1)->iso8601
# 20:07 < dbolser> tm604: You are better than Google
# 20:07 < ology> dbolser: perldoc POSIX search for strftime
# 20:08 < dbolser> tys
# 20:08 < tm604> might also need to specify the timezone directly to
# get the trailing Z.



More information about the Pdbwiki-devel mailing list