BioWiki/collect and upload bifx org stats.plx

From Bioinformatics.Org Wiki

Jump to: navigation, search

First fully functional prototype 'wiki activity monitor' script.

#!/usr/bin/perl -w
## Prototype script to collect and summarise a month's worth of edits
## from a given MediaWiki (MW) site.
## For information, see the MediaWiki API documentation (API:RecentChanges).
use strict;
use Data::Dumper;
use DateTime;
use MediaWiki::API;
use Digest::MD5 qw(md5_hex);
## Start of the reporting window: one month before now. We use an
## 'epoch' format time here, because it's easy to pass to MediaWiki
## (it is sent as the 'rcstart' parameter of the recentchanges query).
my $rcstart =
  DateTime->now->subtract(months => 1)->epoch;
## Debugging
#my $rcstart =
#  DateTime->now->subtract(minutes => 1)->epoch;

## URL of the wiki's api.php endpoint. It is taken from the environment
## so that no site address has to be hard-coded in the script.
my $api_url =
  defined $ENV{BIOWIKI_API_URL} ? $ENV{BIOWIKI_API_URL} : '';
## Fail fast with a readable message: with an empty URL the first API
## call would otherwise only produce the generic "err" from on_error.
die "no MediaWiki API URL configured (set BIOWIKI_API_URL)\n"
  unless length $api_url;

## CONNECT TO THE BIOWIKIAPI
## Get API object for the given URL ('retries' is passed through to
## MediaWiki::API unchanged).
my $mw = MediaWiki::API->
  new({ api_url => $api_url, retries => 5 });
## Configure a default error function (saves us checking for errors)
$mw->{config}->{on_error} = \&on_error;
## The error function.
## Installed above as the MediaWiki::API 'on_error' callback. It reports
## the code, details and stack trace stored in $mw->{error} on STDERR and
## then aborts the script.
## Takes no arguments (reads the file-level $mw); never returns.
sub on_error {
  warn "Error code: ", $mw->{error}->{code}, "\n";
  warn $mw->{error}->{details}, "\n";
  warn $mw->{error}->{stacktrace}, "\n";
  die "err\n";
}
## Connection smoke test: fetch the wiki's general site information and
## report its site name on STDERR.
my $ref = $mw->api({
  action => 'query',
  meta   => 'siteinfo',
});
my $sitename = $ref->{query}{general}{sitename};
warn "Sitename: '", $sitename, "'\n";
## Grab the recent changes list (object).
## $rc_array becomes a reference to an array with one hash per change;
## the statistics loop below reads its type, logtype, logaction, user,
## title, new, minor and bot entries.
warn "collecting changes since $rcstart\n";
my $rc_array = $mw->
  list ({
         action  => 'query',
         list    => 'recentchanges',
         ## Get changes since:
         rcdir   => 'newer',
         rcstart => $rcstart,
         ## Number of revisions to collect in each batch of results
         ## returned by the API
         rclimit => '500',
         ## Filters: Lets post process these (using flags), !filter
         #rcshow => '!minor|!bot',
         #rctype => 'edit|new|log',
         #rcexcludeuser => '',
         ## Properties to return (see API:RecentChanges).
         ## NOTE(review): the value was missing from the listing. This
         ## is the minimal set the statistics code needs: 'flags' for
         ## new/minor/bot, 'loginfo' for logtype/logaction.
         rcprop  => 'user|title|timestamp|flags|loginfo',
        },
        {
         ## MW::API Config
         ## Process result as they come in with this function
         ## (responsible for returning something useful).
         #hook => \&look_hook,
         ## Max number of batches to collect (for debugging)
         #max => 1
        });

#sub look_hook{
#  warn "hi\n";
#}

warn 'found ', scalar(@$rc_array), " revisions\n";
## Debugging
#warn Dumper $rc_array;
## Compile edit statistics for the month
## here we collect four counts:
## Number of active users (number of new users)
## Number of pages edited (number of new pages)
## Number of edits
##
## %users / %pages map a user name / page title to the number of counted
## changes, so 'scalar keys' gives the number of active users / pages.
## The counters start at 0 so that a quiet month is reported as "0"
## rather than as an undefined value.
my (%users, %pages);
my $number_of_new_users = 0;
my $number_of_new_pages = 0;
my $total_edits         = 0;

foreach my $rc (@$rc_array){
  ## Debugging
  #warn Dumper $rc;
  #warn "paused\n";
  #my $x = <STDIN>;

  if($rc->{type} eq 'log'){
    if($rc->{logtype} eq 'newusers'){
      ## Sanity check: any other logaction aborts with a dump of the
      ## offending record.
      die Dumper $rc unless $rc->{logaction} eq 'create';
      #warn 'new user: ', $rc->{user}, "\n";
      $number_of_new_users++;
    }
    ## No other logtypes (e.g. delete, block, upload, move, ...)
    ## concern us here.
    ## TODO: We could look at the deletion log and see if any of our
    ## new users or new pages for the month should be deleted...
  }
  elsif($rc->{type} eq 'new'){
    ## Sanity check
    die Dumper $rc unless defined($rc->{new});
    #warn 'new page: ', $rc->{title}, "\n";
    ## A page creation counts as a new page, as activity for its
    ## author and page, and as an edit.
    ## NOTE(review): only the %users line survived in the listing;
    ## the other three counters are reconstructed - confirm.
    $number_of_new_pages++;
    $users{$rc->{ user}}++;
    $pages{$rc->{title}}++;
    $total_edits++;
  }
  elsif($rc->{type} eq 'edit'){
    ## Filter bots and minor edits
    ## Need a username kill list here?
    next if defined($rc->{minor});
    ## The listing only skipped minor edits although the comment above
    ## also promises a bot filter; this script's own upload is flagged
    ## as a bot edit, so skip those too.
    next if defined($rc->{bot});
    $users{$rc->{ user}}++;
    $pages{$rc->{title}}++;
    $total_edits++;
  }
  else{
    ## Unknown change type: stop and show the record.
    die Dumper $rc;
  }
}
## Statistics are complete: note it on STDERR, then write the
## five-line summary to STDOUT.
warn "OK\n";
my $active_user_count = keys %users;
my $active_page_count = keys %pages;
print join('',
  "active users = $active_user_count\n",
  " new users = $number_of_new_users\n",
  "active pages = $active_page_count\n",
  " new pages = $number_of_new_pages\n",
  "total edits = $total_edits\n",
);
## OK, now we have to upload...
# Step 1) parse the existing BioWiki page
## In this simple case we don't need a new MW::API object for upload
my $page_title = 'Bioinformatics.Org Wiki';
my $page_ref = $mw->
  get_page({ title => $page_title });
#print Dumper $page_ref;
my $page_text = $page_ref->{'*'};
## Without page text (e.g. the page does not exist) there is nothing to
## parse; say so instead of matching against an undefined value.
die "failed to fetch page '$page_title'\n"
  unless defined $page_text;

## Parse out the (first) 'BioWiki' template from the page text:
##   $pre_text       - everything before the template
##   $template_title - the template name (BioWiki in this case)
##   $template_body  - the text between the first '|' and the first '}}'
##   $post_text      - everything after the template
## The braces are escaped because newer perls reject or deprecate an
## unescaped literal '{' in a pattern. The body ends at the first '}}',
## so a nested template inside the body is not supported.
my ($pre_text, $template_title, $template_body, $post_text) =
  $page_text =~ /^(.*?)\{\{(BioWiki)\s*\|(.*?)\}\}(.*)$/s;
die "failed to parse page '$page_title'\n"
  unless defined $template_title;

## Strip newlines from the template body
$template_body =~ s/\n//g;

## Parse the template fields into %fields (name => value).
## Each '|'-separated part is split on its FIRST '=' only, so values
## may contain '=', and both halves are trimmed, so the padded layout
## written back by this script ("|logo file    = x") is read again
## under the plain key 'logo file'. A field without a value is stored
## as ''. Values containing '|' (e.g. piped wiki links) are still not
## supported.
my %fields;
for my $part (split /\|/, $template_body){
  my ($name, $value) = split /=/, $part, 2;
  next unless defined $name;
  $value = '' unless defined $value;
  for my $text ($name, $value){
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
  }
  next unless length $name;
  $fields{$name} = $value;
}
#print "$_\t$fields{$_}\n" for keys %fields;

## Sanity check fields: warn about any field name this script does not
## know about (such a field is not written back to the page).
my %known_field = map { $_ => 1 } (
  'date created', 'logo file', 'num pages', 'num users',
  'num contribs', 'contribs', 'platform', 'extensions',
  'url', 'people', 'email', 'institutions',
  'num users active', 'num users new',
  'num pages active', 'num pages new', 'num edits',
);
for my $name (sort keys %fields){
  next if $known_field{$name};
  # missed
  warn "How did we miss : '$name' ?\n";
}
# Step 2) create the new page text
## The page is rebuilt as: text before the template, the template with
## the descriptive fields carried over from the current page and the
## five statistics fields replaced by this month's numbers, then the
## text after the template.
## NOTE(review): the template wrapper ('{{BioWiki' ... '}}') and the use
## of $pre_text/$post_text were missing from the listing and are
## reconstructed from the captures made in step 1 - confirm.

## Fields copied unchanged from the existing template, in output order.
my @kept_fields = (
  'date created', 'logo file', 'num pages', 'num users',
  'num contribs', 'contribs', 'platform', 'extensions',
  'url', 'people', 'email', 'institutions',
);
## Fields recomputed on every run, in output order.
my @stat_fields = (
  [ 'num users active', scalar(keys %users)  ],
  [ 'num users new',    $number_of_new_users ],
  [ 'num pages active', scalar(keys %pages)  ],
  [ 'num pages new',    $number_of_new_pages ],
  [ 'num edits',        $total_edits         ],
);

my $new_page_text = $pre_text . '{{' . $template_title . "\n";
for my $name (@kept_fields){
  ## 'defined' rather than '||' so that a legitimate value of "0" is
  ## carried over instead of being blanked.
  my $value = defined $fields{$name} ? $fields{$name} : '';
  ## Names are padded to 12 columns to keep the '=' signs aligned.
  $new_page_text .= sprintf(" |%-12s = %s\n", $name, $value);
}
for my $stat (@stat_fields){
  my ($name, $value) = @$stat;
  ## The statistics names are longer: pad to 16 columns.
  $new_page_text .= sprintf(" |%-16s = %s\n", $name, $value);
}
$new_page_text .= '}}' . $post_text;
#print $new_page_text, "\n";
# Step 3) upload the new page
warn "writing\n";
## In this simple case we don't need a new MW::API object for upload,
## however, we need to login now to write.
## The bot account can be supplied through the environment so that the
## password does not have to live in the script; the placeholders are
## kept as the fallback.
my $bot_name =
  defined $ENV{BIOWIKI_BOT_USER} ? $ENV{BIOWIKI_BOT_USER}
                                 : '<my pet bot>';
my $bot_password =
  defined $ENV{BIOWIKI_BOT_PASSWORD} ? $ENV{BIOWIKI_BOT_PASSWORD}
                                     : '<my secret>';
$mw->
  login({ lgname     => $bot_name,
          lgpassword => $bot_password,
        });

## To avoid edit conflicts
my $timestamp = $page_ref->{timestamp};

## md5_hex works on bytes: it croaks on characters above 255 and hashes
## the wrong bytes for other non-ASCII characters. Hash a UTF-8 encoded
## copy of the text.
## NOTE(review): this assumes MediaWiki::API sends 'text' UTF-8 encoded
## (its default) - confirm if skip_encoding is ever enabled.
my $new_page_octets = $new_page_text;
utf8::encode($new_page_octets);

$mw->
  edit({ action => 'edit',
         title => $page_title,
         ## To avoid edit conflicts
         basetimestamp => $timestamp,
         text => $new_page_text,
         summary => "uploading statistics for $page_title",
         ## Mark the edit as a bot edit.
         bot => '',
         ## Guard against encoding corruption (I hope!)
         md5 => md5_hex($new_page_octets),
       });
warn "OK\n";
Personal tools
wiki navigation