BioWiki/collect and upload bifx org stats.plx

From Bioinformatics.Org Wiki

Jump to: navigation, search

First fully functional prototype 'wiki activity monitor' script.

#!/usr/bin/perl -w
 
## Prototype script to collect and summarise a month's worth of edits
## from a given MediaWiki (MW) installation.
 
## For information, see:
## http://search.cpan.org/dist/MediaWiki-API/lib/MediaWiki/API.pm
## http://www.mediawiki.org/wiki/API:Query_-_Lists#recentchanges_.2F_rc
 
use strict;
 
use Data::Dumper;
 
use DateTime;
 
use MediaWiki::API;
 
use Digest::MD5 qw(md5_hex);
 
 
 
## Start of the reporting window: one month before now. The value is
## kept in 'epoch' format because that is easy to pass to MediaWiki.
my $window_start = DateTime->now->subtract(months => 1);
my $rcstart      = $window_start->epoch;

## Debugging: look back one minute instead of one month
#my $rcstart =
#  DateTime->now->subtract(minutes => 1)->epoch;



## CONNECT TO AN API

## The wiki whose activity is monitored. The BioWiki API that receives
## the upload is the same one in this case.
my $api_url = 'http://www.bioinformatics.org/w/api.php';

## Configuration for the API object. Registering on_error as the
## default error function saves us checking every call for errors.
my %api_config = (
  api_url  => $api_url,
  retries  => 5,
  on_error => \&on_error,
);

## Get API object for the given URL
my $mw = MediaWiki::API->new(\%api_config);
 
## The error function: reports the code, details and stack trace held
## in $mw->{error} (one warning each), then aborts the script.
sub on_error {
  my $failure = $mw->{error};
  warn 'Error code: ', $failure->{code}, "\n";
  warn $failure->{$_}, "\n" for qw(details stacktrace);
  die "err\n";
}
 
 
 
## Print the site name (shows we got a connection)
my $ref = $mw->api({ meta => 'siteinfo', action => 'query' });
my $sitename = $ref->{query}->{general}->{sitename};
warn "Sitename: '", $sitename, "'\n";



## Grab the recent changes list (object)

## Query parameters sent to the API
my %rc_query = (
  action  => 'query',
  list    => 'recentchanges',

  ## Walk forwards in time, starting from $rcstart
  rcdir   => 'newer',
  rcstart => $rcstart,

  ## Number of revisions to collect in each batch of results
  ## returned by the API
  rclimit => '500',

  ## Filters: not applied in the query; the flags are post-processed
  ## once the results are in.
  #rcshow => '!minor|!bot',
  #rctype => 'edit|new|log',

  #rcexcludeuser => '',

  ## Properties to return. See:
  ## http://www.mediawiki.org/wiki/API:Query_-_Lists#recentchanges_.2F_rc
  rcprop  => 'user|timestamp|title|flags|loginfo',
);

## MW::API Config (nothing enabled at the moment)
my %list_options = (
  ## Process result as they come in with this function
  ## (responsible for returning something useful).
  #hook => \&look_hook,

  ## Max number of batches to collect (for debugging)
  #max => 1
);

warn "collecting changes since $rcstart\n";
my $rc_array = $mw->list(\%rc_query, \%list_options);

#sub look_hook{
#  warn "hi\n";
#}

my $revision_count = scalar(@$rc_array);
warn 'found ', $revision_count, " revisions\n";

## Debugging
#warn Dumper $rc_array;
 
## Debugging
#warn Dumper $rc_array;
 
 
 
 
 
## Compile edit statistics for the month

## Here we collect five counts:
##
## Number of active users (number of new users)
## Number of pages edited (number of new pages)
## Number of edits
##
## %users and %pages count contributions per user name and per page
## title; only the number of distinct keys is reported.

my (%users, %pages);

## Counters start at zero so that a quiet month reports '0' rather than
## an empty (undefined) value.
my $number_of_new_users = 0;
my $number_of_new_pages = 0;
my $total_edits         = 0;

## Actions of the 'newusers' log that each record one new account.
## Accounts are not only made with a plain 'create' (for example
## 'create2' is an account created by another user), and none of these
## should abort the run.
my %is_account_creation =
  map { $_ => 1 } qw(create create2 byemail autocreate newusers);

foreach my $rc (@$rc_array){

  ## Debugging
  #warn Dumper $rc;
  #warn "paused\n";
  #my $x = <STDIN>;

  my $type = defined $rc->{type} ? $rc->{type} : '';

  if($type eq 'log'){
    ## No logtypes other than 'newusers' (e.g. delete, block, upload,
    ## move, ...) concern us here.

    ## TODO: We could look at the deletion log and see if any of our
    ## new users or new pages for the month should be deleted...
    next unless defined $rc->{logtype} && $rc->{logtype} eq 'newusers';

    ## Sanity check
    my $action = defined $rc->{logaction} ? $rc->{logaction} : '';
    die Dumper $rc unless $is_account_creation{$action};
    #warn 'new user: ', $rc->{user}, "\n";
    $number_of_new_users++;
  }

  elsif($type eq 'new'){
    ## Sanity check
    die Dumper $rc unless defined($rc->{new});
    #warn 'new page: ', $rc->{title}, "\n";
    $number_of_new_pages++;
    $users{$rc->{user}}++;
    $pages{$rc->{title}}++;
  }

  elsif($type eq 'edit'){
    ## Filter bots and minor edits. Both flags are present as keys only
    ## when set, so 'defined' is the test.
    ## Need a username kill list here?
    next if defined($rc->{minor}) || defined($rc->{bot});
    $total_edits++;
    $users{$rc->{user}}++;
    $pages{$rc->{title}}++;
  }

  else{
    ## A change type this script does not know about: stop and show it.
    die Dumper $rc;
  }
}

warn "OK\n";

print 'active users = ', scalar keys %users, "\n";
print " new users = $number_of_new_users\n";
print 'active pages = ', scalar keys %pages, "\n";
print " new pages = $number_of_new_pages\n";
print "total edits = $total_edits\n";
 
 
 
 
 
## OK, now we have to upload...

# Step 1) parse the existing BioWiki page

## In this simple case we don't need a new MW::API object for upload

my $page_title = 'Bioinformatics.Org Wiki';

my $page_ref = $mw->
  get_page({ title => $page_title });

#print Dumper $page_ref;

my $page_text = $page_ref->{'*'};

## Without any text there is nothing to match against below.
die "page '$page_title' has no text to parse\n"
  unless defined $page_text;

## Parse out the (first) 'BioWiki' template from the page text. The
## braces are escaped because newer perls reject an unescaped literal
## '{' inside a pattern.
die "failed to parse page '$page_title'\n"
  unless $page_text =~ /^(.*?)\{\{(BioWiki)\s*\|(.*?)\}\}(.*)$/s;

#print "'$1'\n";
#print "'$2'\n";
#print "'$3'\n";
#print "'$4'\n";

my $pre_text       = $1;
my $template_title = $2; # BioWiki in this case
my $template_body  = $3;
my $post_text      = $4;

## Strip newlines from the template body
$template_body =~ s/\n//g;

## Parse the template fields.
##
## Fields are separated by '|', except for a '|' inside a [[link|label]].
## Each field is split on its first '=' only, so a value may itself
## contain '='. Names and values are trimmed, otherwise the padding of
## ' |name    = value' ends up in the hash key and no lookup by the
## plain field name can succeed.
my %fields;
for my $field (split /\|(?![^\[\]]*\]\])/, $template_body){
  my ($name, $value) = split /=/, $field, 2;
  next unless defined $name;
  $value = '' unless defined $value;
  for ($name, $value){
    s/^\s+//;
    s/\s+$//;
  }
  next unless length $name;
  $fields{$name} = $value;
}

#print "$_\t$fields{$_}\n" for keys %fields;

## Sanity check fields: warn about any field that is not written back
## when the page text is rebuilt.
my %is_known_field = map { $_ => 1 } (
  # used
  'date created', 'logo file', 'num pages', 'num users',
  'num contribs', 'contribs', 'platform', 'extensions',
  'url', 'people', 'email', 'institutions',

  'num users active', 'num users new',
  'num pages active', 'num pages new',
  'num edits',
);

for(keys %fields){
  next if $is_known_field{$_};

  # missed
  warn "How did we miss : '$_' ?\n"
}
 
 
 
# Step 2) create the new page text

## Fields carried over from the existing template, in the order they
## are written back.
my @carried_fields = (
  'date created', 'logo file', 'num pages', 'num users',
  'num contribs', 'contribs', 'platform', 'extensions',
  'url', 'people', 'email', 'institutions',
);

## Statistics for this run, in the order they are written.
my @stat_fields = (
  [ 'num users active' => scalar keys %users ],
  [ 'num users new'    => $number_of_new_users ],
  [ 'num pages active' => scalar keys %pages ],
  [ 'num pages new'    => $number_of_new_pages ],
  [ 'num edits'        => $total_edits ],
);

my $template_text = "{{BioWiki\n";

for my $name (@carried_fields){
  ## Test for definedness rather than truth, so that a value of '0'
  ## is kept.
  my $value = defined $fields{$name} ? $fields{$name} : '';
  $value =~ s/^\s+//;
  $value =~ s/\s+$//;
  $template_text .= sprintf(" |%-12s = %s\n", $name, $value);
}

$template_text .= "\n";

for my $stat (@stat_fields){
  my ($name, $count) = @$stat;
  ## A count that was never incremented is reported as zero.
  $count = 0 unless defined $count;
  $template_text .= sprintf(" |%-16s = %s\n", $name, $count);
}

$template_text .= "}}";

## $pre_text and $post_text already hold everything around the old
## template, including its line breaks. They are joined on as they are:
## adding newlines here would make the page grow on every run.
my $new_page_text = $pre_text . $template_text . $post_text;

#print $new_page_text, "\n";
 
 
 
# Step 3) upload the new page

warn "writing\n";

## In this simple case we don't need a new MW::API object for upload,
## however, we need to login now to write.

$mw->
  login({ lgname => '<my pet bot>',
          lgpassword => '<my secret>',
        });

## To avoid edit conflicts
my $timestamp = $page_ref->{timestamp};

## MD5 is only defined over bytes, so the checksum is taken over a
## UTF-8 encoded copy of the text. Hashing the character string itself
## fails on wide characters.
## NOTE(review): assumes the server hashes the UTF-8 encoded text - confirm.
my $new_page_bytes = $new_page_text;
utf8::encode($new_page_bytes);

$mw->
  edit({ action => 'edit',
         title => $page_title,
         ## To avoid edit conflicts
         basetimestamp => $timestamp,
         text => $new_page_text,
         summary => "uploading statistics for $page_title",
         ## Mark the edit as a bot edit.
         bot => '',
         ## Guard against corruption of the text in transit
         md5 => md5_hex($new_page_bytes),
       });

warn "OK\n";
Personal tools
Namespaces
Variants
Actions
wiki navigation
Toolbox