[Pdbwiki-devel] MediaWiki edit summary script (Perl, MediaWiki::API)

Dan Bolser dan.bolser at gmail.com
Wed Jan 19 12:02:23 EST 2011


Just running against wp now...



On 19 January 2011 14:59, Jose M. Duarte <jose.m.duarte at gmail.com> wrote:
> Awesome! works like a charm :)
>
> Jose
>
>
> On 19 January 2011 03:56, Dan Bolser <dan.bolser at gmail.com> wrote:
>>
>> Hi guys, I couldn't sleep so I wrote this (quite fun!):
>>
>>
>>
>> #!/usr/bin/perl -w
>>
>> ## Prototype script to collect and summarise a month's worth of edits
>> ## from a given MW.
>>
>> ## For information, see:
>> ## http://search.cpan.org/dist/MediaWiki-API/lib/MediaWiki/API.pm
>> ## http://www.mediawiki.org/wiki/API:Query_-_Lists#recentchanges_.2F_rc
>>
>> use strict;
>>
>> use Data::Dumper;
>>
>> use DateTime;
>>
>> use MediaWiki::API;
>>
>>
>>
>> ## CONNECT TO AN API
>>
>> ## Endpoint of the wiki to summarise (swap in one of the alternatives
>> ## by moving the comment marker)
>> #my $api_url = 'http://en.wikipedia.org/w/api.php';
>> #my $api_url = 'http://seqanswers.com/w/api.php';
>> my $api_url = 'http://pdbwiki.org/api.php';
>>
>>
>>
>> ## Build the API client object for that endpoint
>> my $mw = MediaWiki::API->new({ api_url => $api_url });
>>
>>
>>
>> ## Register on_error as the client's default error function, so the
>> ## individual API calls do not each have to be checked for errors
>> $mw->{config}{on_error} = \&on_error;
>>
>> ## The error function: reports the code, details and stack trace held
>> ## in $mw->{error}, then aborts the script. Takes no arguments and
>> ## never returns. The diagnostics go to STDERR so that they do not get
>> ## mixed into the report, which is printed on STDOUT.
>> sub on_error {
>>  my $error = $mw->{error};
>>  print STDERR "Error code: ", $error->{code}, "\n";
>>  print STDERR $error->{details}, "\n";
>>  print STDERR $error->{stacktrace}, "\n";
>>  die "err\n";
>> }
>>
>>
>>
>> ## Query the wiki's site information and print the name it reports
>> my $ref = $mw->api({
>>     action => 'query',
>>     meta   => 'siteinfo',
>> });
>> my $sitename = $ref->{query}{general}{sitename};
>> print "Sitename: '", $sitename, "'\n";
>>
>>
>>
>> ## Grab the recent changes list
>>
>> ## Start of the reporting window: one month before now, as an epoch
>> ## timestamp
>> my $rcstart =
>>  DateTime->now->subtract(months => 1)->epoch;
>>
>> ## Debugging
>> #my $rcstart =
>> #  DateTime->now->subtract(hours => 7)->epoch;
>>
>> warn "collecting changes since $rcstart\n";
>>
>> ## Fetch the list; the result is used below as a reference to an array
>> ## holding one hash per change
>> my $rc_array = $mw->
>>  list ({
>>         action  => 'query',
>>         list    => 'recentchanges',
>>
>>         ## Get changes since:
>>         rcdir   => 'newer',
>>         rcstart => $rcstart,
>>
>>         ## Number of revisions to collect in each batch of results
>>         ## returned by the API
>>         rclimit => '500',
>>
>>         ## Filters. Both conditions must go into ONE pipe-separated
>>         ## value: writing the 'rcshow' key twice in a Perl hash keeps
>>         ## only the last value, which silently dropped '!minor'.
>>         rcshow  => '!minor|!bot',
>>
>>         #rcexcludeuser => '',
>>         #rctype => edit / new / log,
>>
>>         ## Properties
>>         rcprop =>
>>           'user|comment|timestamp|title|sizes|flags'
>>
>>        },
>>        {
>>         ## Config
>>
>>         ## Process result as they come in with this function
>>         ## (responsible for returning something useful).
>>         #hook => \&look_hook,
>>
>>         ## Max number of batches to collect (for debugging).
>>         ## NOTE: with this set to 1 only the first batch (up to
>>         ## rclimit changes) is collected; raise or remove it to cover
>>         ## the whole month on a busy wiki.
>>         max => 1
>>
>>        }
>>       );
>>
>> #sub look_hook{
>> #  print "hi\n";
>> #}
>>
>> warn 'found ', scalar(@{ $rc_array }), " revisions\n";
>>
>> ## Debugging
>> #print Dumper $rc_array;
>>
>>
>>
>> ## Tally the changes: one counter per user name and one per page title
>>
>> my %users;
>> my %pages;
>>
>> for my $change (@{ $rc_array }) {
>>
>>  ## Debugging
>>  #print Dumper $change;
>>
>>  ++$users{ $change->{user}  };
>>  ++$pages{ $change->{title} };
>> }
>>
>> warn "OK\n";
>>
>>
>>
>> ## Print the tallies as "name<TAB>count" lines, highest count first.
>> ## Equal counts are ordered alphabetically so that the report is the
>> ## same from run to run (hash key order alone is not stable).
>>
>> print "users:\n";
>> print "$_\t$users{$_}\n" for sort u keys %users;
>>
>> print "pages:\n";
>> print "$_\t$pages{$_}\n" for sort p keys %pages;
>>
>> ## Sort comparators: descending count, then ascending name
>> sub u { $users{$b} <=> $users{$a} || $a cmp $b }
>> sub p { $pages{$b} <=> $pages{$a} || $a cmp $b }
>>
>>
>>
>> __END__
>>
>> # 20:06 -!- dbolser [~dmb at bioinformatics.org] has joined #perl
>> # 20:06 < dbolser> how can I get the current date, minus one month,
>> # formatted like this "2011-01-18T21:31:02Z"?
>> # 20:07  * GumbyPAN CPAN Upload: SDL-Tutorial-3DWorld-0.33 by ADAMK
>> # 20:07 < tm604> dbolser: DateTime->now->subtract(months =>
>> # 1)->iso8601
>> # 20:07 < dbolser> tm604: You are better than Google
>> # 20:07 < ology> dbolser: perldoc POSIX search for strftime
>> # 20:08 < dbolser> tys
>> # 20:08 < tm604> might also need to specify the timezone directly to
>> # get the trailing Z.
>>
>> _______________________________________________
>> Pdbwiki-devel mailing list
>> Pdbwiki-devel at bioinformatics.org
>> http://www.bioinformatics.org/mailman/listinfo/pdbwiki-devel
>> http://www.pdbwiki.org
>
>



More information about the Pdbwiki-devel mailing list