ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/prep/PREPv1-0.pl
Revision: 1.1.1.1 (vendor branch)
Committed: Fri Jul 8 18:02:31 2005 UTC (15 years ago) by cfrenz
Branch: MAIN
CVS Tags: HEAD, HEAD
Changes since 1.1: +0 -0 lines
Log Message:
message about import

Line User Rev File contents
1 cfrenz 1.1 #!/usr/bin/perl
2    
3     # PREP (Perl RegExps for Pubmed) is a script that allows the use of
4     # Perl regexs in the searching of Pubmed records, providing the ability to search
5     # records for textual patterns as well as keywords
6    
7     # Copyright 2005- Christopher M. Frenz
8     # This script is free software; it may be used, copied, redistributed, and/or modified
9     # under the terms laid forth in the Perl Artistic License
10    
11     # Please cite this script in any publication in which literature cited within the
12     # publication was located using the PREP.pl script.
13    
14     # Usage: perl PREPv1-0.pl PubmedQueryTerms
15    
16     # Usage of this script requires that the LWP and XML::LibXML modules be installed
17     use LWP;
18     use XML::LibXML; #Version 1.58 used for development and testing
19    
use strict;
use warnings;

# Change the variable below to set the text pattern that Perl
# will seek to match in the returned results.  The default matches one
# letter from the set below, one or more digits, then another letter from
# the same set (for example "A123T").
my $regex = '[ARNDCEQGHILKMFPSTWYV]\d+[ARNDCEQGHILKMFPSTWYV]';

# Endpoints used by the script.
# NOTE(review): NCBI's current E-utilities documentation lists an
# https://eutils.ncbi.nlm.nih.gov base URL.  The legacy http URLs are kept
# unchanged here because https would additionally require
# LWP::Protocol::https -- confirm these still resolve.
my $esearch_url  = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?";
my $efetch_url   = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?";
my $abstract_url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=";

my $output_file = "PREPout.html";
my $retmax      = 500;    # Number of records to be retrieved per request - Max 500

# Percent-encodes every byte outside the RFC 3986 "unreserved" set so that
# the text can be used safely as the value of a URL query parameter.
sub _uri_escape {
    my ($text) = @_;
    # Work on bytes so that each escaped unit is exactly two hex digits.
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord $1)/ge;
    return $text;
}

# Escapes the characters that are significant in HTML text and in
# double-quoted attribute values.
sub _html_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Returns the text content of the first descendant element of $node named
# $tag, or the empty string when the record has no such element.
sub _first_text {
    my ($node, $tag) = @_;
    my ($first) = $node->getElementsByTagName($tag);
    return defined $first ? $first->textContent : '';
}

# Returns the text content of every descendant element of $node named $tag,
# joined with single spaces (the empty string when there are none).
sub _all_text {
    my ($node, $tag) = @_;
    return join ' ', map { $_->textContent } $node->getElementsByTagName($tag);
}

die "Usage: perl PREPv1-0.pl PubmedQueryTerms\n" unless @ARGV;

# Concatenates arguments passed to script to form the Pubmed query; the
# result is escaped so characters such as '&', '#' or '+' in the search
# terms cannot corrupt the request URL.
my $query = _uri_escape(join(" ", @ARGV));

# Searches Pubmed and returns the number of results
# as well as the session information needed for results retrieval
my $agent    = LWP::UserAgent->new();
my $response = $agent->get($esearch_url . "db=Pubmed&retmax=1&usehistory=y&term=" . $query);
die "PubMed search request failed: " . $response->status_line . "\n"
    unless $response->is_success;
my $results = $response->content;

print "PubMed Search Results \n";

# Each value is captured by its own list-context match, so a failed match
# yields undef instead of silently reusing the previous match's $1.
my ($num_abstracts) = $results =~ m{<Count>(\d+)</Count>};
my ($query_key)     = $results =~ m{<QueryKey>(\d+)</QueryKey>};
my ($web_env)       = $results =~ m{<WebEnv>(.*?)</WebEnv>};

die "Could not find a result count in the PubMed search response\n"
    unless defined $num_abstracts;
die "PubMed search response is missing QueryKey/WebEnv session information\n"
    if $num_abstracts > 0 and not(defined $query_key and defined $web_env);

# Only reachable with undefined values when there is nothing to retrieve.
$query_key = '' unless defined $query_key;
$web_env   = '' unless defined $web_env;

print "$num_abstracts are Available \n";
print "Query Key= $query_key \n";
print "WebEnv= $web_env \n";

# Opens a file for output.  The UTF-8 layer is needed because the parser
# hands back decoded character strings.
open(my $out, '>:encoding(UTF-8)', $output_file)
    or die "Cannot open $output_file for writing: $!\n";

my $parser  = XML::LibXML->new;
my $pattern = qr/($regex)/;    # compiled once, reused for every record
my $count   = 0;

# Retrieves results in XML format, $retmax records per request.  The bound
# is strict so no extra request is issued once every record has been
# fetched (or when the search returned nothing).
for (my $retstart = 0; $retstart < $num_abstracts; $retstart += $retmax) {
    print "Processing record # $retstart \n";
    my $url = $efetch_url
        . "rettype=abstract&retmode=xml&retstart=$retstart&retmax=$retmax"
        . "&db=Pubmed&query_key=$query_key&WebEnv=$web_env";

    my $reply = $agent->get($url);
    die "PubMed fetch request failed at record $retstart: " . $reply->status_line . "\n"
        unless $reply->is_success;

    # Uses a DOM based XML parser to process returned results
    my $domtree = $parser->parse_string($reply->content);

    for my $record ($domtree->getElementsByTagName("PubmedArticle")) {
        # Extracts element data for regex processing and output formatting.
        # Single-valued fields take the first matching element only, so a
        # record carrying several PMID elements still links to one ID.
        my $title    = _first_text($record, "ArticleTitle");
        my $journal  = _first_text($record, "MedlineTA");
        my $volume   = _first_text($record, "Volume");
        my $pgnum    = _first_text($record, "MedlinePgn");
        my $pmid     = _first_text($record, "PMID");
        my $abstract = _all_text($record, "AbstractText");

        # Processes title and abstract for pattern match (title first) and
        # if a match occurs data is written to output
        my $match;
        if ($title =~ $pattern) {
            $match = $1;
        }
        elsif ($abstract =~ $pattern) {
            $match = $1;
        }
        next unless defined $match;

        # Everything taken from the record is escaped before being embedded
        # in the HTML so characters like '<' or '&' cannot break the markup.
        my $href = _html_escape($abstract_url . $pmid);
        print {$out} "<h1>Pattern Match: " . _html_escape($match) . " </h1>\n";
        print {$out} "<h3><a href=\"$href\">" . _html_escape($title) . " </a></h3> \n";
        print {$out} "<p>" . _html_escape($journal) . " " . _html_escape($volume)
            . ", " . _html_escape($pgnum) . " </p>\n";
        print {$out} "<p>" . _html_escape($abstract) . " </p>\n\n";
        $count++;
    }
}

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close $output_file: $!\n";
print "$count records matched the pattern";