ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/prep/PREPv1-0.pl
Revision: 1.1
Committed: Fri Jul 8 18:02:31 2005 UTC (15 years ago) by cfrenz
Branch point for: MAIN
Log Message:
Initial revision

Line File contents
1 #!/usr/bin/perl
2
3 # PREP (Perl RegExps for Pubmed) is a script that allows the use of
4 # Perl regexs in the searching of Pubmed records, providing the ability to search
5 # records for textual patterns as well as keywords
6
7 # Copyright 2005- Christopher M. Frenz
8 # This script is free software; it may be used, copied, redistributed, and/or modified
9 # under the terms laid forth in the Perl Artistic License
10
11 # Please cite this script in any publication in which literature cited within the
12 # publication was located using the PREP.pl script.
13
14 # Usage: perl PREPv1-0.pl PubmedQueryTerms
15
# Usage of this script requires that the LWP and XML::LibXML modules be installed
use strict;
use warnings;
use LWP;
use XML::LibXML; #Version 1.58 used for development and testing

# Change the variable below to set the text pattern that Perl
# will seek to match in the returned results
my $regex='[ARNDCEQGHILKMFPSTWYV]\d+[ARNDCEQGHILKMFPSTWYV]';

# Compiled once up front. The outer group guarantees that $1 is the whole
# matched text even when $regex itself contains capture groups.
my $pattern=qr/($regex)/;

# Name of the HTML report written to the current directory
my $outfile='PREPout.html';

# Percent-encodes every byte that is not an RFC 3986 "unreserved" character
# so free-form query text (spaces, &, #, +, ...) survives inside a URL.
sub url_encode {
    my ($text) = @_;
    # Work on UTF-8 bytes so that wide characters become valid %XX sequences.
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord $1)/ge;
    return $text;
}

# Escapes the characters that are significant in HTML text and in
# double-quoted attribute values.
sub html_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Returns the text of every descendant element of $node named $tag, joined
# with $separator (default: empty string). Returns '' when none exist.
sub element_text {
    my ($node, $tag, $separator) = @_;
    $separator = '' unless defined $separator;
    return join($separator, map { $_->textContent } $node->getElementsByTagName($tag));
}

die "Usage: perl PREPv1-0.pl PubmedQueryTerms\n" unless @ARGV;

# Concatenates arguments passed to script to form Pubmed query
my $query=join(" ", @ARGV);

# Creates the URL to search Pubmed
# NOTE(review): these endpoints use plain http on www.ncbi.nlm.nih.gov, as in
# the original script; confirm against the current E-utilities documentation
# whether an https base URL is now required.
my $baseurl="http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?";
my $url=$baseurl . "db=Pubmed&retmax=1&usehistory=y&term=" . url_encode($query);

# Searches Pubmed and returns the number of results
# as well as the session information needed for results retrieval
my $request=LWP::UserAgent->new();
my $response=$request->get($url);
die "Pubmed search request failed: " . $response->status_line . "\n"
    unless $response->is_success;
my $results=$response->content;

print "PubMed Search Results \n";

# Each field is checked individually so that a missing element is reported
# instead of silently reusing a stale or undefined capture.
my ($NumAbstracts) = $results =~ m{<Count>(\d+)</Count>}
    or die "Pubmed search response contained no <Count> element\n";
my ($QueryKey) = $results =~ m{<QueryKey>(\d+)</QueryKey>}
    or die "Pubmed search response contained no <QueryKey> element\n";
my ($WebEnv) = $results =~ m{<WebEnv>(.*?)</WebEnv>}
    or die "Pubmed search response contained no <WebEnv> element\n";

print "$NumAbstracts are Available \n";
print "Query Key= $QueryKey \n";
print "WebEnv= $WebEnv \n";

# Opens a file for output. The values printed come from the XML parser, so
# they are encoded explicitly as UTF-8 on the way out.
open(my $out, '>:encoding(UTF-8)', $outfile)
    or die "Cannot open $outfile for writing: $!\n";

my $parser=XML::LibXML->new;

my $retmax=500; #Number of records to be retrieved per request-Max 500

# Creates the URLs needed to retrieve results and to link to each record
$baseurl="http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?";
my $url2="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=";

my $Count=0;

# Retrieves results in XML format, $retmax records per request.
# The bound is strictly "<": retstart is a zero-based offset, so an offset
# equal to the record count would only trigger an extra, empty request.
for (my $retstart=0; $retstart<$NumAbstracts; $retstart+=$retmax) {
    print "Processing record # $retstart \n";
    $url=$baseurl . "rettype=abstract&retmode=xml&retstart=$retstart&retmax=$retmax&db=Pubmed&query_key=$QueryKey&WebEnv=$WebEnv";

    $response=$request->get($url);
    die "Pubmed fetch request failed at record $retstart: " . $response->status_line . "\n"
        unless $response->is_success;

    # Uses a DOM based XML parser to process returned results
    my $domtree=$parser->parse_string($response->content);

    for my $record ($domtree->getElementsByTagName("PubmedArticle")) {
        # Extracts element data for regex processing and output formatting
        my $title   =element_text($record, "ArticleTitle");
        my $journal =element_text($record, "MedlineTA");
        my $volume  =element_text($record, "Volume");
        my $pgnum   =element_text($record, "MedlinePgn");
        # Joined with a space so text from separate AbstractText elements
        # does not run together (presumably one element per abstract
        # section -- verify against the PubMed XML format).
        my $abstract=element_text($record, "AbstractText", " ");

        # Only the first PMID element is used for the link; joining every
        # PMID descendant would produce an invalid identifier.
        # Assumes the first PMID in document order is the article's own --
        # TODO confirm against the PubMed XML format.
        my ($pmid_node)=$record->getElementsByTagName("PMID");
        my $pmid=defined $pmid_node ? $pmid_node->textContent : '';

        # Processes title and abstract for pattern match (title first) and
        # if a match occurs data is written to output
        my $match;
        if ($title =~ $pattern) {
            $match=$1;
        }
        elsif ($abstract =~ $pattern) {
            $match=$1;
        }
        next unless defined $match;

        # Record text is external data, so it is escaped before being
        # embedded in the HTML report.
        print {$out} "<h1>Pattern Match: ", html_escape($match), " </h1>\n";
        print {$out} "<h3><a href=\"", html_escape($url2 . $pmid), "\">", html_escape($title), " </a></h3> \n";
        print {$out} "<p>", html_escape("$journal $volume, $pgnum"), " </p>\n";
        print {$out} "<p>", html_escape($abstract), " </p>\n\n";
        $Count++;
    }
}

# Buffered write errors only surface at close, so the result is checked
close($out) or die "Error while closing $outfile: $!\n";
print "$Count records matched the pattern";