ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/contig_to_chr_coords.py
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (9 years, 2 months ago) by gpertea
File size: 4631 byte(s)
Log Message:
adding tophat source work

Line User Rev File contents
1 gpertea 29 #!/usr/bin/env python
2     # encoding: utf-8
3     """
4     contig_to_chr_coords.py
5    
6     Created by Cole Trapnell on 2008-09-05.
7     Copyright (c) 2008 Cole Trapnell. All rights reserved.
8     """
9    
10     import sys
11     import getopt
12    
13    
14     help_message = '''
15     Takes the NCBI seq_contig file and maps contig coords to whole chromosome
16     coords in a GTF, GFF, or BED file
17    
18     contig_to_chr_coords.py <format_flag> <seq_contig.md> <junctions.bed|transcripts.gff|transcripts.gtf>
19     '''
20    
21    
22     class Usage(Exception):
23     def __init__(self, msg):
24     self.msg = msg
25    
26    
27     def main(argv=None):
28     if argv is None:
29     argv = sys.argv
30     try:
31     try:
32     opts, args = getopt.getopt(argv[1:], "ho:vbg", ["help", "output=", "bed", "gff"])
33     except getopt.error, msg:
34     raise Usage(msg)
35    
36     arg_is_splice = False
37     arg_is_gff = False
38     # option processing
39     for option, value in opts:
40     if option == "-v":
41     verbose = True
42     if option in ("-h", "--help"):
43     raise Usage(help_message)
44     if option in ("-o", "--output"):
45     output = value
46     if option in ("-b", "--bed"):
47     arg_is_splice = True
48     if option in ("-g", "--gff"):
49     arg_is_gff = True
50    
51     if (arg_is_splice == False and arg_is_gff == False) or (arg_is_splice == True and arg_is_gff == True):
52     print >> sys.stderr, "Error: please specify either -b or -g, but not both"
53     raise Usage(help_message)
54    
55     if len(args) < 1:
56     raise Usage(help_message)
57     contig_to_chr_file = open(args[0])
58    
59     contigs = {}
60    
61     for line in contig_to_chr_file.readlines():
62     if line[0] == "#":
63     continue
64     line = line.strip()
65     cols = line.split('\t')
66     if len(cols) < 9:
67     continue
68     chromosome = cols[1]
69     group = cols[8]
70     feature_name = cols[5]
71     if not feature_name in ["start", "end"]:
72     contigs[feature_name] = (chromosome, int(cols[2]))
73     #print feature_name, chromosome, int(cols[2])
74    
75     if arg_is_gff:
76     gff_file = open(args[1])
77    
78     lines = gff_file.readlines()
79     print lines[0],
80     for line in lines[1:]:
81     line = line.strip()
82     cols = line.split('\t')
83     if len(cols) < 8:
84     continue
85    
86     contig = cols[0]
87     chr_fields = contig.split('|')
88     refseq_id = chr_fields[3]
89     contig = contigs.get(refseq_id)
90     chr_name = contig[0]
91     pipe_idx = chr_name.find('|')
92    
93     if pipe_idx != -1:
94     chr_name = chr_name[:pipe_idx]
95     if contig != None:
96     #print line
97     left_pos = contig[1] + int(cols[3])
98     right_pos = contig[1] + int(cols[4])
99    
100     print "chr%s\tTopHat\tisland\t%d\t%d\t%s\t.\t.\t%s" % (chr_name, left_pos, right_pos, cols[5],cols[8])
101     #print >>sys.stderr, "%s\t%d\t%d\t%s\t%s\t%s\t%s" % (contig[0], left_pos, right_pos,cols[3],cols[6],cols[0],cols[1])
102    
103    
104     if arg_is_splice:
105     splice_file = open(args[1])
106    
107     lines = splice_file.readlines()
108     print lines[0],
109     for line in lines[1:]:
110     line = line.strip()
111     cols = line.split('\t')
112     contig = cols[0]
113     chr_fields = contig.split('|')
114     refseq_id = chr_fields[3]
115     contig = contigs.get(refseq_id)
116     chr_name = contig[0]
117     pipe_idx = chr_name.find('|')
118    
119     if pipe_idx != -1:
120     chr_name = chr_name[:pipe_idx]
121     if contig != None:
122     #print line
123     left_pos = contig[1] + int(cols[1])
124     right_pos = contig[1] + int(cols[2])
125    
126     print "chr%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t255,0,0\t2\t1,1\t%s" % (chr_name, left_pos, right_pos, cols[3],cols[5],left_pos, right_pos,cols[11])
127     #print >>sys.stderr, "%s\t%d\t%d\t%s\t%s\t%s\t%s" % (contig[0], left_pos, right_pos,cols[3],cols[6],cols[0],cols[1])
128     except Usage, err:
129     print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
130     print >> sys.stderr, "\t for help use --help"
131     return 2
132    
133    
134     if __name__ == "__main__":
135     sys.exit(main())

Properties

Name Value
svn:executable *