ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/tophat
(Generate patch)
# Line 1157 | Line 1157
1157          self.fsrc=None
1158          self.popen=None
1159          pipecmd=[]
1160 <        if guess:
1161 <           s=filename.lower()
1162 <           if s.endswith(".z") or s.endswith(".gz") or s.endswith(".gzip"):
1163 <                pipecmd=['gzip']
1164 <           else:
1165 <                if s.endswith(".bz2") or s.endswith(".bzip2") or s.endswith(".bzip"):
1166 <                     pipecmd=['bzip2']
1167 <           if len(pipecmd)>0 and which(pipecmd[0]) is None:
1168 <               die("Error: cannot find %s to decompress input file %s " % (pipecmd, filename))
1169 <           if len(pipecmd)>0:
1170 <              if pipecmd[0]=='gzip' and sysparams.zipper.endswith('pigz'):
1171 <                 pipecmd[0]=sysparams.zipper
1172 <                 pipecmd.extend(sysparams.zipper_opts)
1173 <              elif pipecmd[0]=='bzip2' and sysparams.zipper.endswith('pbzip2'):
1174 <                 pipecmd[0]=sysparams.zipper
1175 <                 pipecmd.extend(sysparams.zipper_opts)
1176 <        else: #not guessing, but we must still be sure it's a compressed file
1177 <           if use_zpacker and filename.endswith(".z"):
1178 <              pipecmd=[sysparams.zipper]
1179 <              pipecmd.extend(sysparams.zipper_opts)
1180 <
1160 >        s=filename.lower()
1161 >        if s.endswith(".bam"):
1162 >           pipecmd=["bam2fastx", "-"]
1163 >        else:
1164 >           if guess:
1165 >              if s.endswith(".z") or s.endswith(".gz") or s.endswith(".gzip"):
1166 >                  pipecmd=['gzip']
1167 >              else:
1168 >                  if s.endswith(".bz2") or s.endswith(".bzip2") or s.endswith(".bzip"):
1169 >                       pipecmd=['bzip2']
1170 >              if len(pipecmd)>0 and which(pipecmd[0]) is None:
1171 >                 die("Error: cannot find %s to decompress input file %s " % (pipecmd, filename))
1172 >              if len(pipecmd)>0:
1173 >                 if pipecmd[0]=='gzip' and sysparams.zipper.endswith('pigz'):
1174 >                   pipecmd[0]=sysparams.zipper
1175 >                   pipecmd.extend(sysparams.zipper_opts)
1176 >                 elif pipecmd[0]=='bzip2' and sysparams.zipper.endswith('pbzip2'):
1177 >                   pipecmd[0]=sysparams.zipper
1178 >                   pipecmd.extend(sysparams.zipper_opts)
1179 >           else: #not guessing, but we must still be sure it's a compressed file
1180 >              if use_zpacker and filename.endswith(".z"):
1181 >                pipecmd=[sysparams.zipper]
1182 >                pipecmd.extend(sysparams.zipper_opts)
1183 >           if pipecmd:
1184 >              pipecmd+=['-cd']
1185          if pipecmd:
1182           pipecmd+=['-cd']
1186             try:
1187                self.fsrc=open(self.fname, 'rb')
1188                self.popen=subprocess.Popen(pipecmd,
# Line 2127 | Line 2130
2130   # left and right read files provided by the user.
2131   class Maps:
2132          def __init__(self,
2133 <                     unspliced_bwt,
2133 >             #        unspliced_bwt,
2134                       unspliced_sam,
2135                       seg_maps,
2136                       unmapped_segs,
2137                       segs):
2138 <            self.unspliced_bwt = unspliced_bwt
2138 >            #self.unspliced_bwt = unspliced_bwt
2139              self.unspliced_sam = unspliced_sam
2140              self.seg_maps = seg_maps
2141              self.unmapped_segs = unmapped_segs
# Line 2172 | Line 2175
2175              continue
2176          fbasename=getFileBaseName(reads)
2177          unspliced_out = tmp_dir + fbasename + ".mapped"
2175        #if use_zpacker: unspliced_out+=".z"
2178          unmapped_unspliced = tmp_dir + fbasename + "_unmapped"
2179 <        
2179 >
2180          num_segs = read_len / segment_len
2181          if read_len % segment_len >= min(segment_len -2, 20):
2182              num_segs += 1
2183 <        
2183 >
2184          # daehwan - check this out
2185          if num_segs <= 1:
2186              print >> sys.stderr, "Warning: you have only one segment per read\n\twe strongly recommend that you decrease --segment-length to about half the read length because TopHat will work better with multiple segments"
2187  
2188          # Perform the initial Bowtie mapping of the full length reads
2189 <        (unspliced_map, unmapped) = bowtie(params,
2189 >        (unspliced_sam, unmapped_reads) = bowtie(params,
2190                                             bwt_idx_prefix,
2191                                             reads,
2192                                             "fastq",
# Line 2192 | Line 2194
2194                                             unspliced_out,
2195                                             unmapped_unspliced)
2196  
2195        unspliced_sam = tmp_dir+fbasename+'.unspl.bam'
2196
2197          # Convert the initial Bowtie maps into BAM format.
2198 <        # TODO: this step should be replaced by a more direct conversion
2199 <        #  because Bowtie can actually output SAM format now
2200 <        join_mapped_segments(params,
2201 <                             sam_header_filename,
2202 <                             reads,
2203 <                             ref_fasta,
2204 <                             ["/dev/null"],
2205 <                             ["/dev/null"],
2206 <                             ["/dev/null"],
2207 <                             [unspliced_map],
2208 <                             [],
2209 <                             unspliced_sam)
2198 >        #  skip this step as fix_map_ordering does it now
2199 >        #join_mapped_segments(params,
2200 >        #                     sam_header_filename,
2201 >        #                     reads,
2202 >        #                     ref_fasta,
2203 >        #                     ["/dev/null"],
2204 >        #                     ["/dev/null"],
2205 >        #                     ["/dev/null"],
2206 >        #                     [unspliced_map],
2207 >        #                     [],
2208 >        #                     unspliced_sam)
2209          # Using the num_segs value returned by check_reads(), decide which
2210          # junction discovery strategy to use
2211          if num_segs == 1:
# Line 2227 | Line 2226
2226          segs = []
2227          if num_segs > 1:
2228              # split up the IUM reads into segments
2229 <            read_segments = split_reads(unmapped_unspliced,
2229 >            # unmapped_reads can be in BAM format
2230 >            read_segments = split_reads(unmapped_reads,
2231                                          tmp_dir + fbasename,
2232                                          False,
2233                                          params,
# Line 2237 | Line 2237
2237              for i in range(len(read_segments)):
2238                  seg = read_segments[i]
2239                  fbasename=getFileBaseName(seg)
2240 <                seg_out =  tmp_dir + fbasename + ".bwtout"
2240 >                seg_out =  tmp_dir + fbasename + ".mapped"
2241                  if use_zpacker: seg_out += ".z"
2242 <                unmapped_seg = tmp_dir + fbasename + "_unmapped.fq"
2243 <                if use_BWT_FIFO:
2244 <                    unmapped_seg += ".z"
2242 >                unmapped_seg = tmp_dir + fbasename + "_unmapped"
2243                  extra_output = "(%d/%d)" % (i+1, len(read_segments))
2244                  (seg_map, unmapped) = bowtie_segment(params,
2245                                                       bwt_idx_prefix,
# Line 2255 | Line 2253
2253                  segs.append(seg)
2254  
2255              # Collect the segment maps for left and right reads together
2256 <            maps[reads] = Maps(unspliced_map, unspliced_sam, seg_maps, unmapped_segs, segs)
2256 >            #maps[reads] = Maps(unspliced_map, unspliced_sam, seg_maps, unmapped_segs, segs)
2257 >            maps[reads] = Maps(unspliced_sam, seg_maps, unmapped_segs, segs)
2258          else:
2259              # if there's only one segment, just collect the initial map as the only
2260              # map to be used downstream for coverage-based junction discovery
2261              read_segments = [reads]
2262 <            maps[reads] = Maps(unspliced_map, unspliced_sam, [unspliced_map], [unmapped_unspliced], [unmapped_unspliced])
2262 >            #maps[reads] = Maps(unspliced_map, unspliced_sam, [unspliced_map], [unmapped_reads], [unmapped_reads])
2263 >            maps[reads] = Maps(unspliced_sam, [unspliced_sam], [unmapped_reads], [unmapped_reads])
2264  
2265      # Unless the user asked not to discover new junctions, start that process
2266      # here
2267  
2268      if params.find_novel_juncs or params.find_novel_indels:
2269 <        left_reads_map = maps[left_reads].unspliced_bwt
2269 >        #left_reads_map = maps[left_reads].unspliced_bwt
2270 >        left_reads_map = maps[left_reads].unspliced_sam
2271          left_seg_maps = maps[left_reads].seg_maps
2272          unmapped_reads = maps[left_reads].unmapped_segs
2273          if right_reads != None:
2274 <            right_reads_map = maps[right_reads].unspliced_bwt
2274 >            #right_reads_map = maps[right_reads].unspliced_bwt
2275 >            right_reads_map = maps[right_reads].unspliced_sam
2276              right_seg_maps = maps[right_reads].seg_maps
2277              unmapped_reads.extend(maps[right_reads].unmapped_segs)
2278          else:
# Line 2301 | Line 2303
2303          # discover addtional junctions
2304          if params.find_novel_juncs and params.closure_search and left_reads != None and right_reads != None:
2305              juncs = junctions_from_closures(params,
2306 <                                            [maps[left_reads].unspliced_bwt, maps[left_reads].seg_maps[-1]],
2307 <                                            [maps[right_reads].unspliced_bwt, maps[right_reads].seg_maps[-1]],
2306 >                                          #  [maps[left_reads].unspliced_bwt, maps[left_reads].seg_maps[-1]],
2307 >                                          #  [maps[right_reads].unspliced_bwt, maps[right_reads].seg_maps[-1]],
2308 >                                            [maps[left_reads].unspliced_sam, maps[left_reads].seg_maps[-1]],
2309 >                                            [maps[right_reads].unspliced_sam, maps[right_reads].seg_maps[-1]],
2310                                              ref_fasta)
2311              if os.path.getsize(juncs[0]) != 0:
2312                  possible_juncs.extend(juncs)
# Line 2361 | Line 2365
2365          rfdir = getFileDir(reads)
2366          mapped_reads = rfdir + rfname + ".candidates.bam"
2367          merged_map =rfdir + rfname + ".candidates_and_unspl.bam"
2368 <        unspl_bwtfile = maps[reads].unspliced_bwt
2368 >        # unspl_bwtfile = maps[reads].unspliced_bwt
2369          unspl_samfile = maps[reads].unspliced_sam
2370  
2371          join_mapped_segments(params,
# Line 2400 | Line 2404
2404              if not params.system_params.keep_tmp:
2405                  os.remove(mapped_reads)
2406                  os.remove(unspl_samfile)
2407 <                os.remove(unspl_bwtfile)
2407 >                #os.remove(unspl_bwtfile)
2408          else:
2409              os.rename(mapped_reads, merged_map)
2410              maps[reads] = [merged_map]
2411              if not params.system_params.keep_tmp:
2412                  os.remove(unspl_samfile)
2413 <                os.remove(unspl_bwtfile)
2413 >                #os.remove(unspl_bwtfile)
2414  
2415      return maps
2416  

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines