ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/long_spanning_reads.cpp
(Generate patch)
# Line 1015 | Line 1015
1015             std::set<Insertion>& possible_insertions,
1016             ReadTable& unmapped_reads,
1017             RefSequenceTable& rt,
1018 <           FILE* reads_file,
1018 >           //FILE* reads_file,
1019 >           ReadStream& readstream,
1020             vector<HitStream>& contig_hits,
1021             vector<HitStream>& spliced_hits)
1022   {
1023 <  uint32_t curr_contig_obs_order = 0xFFFFFFFF;
1023 >  uint32_t curr_contig_obs_order = VMAXINT32;
1024    HitStream* first_seg_contig_stream = NULL;
1025    uint64_t next_contig_id = 0;
1026  
# Line 1032 | Line 1033
1033  
1034    HitsForRead curr_hit_group;
1035  
1036 <  uint32_t curr_spliced_obs_order = 0xFFFFFFFF;
1036 >  uint32_t curr_spliced_obs_order = VMAXINT32;
1037    HitStream* first_seg_spliced_stream = NULL;
1038    uint64_t next_spliced_id = 0;
1039  
# Line 1043 | Line 1044
1044        curr_spliced_obs_order = unmapped_reads.observation_order(next_spliced_id);
1045      }
1046  
1047 <  while(curr_contig_obs_order != 0xFFFFFFFF ||
1048 <  curr_spliced_obs_order != 0xFFFFFFFF)
1047 >  while(curr_contig_obs_order != VMAXINT32 ||
1048 >  curr_spliced_obs_order != VMAXINT32)
1049      {
1050        uint32_t read_in_process;
1051        vector<HitsForRead> seg_hits_for_read;
# Line 1073 | Line 1074
1074      curr_spliced_obs_order = next_order;
1075    }
1076        else if (curr_contig_obs_order == curr_spliced_obs_order &&
1077 <         curr_contig_obs_order != 0xFFFFFFFF &&
1078 <         curr_spliced_obs_order != 0xFFFFFFFF)
1077 >         curr_contig_obs_order != VMAXINT32 &&
1078 >         curr_spliced_obs_order != VMAXINT32)
1079    {
1080      first_seg_contig_stream->next_read_hits(curr_hit_group);
1081  
# Line 1102 | Line 1103
1103    }
1104  
1105        if (contig_hits.size() > 1)
1106 <  {
1107 <    look_right_for_hit_group(unmapped_reads,
1108 <          contig_hits,
1109 <          0,
1110 <          spliced_hits,
1111 <          curr_hit_group,
1112 <          seg_hits_for_read);
1113 <  }
1106 >      {
1107 >        look_right_for_hit_group(unmapped_reads,
1108 >              contig_hits,
1109 >              0,
1110 >              spliced_hits,
1111 >              curr_hit_group,
1112 >              seg_hits_for_read);
1113 >      }
1114  
1115        size_t last_non_empty = seg_hits_for_read.size() - 1;
1116        while(last_non_empty >= 0 && seg_hits_for_read[last_non_empty].hits.empty())
1117 <  {
1118 <    --last_non_empty;
1119 <  }
1117 >      {
1118 >        --last_non_empty;
1119 >      }
1120  
1121        seg_hits_for_read.resize(last_non_empty + 1);
1122        if (!seg_hits_for_read[last_non_empty].hits[0].end())
1123 <  continue;
1123 >         continue;
1124  
1125        if (!seg_hits_for_read.empty() && !seg_hits_for_read[0].hits.empty())
1126    {
1127      uint64_t insert_id = seg_hits_for_read[0].hits[0].insert_id();
1128 <    char read_name[256];
1129 <    char read_seq[256];
1129 <    char read_alt_name[256];
1130 <    char read_quals[256];
1131 <
1132 <    if (get_read_from_stream(insert_id,
1128 >    Read read;
1129 >    /* if (get_read_from_stream(insert_id,
1130             reads_file,
1131             reads_format,
1132             false,
1133 <           read_name,
1134 <           read_seq,
1138 <           read_alt_name,
1139 <           read_quals))
1133 >           read)) */
1134 >    if (readstream.getRead(insert_id, read))
1135        {
1136          vector<BowtieHit> joined_hits;
1137          join_segments_for_read(rt,
1138 <             read_seq,
1139 <             read_quals,
1138 >             read.seq.c_str(),
1139 >             read.qual.c_str(),
1140               possible_juncs,
1141               possible_insertions,
1142               seg_hits_for_read,
# Line 1152 | Line 1147
1147          joined_hits.erase(new_end, joined_hits.end());
1148  
1149          for (size_t i = 0; i < joined_hits.size(); i++)
1150 <    {
1151 <      const char* ref_name = rt.get_name(joined_hits[i].ref_id());
1152 <      if (color && !color_out)
1153 <        //print_hit(stdout, read_name, joined_hits[i], ref_name, joined_hits[i].seq().c_str(), joined_hits[i].qual().c_str(), true);
1154 <        print_bamhit(bam_writer, read_name, joined_hits[i], ref_name, joined_hits[i].seq().c_str(), joined_hits[i].qual().c_str(), true);
1155 <      else
1156 <        print_bamhit(bam_writer, read_name, joined_hits[i], ref_name, read_seq, read_quals, false);
1157 <        //print_hit(stdout, read_name, joined_hits[i], ref_name, read_seq, read_quals, false);
1158 <    }
1150 >        {
1151 >          const char* ref_name = rt.get_name(joined_hits[i].ref_id());
1152 >          if (color && !color_out)
1153 >            //print_hit(stdout, read_name, joined_hits[i], ref_name, joined_hits[i].seq().c_str(), joined_hits[i].qual().c_str(), true);
1154 >            print_bamhit(bam_writer, read.name.c_str(), joined_hits[i], ref_name, joined_hits[i].seq().c_str(),
1155 >                                         joined_hits[i].qual().c_str(), true);
1156 >          else
1157 >            print_bamhit(bam_writer, read.name.c_str(), joined_hits[i], ref_name,
1158 >                                          read.seq.c_str(), read.qual.c_str(), false);
1159 >            //print_hit(stdout, read_name, joined_hits[i], ref_name, read_seq, read_quals, false);
1160 >        }
1161        }
1162      else
1163        {
# Line 1180 | Line 1177
1177        vector<FILE*>& possible_insertions_files,
1178        vector<FILE*>& possible_deletions_files,
1179        vector<FZPipe>& spliced_segmap_files, //.sam.z files
1180 <      vector<string>& segmap_fnames, //.bam files
1181 <      FZPipe& reads_file)
1180 >       vector<string>& segmap_fnames, //.bam files
1181 >      ReadStream& readstream)
1182   {
1183    if (segmap_fnames.size() == 0)
1184    {
# Line 1198 | Line 1195
1195    bool need_seq = true;
1196    bool need_qual = color;
1197    //rewind(reads_file);
1198 <  reads_file.rewind();
1198 >  readstream.rewind();
1199  
1200    //vector<HitTable> seg_hits;
1201    vector<HitStream> contig_hits;
# Line 1324 | Line 1321
1321      }
1322    }
1323  
1324 <  join_segment_hits(bam_writer, possible_juncs, possible_insertions, it, rt, reads_file.file, contig_hits, spliced_hits);
1324 >  join_segment_hits(bam_writer, possible_juncs, possible_insertions, it, rt, readstream, contig_hits, spliced_hits);
1325    //join_segment_hits(possible_juncs, possible_insertions, it, rt, reads_file.file, contig_hits, spliced_hits);
1326  
1327    /*for (size_t i = 0; i < segmap_fnames.size(); ++i)
# Line 1332 | Line 1329
1329    */
1330    for (size_t i = 0; i < spliced_segmap_files.size(); ++i)
1331          spliced_segmap_files[i].close();
1332 <    reads_file.close();
1332 >  readstream.close();
1333  
1334    for (size_t fac = 0; fac < factories.size(); ++fac)
1335    {
# Line 1412 | Line 1409
1409    //FILE* reads_file = fopen(reads_file_name.c_str(), "r");
1410    vector<string> segmap_file_names;
1411    tokenize(segmap_file_list, ",",segmap_file_names);
1412 <
1413 <  string unzcmd=getUnpackCmd(reads_file_name, spliced_segmap_flist.empty() &&
1417 <                 segmap_file_names.size()<4);
1418 <  FZPipe reads_file(reads_file_name, unzcmd);
1419 <  if (reads_file.file==NULL)
1412 >  ReadStream readstream(reads_file_name);
1413 >  if (readstream.file()==NULL)
1414       err_die("Error: cannot open %s for reading\n",
1415          reads_file_name.c_str());
1416  
# Line 1476 | Line 1470
1470          insertions_files.push_back(insertions_file);
1471      }
1472  
1479    //vector<FZPipe> segmap_files;
1480    /*
1481    vector<string> segmap_files; //BAM files
1482    for (size_t i = 0; i < segmap_file_names.size(); ++i)
1483    {
1484        fprintf(stderr, "Opening %s for reading\n",
1485        segmap_file_names[i].c_str());
1486      FZPipe seg_file(segmap_file_names[i], unzcmd);
1487      if (seg_file.file == NULL)
1488        {
1489        err_die("Error: cannot open %s for reading\n",
1490              segmap_file_names[i].c_str());
1491        }
1492      segmap_files.push_back(seg_file);
1493
1494    }*/
1495
1473    vector<string> spliced_segmap_file_names;
1474    vector<FZPipe> spliced_segmap_files;
1475    //these are headerless .sam.z files
1476 +  string unzcmd;
1477    tokenize(spliced_segmap_flist, ",",spliced_segmap_file_names);
1478    for (size_t i = 0; i < spliced_segmap_file_names.size(); ++i)
1479      {
1480 <      if (i==0) {
1503 <        unzcmd=getUnpackCmd(spliced_segmap_file_names[i]);
1504 <        }
1505 <          fprintf(stderr, "Opening %s for reading\n",
1480 >      fprintf(stderr, "Opening %s for reading\n",
1481          spliced_segmap_file_names[i].c_str());
1482 +      if (unzcmd.empty())
1483 +          unzcmd=getUnpackCmd(spliced_segmap_file_names[i],false);
1484        FZPipe spliced_seg_file(spliced_segmap_file_names[i], unzcmd);
1485        if (spliced_seg_file.file == NULL)
1486          {
# Line 1517 | Line 1494
1494      {
1495        if (spliced_segmap_files.size() != segmap_file_names.size())
1496          {
1497 <        err_die("Error: each segment mapping file must have a corresponding spliced segment mapping file\n");
1497 >        err_die("Error: each segment file must have a corresponding spliced segment file\n");
1498          }
1499      }
1500    GBamWriter bam_writer("-", sam_header.c_str());
1501 <  driver(bam_writer, ref_stream, juncs_files, insertions_files, deletions_files, spliced_segmap_files, segmap_file_names, reads_file);
1501 >  driver(bam_writer, ref_stream, juncs_files, insertions_files, deletions_files, spliced_segmap_files, segmap_file_names, readstream);
1502 >  //driver(ref_stream, juncs_files, insertions_files, deletions_files, spliced_segment_files, segment_files, reads_file);
1503    return 0;
1504   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines