ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gffread/gffread.cpp
(Generate patch)
# Line 9 | Line 9
9  
10   #define USAGE "Usage:\n\
11   gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>] \n\
12 < [-o <outfile.gff>] [-t <tname>] [-r [[<strand>]<chr>:]<start>..<end>] \n\
13 < [-CTVNJMKQAFGRUVBHZWTOLE] [-w <spl_exons.fa>] [-x <spl_cds.fa>] [-y <tr_cds.fa>]\n\
12 > [-o <outfile.gff>] [-t <tname>] [-r [[<strand>]<chr>:]<start>..<end> [-R]]\n\
13 > [-CTVNJMKQAFGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]\n\
14   [-i <maxintron>] \n\
15   Filters and/or converts GFF3/GTF2 records.\n\
16   <input_gff> is a GFF file, use '-' if the GFF records will be given at stdin\n\
# Line 24 | Line 24
24        <seq-name> <seq-length> <seq-description>\n\
25        (useful for -A option with mRNA/EST/protein mappings)\n\
26    -i  discard transcripts having an intron larger than <maxintron>\n\
27 <  -r  only show transcripts crossing coordinate range <start>..<end>\n\
27 >  -r  only show transcripts overlapping coordinate range <start>..<end>\n\
28        (on chromosome/contig <chr>, strand <strand> if provided)\n\
29    -R  for -r option, discard all transcripts that are not fully \n\
30 <      contained within given range\n\
30 >      contained within the given range\n\
31    -U  discard single-exon transcripts\n\
32    -C  coding only: discard mRNAs that have no CDS feature\n\
33    -F  full GFF attribute preservation (all attributes are shown)\n\
# Line 129 | Line 129
129   bool fullCDSonly=false; // starts with START, ends with STOP codon
130   bool fullattr=false;
131   //bool sortByLoc=false; // if the GFF output should be sorted by location
132 < bool ensembl_convert=false; //-L, assisst in converting Ensembl GTF to GFF3
132 > bool ensembl_convert=false; //-L, assist in converting Ensembl GTF to GFF3
133  
134  
135   //GStr gseqpath;
# Line 309 | Line 309
309   if (gname==NULL) gname=gffrec.getGeneID();
310   GStr defline(gffrec.getID());
311   if (f_out && !fmtGTF) {
312 <     const char* tn=NULL;
313 <     if ((tn=gffrec.getAttr("transcript_name"))!=NULL) {
314 <        gffrec.addAttr("Name", tn);
312 >     const char* tname=NULL;
313 >     if ((tname=gffrec.getAttr("transcript_name"))!=NULL) {
314 >        gffrec.addAttr("Name", tname);
315          gffrec.removeAttr("transcript_name");
316          }
317       }
318   if (ensembl_convert && startsWith(gffrec.getID(), "ENS")) {
319 <      const char* tn=gffrec.getTrackName();
320 <      gffrec.addAttr("type", tn);
319 >      const char* biotype=gffrec.getAttr("gene_biotype");
320 >      if (biotype) {
321 >         gffrec.addAttr("type", biotype);
322 >         gffrec.removeAttr("gene_biotype");
323 >         }
324 >       else { //old Ensembl files lacking gene_biotype
325 >         gffrec.addAttr("type", gffrec.getTrackName());
326 >         }
327 >
328        //bool is_gene=false;
329        bool is_pseudo=false;
330 <      if (strcmp(tn, "protein_coding")==0 || gffrec.hasCDS())
330 >      if (strcmp(biotype, "protein_coding")==0 || gffrec.hasCDS())
331                  gffrec.setFeatureName("mRNA");
332         else {
333 <          if (strcmp(tn, "processed_transcript")==0)
333 >          if (strcmp(biotype, "processed_transcript")==0)
334                gffrec.setFeatureName("proc_RNA");
335              else {
336 <              //is_gene=endsWith(tn, "gene");
337 <              is_pseudo=strifind(tn, "pseudo");
336 >              //is_gene=endsWith(biotype, "gene");
337 >              is_pseudo=strifind(biotype, "pseudo");
338                if (is_pseudo) {
339                     gffrec.setFeatureName("pseudo_RNA");
340                     }
341 <                else if (endsWith(tn, "RNA")) {
342 <                   gffrec.setFeatureName(tn);
341 >                else if (endsWith(biotype, "RNA")) {
342 >                   gffrec.setFeatureName(biotype);
343                     } else gffrec.setFeatureName("misc_RNA");
344                }
345            }
# Line 602 | Line 609
609    if (reftbl.Count()>0) {
610          GStr refname(gffrec->getRefName());
611          RefTran* rt=reftbl.Find(refname.chars());
612 <        if (rt==NULL && refname[-2]=='.' && isdigit(refname[-1])) {
613 <           //try removing the version
612 >        if (rt==NULL && refname.length()>2 && refname[-2]=='.' && isdigit(refname[-1])) {
613 >           //try removing the version suffix
614             refname.cut(-2);
615             //GMessage("[DEBUG] Trying ref name '%s'...\n", refname.chars());
616             rt=reftbl.Find(refname.chars());
# Line 639 | Line 646
646       if (rfltStart!=0 || rfltEnd!=MAX_UINT) {
647         if (rfltWithin) {
648           if (gffrec->start<rfltStart || gffrec->end>rfltEnd) {
649 <            return false;
649 >            return false; //not within query range
650              }
651           }
652         else {
# Line 673 | Line 680
680  
681   int main(int argc, char * const argv[]) {
682   GArgs args(argc, argv,
683 <   "debug;merge;cluster-only;help;MINCOV=MINPID=hvOUNHWCVJMKQNSXTDAPRZFGLEm:g:i:r:s:t:a:b:o:w:x:y:d:");
683 >   "debug;merge;cluster-only;help;force-exons;MINCOV=MINPID=hvOUNHWCVJMKQNSXTDAPRZFGLEm:g:i:r:s:t:a:b:o:w:x:y:d:");
684   args.printError(USAGE, true);
685   if (args.getOpt('h') || args.getOpt("help")) {
686      GMessage("%s",USAGE);
687      exit(1);
688      }
689   debugMode=(args.getOpt("debug")!=NULL);
690 + bool forceExons=(args.getOpt("force-exons")!=NULL);
691   mRNAOnly=(args.getOpt('O')==NULL);
692   //sortByLoc=(args.getOpt('S')!=NULL);
693   addDescr=(args.getOpt('A')!=NULL);
# Line 713 | Line 721
721        exit(1);
722        }
723      }
716 //protmap=(args.getOpt('P')!=NULL);
724   if (fullCDSonly) validCDSonly=true;
725   if (verbose) {
726       fprintf(stderr, "Command line was:\n");
# Line 786 | Line 793
793        rfltEnd=(uint)gsend.asInt();
794        if (rfltEnd==0) rfltEnd=MAX_UINT;
795        }
789  
796     } //gseq/range filtering
797   else {
798     if (rfltWithin)
799 <     GError("Error: option -R doesn't make sense without -r!\n");
799 >     GError("Error: option -R requires -r!\n");
800 >   //if (rfltWholeTranscript)
801 >   //  GError("Error: option -P requires -r!\n");
802     }
803   s=args.getOpt('m');
804   if (!s.is_empty()) {
# Line 840 | Line 848
848     gffloader.mergeCloseExons=mergeCloseExons;
849     gffloader.showWarnings=(args.getOpt('E')!=NULL);
850     gffloader.load(g_data, &validateGffRec, doCluster, doCollapseRedundant,
851 <                             matchAllIntrons, fuzzSpan);
851 >                             matchAllIntrons, fuzzSpan, forceExons);
852     if (doCluster)
853       collectLocusData(g_data);
854     if (numfiles==0) break;

Diff Legend

(no marker) Removed lines
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)