[Biococoa-dev] Even more on sequence formats

Alexander Griekspoor mek at mekentosj.com
Tue Apr 11 06:46:10 EDT 2006


By the way, I just found out that A Plasmid Editor (APE) files are
already supported by our framework, so you can add "ape" and "APE"
to the list of supported file extensions. This is the list I use
now:

     // Accepted file types: filename extensions (most given in lower and
     // upper case) followed by HFS type codes converted to file-type
     // strings with NSFileTypeForHFSTypeCode(). The list is nil-terminated,
     // as +arrayWithObjects: requires.
     NSArray *fileTypes = [NSArray arrayWithObjects:
         // plain text, FASTA and generic sequence extensions
         @"text", @"TEXT", @"txt", @"TXT",
         @"fasta", @"FASTA", @"seq", @"SEQ",
         // HTML and rich text
         @"html", @"HTML", @"htm", @"HTM",
         @"rtf", @"RTF", @"rtfd", @"RTFD",
         // format names used as extensions
         @"gde", @"fas", @"nessig", @"pir",
         @"nona", @"phylip", @"nexus",
         @"GDE", @"FAS", @"NESSIG", @"PIR",
         @"NONA", @"PHYLIP", @"NEXUS",
         @"raw", @"clustal", @"pdb", @"embl",
         @"swissprot", @"NCBI", @"GCK",
         @"RAW", @"CLUSTAL", @"PDB", @"EMBL",
         @"SWISSPROT", @"ncbi", @"gck",
         // short extensions, including the newly added "ape"/"APE"
         @"aln", @"hen", @"fst", @"msf", @"nxs",
         @"non", @"phy", @"tnt", @"ape",
         @"ALN", @"HEN", @"FST", @"MSF", @"NXS",
         @"NON", @"PHY", @"TNT", @"APE",
         @"exdna",
         // HFS type codes
         NSFileTypeForHFSTypeCode('TEXT'),
         NSFileTypeForHFSTypeCode('TXT '),
         NSFileTypeForHFSTypeCode('text'),
         NSFileTypeForHFSTypeCode('xDNA'),
         NSFileTypeForHFSTypeCode('DNA '),
         NSFileTypeForHFSTypeCode('GCKc'),
         NSFileTypeForHFSTypeCode('GCKs'),
         NSFileTypeForHFSTypeCode('NUCL'),
         nil];


And here is the modified readFile method so far (that is, without the
binary fixes and the unnecessary raw fix):

/**
 * Reads the sequence file at the given path and hands it to the reader that
 * matches its format.
 *
 * Binary files are recognised by their HFS type code ('xDNA' goes to
 * -readStriderFile:, 'GCKc'/'GCKs' go to -readGCKFile:) and are passed to the
 * reader by path. Everything else is loaded as text: an ".exdna" path is
 * treated as a directory whose "sequence.txt" holds the text, RTF content is
 * converted to its plain string, and the format is then chosen from the start
 * (or, for "proc/" and "xread", any part) of the text. Text that matches no
 * known marker is handed to -readRawFile:.
 *
 * @param textFile Path of the file (or ".exdna" package) to read.
 * @return The dictionary produced by the selected reader. For text formats
 *         the detected line break is added under the key @"lineBreak".
 *
 * NOTE(review): the text branch mutates the reader's result, so it assumes
 * the text readers return mutable dictionaries - confirm against the readers.
 */
- (NSDictionary *)readFile:(NSString *)textFile
{
	NSMutableDictionary *theContents;
	NSString *lineBreak;

	// Look the HFS type up once; it is compared against several codes below.
	NSString *hfsType = NSHFSTypeOfFile(textFile);

	// BINARY
	// Strider?
	if ([hfsType isEqualToString: @"'xDNA'"]) {

		theContents = (NSMutableDictionary *) [self readStriderFile:textFile];

	// GCK?
	} else if ([hfsType isEqualToString: @"'GCKc'"] ||
			   [hfsType isEqualToString: @"'GCKs'"]) {

		theContents = (NSMutableDictionary *) [self readGCKFile:textFile];

	// TEXT
	} else {
		NSMutableString *sequenceFile;

		// EXDNA: the text lives in "sequence.txt" inside the package.
		NSString *sequencePath = textFile;
		if ([[textFile pathExtension] isEqualToString:@"exdna"])
			sequencePath = [textFile stringByAppendingPathComponent: @"sequence.txt"];

		sequenceFile = [NSMutableString stringWithContentsOfFile:sequencePath];

		// The line break is detected on the text as loaded, before any RTF
		// conversion below.
		lineBreak = [self detectLineBreak:sequenceFile];

		// RTF?
		if ([sequenceFile hasCaseInsensitivePrefix: @"{\\rtf1"])
		{
			// Re-read the data from the file the prefix was found in, so that
			// ".exdna" packages are converted from sequence.txt rather than
			// from the package directory.
			NSAttributedString *rtfstring = [[NSAttributedString alloc]
				initWithRTF: [NSData dataWithContentsOfFile: sequencePath]
				documentAttributes: nil];

			// -setString: raises on nil, so keep the raw text when the RTF
			// data could not be parsed.
			if (rtfstring != nil)
				[sequenceFile setString: [rtfstring string]];
			[rtfstring release];
		}

		// DETERMINE TYPE
		// Both markers are looked up in the file contents, not in the path.
		if ([sequenceFile hasCaseInsensitivePrefix:@"#NEXUS"] ||
			[sequenceFile hasCaseInsensitivePrefix:@"#PAUP"])
		{
			theContents = (NSMutableDictionary *) [self readNexusFileAndBlocks:sequenceFile];
		}
		else if ([sequenceFile hasCaseInsensitivePrefix:@"CLUSTAL"])
		{
			theContents = (NSMutableDictionary *) [self readClustalFile:sequenceFile];
		}
		else if ([sequenceFile hasCaseInsensitivePrefix:@"Pileup"])
		{
			theContents = (NSMutableDictionary *) [self readMSFFile:sequenceFile];
		}
		else if ([sequenceFile hasCaseInsensitivePrefix:@">DL"])
		{
			// Must be tested before the generic ">" check below.
			theContents = (NSMutableDictionary *) [self readPirFile:sequenceFile];
		}
		else if ([sequenceFile hasPrefix:@">"])
		{
			theContents = (NSMutableDictionary *) [self readFastaFile:sequenceFile];
		}
		else if ([sequenceFile hasPrefix:@"HEADER"])
		{
			theContents = (NSMutableDictionary *) [self readPDBFile:sequenceFile];
		}
		else if ([sequenceFile hasPrefix:@"LOCUS"])
		{
			theContents = (NSMutableDictionary *) [self readNCBIFile:sequenceFile];
		}
		else if ([sequenceFile stringContains:@"proc/"])
		{
			theContents = (NSMutableDictionary *) [self readNonaFile:sequenceFile];
		}
		else if ([sequenceFile stringContains:@"xread"])
		{
			theContents = (NSMutableDictionary *) [self readHennigFile:sequenceFile];
		}
		else if ([sequenceFile hasPrefix:@"#"])
		{
			// Reached only when the "#NEXUS"/"#PAUP" check above did not match.
			theContents = (NSMutableDictionary *) [self readGDEFile:sequenceFile];
		}
		else if ([sequenceFile hasPrefix:@"ID"])
		{
			// Also works for EMBL files; both have the same prefix in the
			// first line.
			theContents = (NSMutableDictionary *) [self readSwissProtFile:sequenceFile];
		}
		else if ([sequenceFile stringBeginsWithTwoNumbers])
		{
			theContents = (NSMutableDictionary *) [self readPhylipFile:sequenceFile];
		}
		else
		{
			// Assume sequences are in Plain format
			theContents = (NSMutableDictionary *) [self readRawFile:sequenceFile];
		}

		// -setObject:forKey: raises on a nil object, so only record the line
		// break when one was detected.
		if (lineBreak != nil)
			[theContents setObject:lineBreak forKey:@"lineBreak"];
	}

	return theContents;
}


Those who pay attention will notice another format I would like to see
supported ;-)
Cheers,
Alex



On 11-apr-2006, at 12:39, Koen van der Drift wrote:

>
> On Apr 11, 2006, at 6:17 AM, Alexander Griekspoor wrote:
>
>> Please don't sue me!  I just copied what Peter wrote, anything you  
>> will try to get from me in court, I will redeem from him ;-)
>>
>
> Sounds like a good plan :D
>
> - Koen.
>

*********************************************************
                     ** Alexander Griekspoor **
*********************************************************
               The Netherlands Cancer Institute
               Department of Tumorbiology (H4)
          Plesmanlaan 121, 1066 CX, Amsterdam
                     Tel:  + 31 20 - 512 2023
                     Fax:  + 31 20 - 512 2029
                     AIM: mekentosj at mac.com
                     E-mail: a.griekspoor at nki.nl
                 Web: http://www.mekentosj.com

Windows is a 32-bit patch to a 16-bit shell for an 8-bit
operating system, written for a 4-bit processor by a 2-
bit company without 1 bit of sense.

*********************************************************


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.bioinformatics.org/pipermail/biococoa-dev/attachments/20060411/df864760/attachment.html>


More information about the Biococoa-dev mailing list