Adds MacVector ('PROT' / 'NUCL') sequence file support to BCSequenceReader.
Indentation of the context lines is reconstructed; if a hunk does not match,
apply with `patch -l` (ignore whitespace).

diff -Naur /Volumes/BioCocoa/BCFoundation/BCSequenceIO/BCSequenceReader.h Developer/BioCocoa/BCFoundation/BCSequenceIO/BCSequenceReader.h
--- /Volumes/BioCocoa/BCFoundation/BCSequenceIO/BCSequenceReader.h	2006-08-04 02:21:22.000000000 +0200
+++ Developer/BioCocoa/BCFoundation/BCSequenceIO/BCSequenceReader.h	2007-09-12 08:23:37.000000000 +0200
@@ -71,6 +71,31 @@
 	int nLength;
 }GCK_HEADER;
 
+// Header found at the start of a MacVector sequence file.
+// seqLength is read as big-endian; presumably the other int fields are too.
+// NOTE(review): readMacVectorFile: copies sizeof(MACVECTOR_HEADER) raw bytes
+// into this struct, so it relies on the compiler placing empty6 at offset 12
+// (one padding byte after year). Confirm before adding or removing fields.
+typedef struct macvector_header
+{
+	char seqType;		// 0: nucleic, else: protein
+	char empty1;
+	char empty2;
+	char topology;		// 0: linear, else: circular
+	char empty3;
+	char empty4;
+	char ntType;		// 1: RNA, else: DNA
+	char empty5;
+	char month;		// 1-12
+	char day;		// 1-31
+	char year;		// 0-255 + 1900
+	int empty6;
+	int nSegments;		// always 1. Total number of sequences, BTW
+	int totalLength;	// equal to sequence length (nSegments = 1)
+	int segNo;		// Number of segment (always 1?)
+	int seqLength;		// this segment length
+	int seqBytes;		// this segment length again (???)
+}MACVECTOR_HEADER;
 
 @interface BCSequenceReader : NSObject
 {
@@ -87,6 +112,7 @@
 - (BCSequenceArray *)readClustalFile:(NSString *)textFile;
 - (BCSequenceArray *)readStriderFile:(NSString *)textFile;
 - (BCSequenceArray *)readGCKFile:(NSString *)textFile;
+- (BCSequenceArray *)readMacVectorFile:(NSString *)textFile;
 - (BCSequenceArray *)readGDEFile:(NSString *)entryString;
 - (BCSequenceArray *)readPirFile:(NSString *)entryString;
 - (BCSequenceArray *)readMSFFile:(NSString *)entryString;
diff -Naur /Volumes/BioCocoa/BCFoundation/BCSequenceIO/BCSequenceReader.m Developer/BioCocoa/BCFoundation/BCSequenceIO/BCSequenceReader.m
--- /Volumes/BioCocoa/BCFoundation/BCSequenceIO/BCSequenceReader.m	2006-08-04 02:21:22.000000000 +0200
+++ Developer/BioCocoa/BCFoundation/BCSequenceIO/BCSequenceReader.m	2007-09-12 08:26:16.000000000 +0200
@@ -46,7 +46,10 @@
 		{
 			result = [self readGCKFile: filePath];
 		}
-
+		else if([NSHFSTypeOfFile(filePath) isEqualToString: @"'PROT'"] || [NSHFSTypeOfFile(filePath) isEqualToString: @"'NUCL'"])
+		{
+			result = [self readMacVectorFile: filePath];
+		}
 		else // TEXT file
 		{
 			NSMutableString *sequenceFileContents;
@@ -504,6 +507,94 @@
 	return result;
 }
 
+/*
+	Reads a MacVector binary sequence file (HFS type 'PROT' or 'NUCL').
+
+	Layout as read here: a MACVECTOR_HEADER immediately followed by seqLength
+	bytes, one per residue. Each byte is an index into an alphabet string
+	(0x02 -> 'C', and so on); seqLength is stored big-endian.
+
+	Returns a BCSequenceArray holding the sequence, or an empty array when the
+	file is unreadable, truncated or has no residues. Annotations are not read.
+
+	NOTE(review): the array is returned straight from alloc/init, as in the
+	original version of this method. Confirm what the caller expects before
+	adding an autorelease.
+*/
+- (BCSequenceArray *)readMacVectorFile:(NSString *)textFile
+{
+	// Index 0 is the gap. The last letter of each alphabet (X or N, the
+	// "any residue" symbol) doubles as the fallback for unknown codes.
+	static const char proteinAlphabet[] = "-ACDEFGHIKLMNPQRSTVWYB*X";
+	static const char rnaAlphabet[] = "-ACMGRSVUWYHKDBN";	// no T's, only U's
+	static const char dnaAlphabet[] = "-ACMGRSVTWYHKDBN";
+
+	MACVECTOR_HEADER header;
+	const char *alphabet;
+	unsigned int alphabetLength, dataLength, seqLength, i;
+	const unsigned char *codes;
+	char *letters;
+	NSMutableData *letterData;
+	NSString *sequenceString;
+
+	BCSequenceArray *result = [[BCSequenceArray alloc] init];
+	NSData *data = [NSData dataWithContentsOfFile: textFile];
+
+	// A missing file gives nil data, whose length is 0, so this one test also
+	// covers read failures. getBytes:length: must never be asked for more
+	// bytes than the data holds.
+	dataLength = [data length];
+	if (dataLength < sizeof(MACVECTOR_HEADER))
+	{
+		return result;
+	}
+	[data getBytes: &header length: sizeof(MACVECTOR_HEADER)];
+
+	if ([NSHFSTypeOfFile(textFile) isEqualToString: @"'PROT'"])
+	{
+		alphabet = proteinAlphabet;
+		alphabetLength = sizeof(proteinAlphabet) - 1;
+	}
+	else if (header.ntType == 1)
+	{
+		alphabet = rnaAlphabet;
+		alphabetLength = sizeof(rnaAlphabet) - 1;
+	}
+	else
+	{
+		alphabet = dnaAlphabet;
+		alphabetLength = sizeof(dnaAlphabet) - 1;
+	}
+
+	// Reading the length as unsigned turns a negative (corrupt) value into a
+	// huge one, which the range test below rejects.
+	seqLength = CFSwapInt32BigToHost(header.seqLength);
+	if (seqLength == 0 || seqLength > dataLength - sizeof(MACVECTOR_HEADER))
+	{
+		return result;
+	}
+
+	// Translate straight out of the NSData buffer. That memory belongs to
+	// data, so it is neither copied into a malloc'ed block nor freed here.
+	codes = (const unsigned char *)[data bytes] + sizeof(MACVECTOR_HEADER);
+	letterData = [NSMutableData dataWithLength: seqLength];
+	letters = [letterData mutableBytes];
+	for (i = 0; i < seqLength; i++)
+	{
+		unsigned char code = codes[i];
+		letters[i] = (code < alphabetLength) ? alphabet[code] : alphabet[alphabetLength - 1];
+	}
+
+	sequenceString = [[NSString alloc] initWithData: letterData encoding: NSASCIIStringEncoding];
+	if ([sequenceString length])
+	{
+		[result addSequence: [BCSequence sequenceWithString: sequenceString]];
+	}
+	[sequenceString release];
+
+	return result;
+}
+
 
 -(BCSequenceArray *)readClustalFile:(NSString *)entryString
 {