// // BCPairwiseAlignment.m // BioCocoa // // Created by Philipp Seibel on 10.03.05. // Copyright (c) 2005 The BioCocoa Project. All rights reserved. // #import "BCSequenceAlignment.h" // These defines are used to get the char relative to a given position, these are then compared and calculated a score for. #define DIAG (idxB - 1) * lenA + (idxA - 1) #define LEFT idxB * lenA + (idxA - 1) #define UP (idxB - 1) * lenA + idxA // Properties, indeed very elegant Phil! This is something we should use throughout our framework... NSString * const BCGapPenaltyProperty = @"BCGapPenaltyProperty"; NSString * const BCDefaultGapPenalty = @"BCDefaultGapPenalty"; NSString * const BCAffineGapPenalty = @"BCAffineGapPenalty"; NSString * const BCSubstitutionMatrixProperty = @"BCSubstitutionMatrixProperty"; NSString * const BCDefaultGapPenaltyProperty = @"BCDefaultGapPenaltyProperty"; NSString * const BCGapOpenPenaltyProperty = @"BCGapOpenPenaltyProperty"; NSString * const BCGapExtensionPenaltyProperty = @"BCGapExtensionPenaltyProperty"; // These ints are "arrows" in the direction of the lowest score. So for each position in the matrix we store 1) the score 2) the direction we choose. // These are used to backtrace the alignment from the lowerright corner back to the origin using the direction stored in each position. // The origin has no direction -> kNone typedef enum { kNone = 0, kDiagonal, kLeft, kUp } Pointers; @implementation BCSequenceAlignment ( PairwiseAlignment ) + (BCSequenceAlignment *)needlemanWunschAlignmentWithSequences:(NSArray *)theSequences properties:(NSDictionary *)properties { // Get the matrix BCScoreMatrix *substitutionMatrix = [properties objectForKey: BCSubstitutionMatrixProperty]; // Get the sequences, note that the headerdoc should indicate that this method only accepts two sequences, the rest is ignored BCSequence *sequenceA = [theSequences objectAtIndex:0]; BCSequence *sequenceB = [theSequences objectAtIndex:1]; // Convert the sequences to char arrays. HERE WE SHOULD HAVE A BCSEQUENCE METHOD THAT DIRECTLY GIVES US A CHAR ARRAY INSTEAD OF GOING THROUGH NSSTRING // Here's also where Charles and Phil would like to see the remapping const char * seqA = [[sequenceA sequenceString] cString]; const char * seqB = [[sequenceB sequenceString] cString]; // Get the gap costs int gapCosts = [(NSNumber *)[properties objectForKey: BCDefaultGapPenaltyProperty] intValue]; // Get the lengths of the two sequences. unsigned int lenA = [sequenceA length]; unsigned int lenB = [sequenceB length]; // SETUP PHASE // Allocate the backtracking matrix and score matrix (see enum above) int *backtracking = (int *)malloc( sizeof( int ) * ( lenA + 1 ) * ( lenB + 1 ) ); int *dynMatrix = (int *)malloc( sizeof( int ) * ( lenA + 1 ) * ( lenB + 1 ) ); unsigned int idxA; unsigned int idxB; // calculate the substitutionscore for A to B dynMatrix[ 0 ] = [substitutionMatrix substituteChar:seqA[0] forChar:seqB[0]]; // Set the position of the origin to none. backtracking[ 0 ] = kNone; // Set the top row to point to the origin with increasing gap costs for ( idxA = 1; idxA <= lenA; idxA++ ) { backtracking[ idxA ] = kLeft; dynMatrix[ idxA ] = idxA * gapCosts; } // Set the left column to point to the origin with increasing gap costs for ( idxB = 1; idxB <= lenB; idxB++ ) { backtracking[ idxB * lenA ] = kUp; dynMatrix[ idxB * lenA ] = idxB * gapCosts; } // FILL PHASE // Fill the score+direction matrix for ( idxA = 1; idxA <= lenA; idxA++ ) { // For each row for ( idxB = 1; idxB <= lenB; idxB++ ) { // For each column unsigned int currPos = idxB * lenA + idxA; // calculate the substitutionscore for A to B int substitutionScore = [substitutionMatrix substituteChar:seqA[idxA - 1] forChar:seqB[idxB - 1]]; // calculate the scores if we would go in each of the three directions int diagScore = dynMatrix[ DIAG ] + substitutionScore; int rightScore = dynMatrix[ LEFT ] + gapCosts; int downScore = dynMatrix[ UP ] + gapCosts; // which one is the smallest? Then go in that direction. Store the direction and score in the two matrices at the current position if ( diagScore >= rightScore ) { if ( diagScore > downScore ) { backtracking[ currPos ] = kDiagonal; dynMatrix[ currPos ] = diagScore; } else { backtracking[ currPos ] = kUp; dynMatrix[ currPos ] = downScore; } } else { if ( rightScore > downScore ) { backtracking[ currPos ] = kLeft; dynMatrix[ currPos ] = rightScore; } else { backtracking[ currPos ] = kUp; dynMatrix[ currPos ] = downScore; } } } } // BACKTRACE // Start at lowerright corner int i = lenA; int j = lenB; int k = 0; // Allocate size to read the alignment in, note the size is the maximal possible one (length of A + length of B) char *a = ( char * ) malloc( (lenA + lenB) * sizeof(char)); char *b = ( char * ) malloc( (lenA + lenB) * sizeof(char)); // Backtrace till we reach a kNone direction (i.a.w. the origin while ( backtracking[j * lenA + i ] != kNone ) { // Follow the direction switch(backtracking[j * lenA + i ]){ // Match case kDiagonal : a[k] = seqA[i - 1]; b[k] = seqB[j - 1]; i--; j--; k++; break; // Shift to left case kLeft : a[k] = seqA[i - 1]; b[k] = '-'; i--; k++; break; // Shift up case kUp : a[k] = '-'; b[k] = seqB[j - 1]; j--; k++; break; } } // Create arrays with proper size to store the alignment char *an = (char *)malloc( sizeof(char) * k ); char *bn = (char *)malloc( sizeof(char) * k ); // Reverse the order (remember we started at the end) for(i=k-1;i>=0;i--) an[k-i-1] = a[i]; for(j=k-1;j>=0;j--) bn[k-j-1] = b[j]; // Recreate BCSequence objects // HERE WE SHOULD HAVE A BCSEQUENCE METHOD THAT DIRECTLY INSTANTIATES FROM A CHAR ARRAY INSTEAD OF GOING THROUGH NSSTRING // Here's also where Charles and Phil would like to see the back-remapping BCSequence *alnA = [BCSequence sequenceWithString:[NSString stringWithCString:an length:k] skippingUnknownSymbols:YES usingType:BCOtherSequence]; BCSequence *alnB = [BCSequence sequenceWithString:[NSString stringWithCString:bn length:k] skippingUnknownSymbols:YES usingType:BCOtherSequence]; // Create the BCSequenceAlignment object return [[[BCSequenceAlignment alloc] initWithSequenceArray:[NSArray arrayWithObjects:alnA,alnB,nil]] autorelease]; } + (BCSequenceAlignment *)smithWatermanAlignmentWithSequences:(NSArray *)theSequences properties:(NSDictionary *)properties { return nil; } @end