#include <stdio.h>
#include <libspe.h>
#include <sys/wait.h>

#include <stdlib.h>
#include <string.h>

#include <time.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/param.h>

#include "../swsse2.h"
#include "matrix.h"
#include "fastalib.h"

#include <vec_types.h>
#include <vec_literal.h> 
 
extern spe_program_handle_t farrar_SPU;


/*
 * Identifiers for the available search implementations.
 * The enumerators are listed in the same order (and under the same
 * WITH_ROGNES guard) as the name strings in SW_IMPLEMENATION.
 */
typedef enum {
    SCALAR,
    WOZNIAK,
#ifdef WITH_ROGNES
    ROGNES,
#endif
    STRIPED
} SW_TYPES;

/*
 * Human-readable names of the search implementations, one string per
 * SW_TYPES enumerator and in the same order ("Rognes" is only present when
 * WITH_ROGNES is defined, like the ROGNES enumerator).
 * NOTE(review): the identifier is misspelled ("IMPLEMENATION"); it has
 * external linkage, so renaming it needs a check of every other user.
 */
const char *SW_IMPLEMENATION[] = {
    "Non-optimized",
    "Wozniak",
#ifdef WITH_ROGNES
    "Rognes",
#endif
    "Striped",
};

/*
 * Table of three function pointers describing one search implementation.
 * NOTE(review): nothing in the visible part of this file instantiates this
 * type; the roles below are read off the member names and signatures --
 * confirm against the implementations that fill the table.
 */
typedef struct { 
    /* Takes the query sequence, its length and a substitution matrix and
       returns an SW_DATA pointer. */
    SW_DATA *(*init) (unsigned char   *querySeq,
                      int              queryLength,
                      signed char     *matrix);
    /* Takes the query, a FASTA library, an untyped swData pointer
       (presumably the value returned by init), the search options and the
       score list to report into. */
    void     (*scan) (unsigned char   *querySeq,
                      int              queryLength,
                      FASTA_LIB       *dbLib,
                      void            *swData,
                      SEARCH_OPTIONS  *options,
                      SCORE_LIST      *scores);
    /* Takes the SW_DATA pointer; presumably releases it. */
    void     (*done) (SW_DATA         *pSwData);
} SW_FUNCT_DEFS;


/*
 * Residue letters in code order: AMINO_ACIDS[n] is the upper-case letter
 * that AMINO_ACID_VALUE maps to n. 23 letters are listed; the alphabet
 * omits J, O and U. ALPHA_SIZE is defined outside this file.
 */
const char AMINO_ACIDS[ALPHA_SIZE] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 
    'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 
    'S', 'T', 'V', 'W', 'X', 'Y', 'Z'
};

/*
 * Lookup table from a character code (0..255) to its residue code.
 * Each row holds 16 entries. Upper-case letters (row starting at 0x40) and
 * lower-case letters (row starting at 0x60) map to the same codes 0..22;
 * J/j, O/o, U/u and every non-letter map to -1.
 */
const int AMINO_ACID_VALUE[256] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* '@', 'A'..'O' */
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8, -1,  9, 10, 11, 12, -1,
    /* 'P'..'Z', then punctuation */
    13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1,
    /* '`', 'a'..'o' */
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8, -1,  9, 10, 11, 12, -1,
    /* 'p'..'z', then punctuation */
    13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/* Number of SPE threads that main() creates and waits for. */
const int NUM_SPES = 6;
/* Score-list helpers defined later in this file. */
SCORE_LIST *initList (int count);
void freeList (SCORE_LIST *list);
/* Host-side setup routine; main() calls it before starting the SPE threads. */
int main_process(void);

void printResults (SCORE_LIST *list);
         
/*
 * One context per SPE thread: main() passes &ctx[i] as the argument pointer
 * of thread i. The array size is the literal 6 and must be kept equal to
 * NUM_SPES by hand (NUM_SPES is a const object, not a constant expression).
 * NOTE(review): the 128-byte alignment is presumably required for DMA
 * access from the SPEs -- confirm.
 */
context ctx[6] __attribute__ ((aligned (128)));         
/*
 * Program entry point (PPE side).
 *
 * Numbers the per-SPE contexts, runs the host-side setup in main_process(),
 * then starts one farrar_SPU thread per context and waits for all of them.
 *
 * Returns 0 when setup succeeded and every SPE thread was created and
 * waited for, EXIT_FAILURE otherwise. A non-zero result from main_process()
 * is treated as a setup failure.
 */
int main (void)
{
	speid_t speid[NUM_SPES];
	int status[NUM_SPES];
	int launched;
	int rc = 0;
	int i;

	/* Each SPE receives its own index through its context. */
	for (i = 0; i < NUM_SPES; i++) {
		ctx[i].pos = i;
	}

	/* Read in the input sequence and fill in the shared context fields. */
	if (main_process() != 0) {
		return EXIT_FAILURE;
	}

	/* Start the SPE threads; stop at the first one that cannot be created. */
	for (launched = 0; launched < NUM_SPES; launched++) {
		speid[launched] = spe_create_thread(0, &farrar_SPU, &ctx[launched],
		                                    NULL, -1, 0);
		if (speid[launched] == NULL) {
			perror("spe_create_thread");
			rc = EXIT_FAILURE;
			break;
		}
	}

	/* Only wait for threads that were actually created. */
	for (i = 0; i < launched; i++) {
		if (spe_wait(speid[i], &status[i], 0) != 0) {
			perror("spe_wait");
			rc = EXIT_FAILURE;
		}
	}

	return rc;
}

// Prototype for the PPE-side sequence reader, defined outside this file.
// As used by main_process(): takes an open FASTA library and stores the
// sequence length through `length`; a length of 0 is treated as "no sequence".
unsigned char * nextSeqPPE (FASTA_LIB *lib, int *length);


/*
 * Host-side setup performed before the SPE threads are started.
 *
 * Reads the substitution matrix, opens the database and query FASTA files,
 * reads the first query sequence and stores its length in every SPE context
 * (ctx[k].queryLen). Both libraries are closed again before returning.
 *
 * An earlier revision also preloaded the database sequences into 128-byte
 * aligned buffers for the SPEs at this point; that code had been disabled
 * (commented out) and has been dropped.
 *
 * Returns 0 on success and -1 when the matrix cannot be read or the query
 * sequence is empty (an error message is printed to stderr in both cases).
 *
 * NOTE(review): the matrix returned by readMatrix() is neither used nor
 * released here -- confirm who owns it before adding a free().
 */
int main_process (void)
{
    SW_TYPES swType = STRIPED;

    char *dbFile = "../db.fasta";
    char *queryFile = "../ptest1.fasta";
    char *matrixFile = "../blosum45";

    signed char *matrix;

    /* Initialised so the emptiness check below is well defined even if
       nextSeqPPE() does not store a length. */
    int queryLen = 0;
    int k;

    FASTA_LIB *queryLib;
    FASTA_LIB *dbLib;

    matrix = readMatrix (matrixFile);
    if (matrix == NULL) {
        fprintf (stderr, "Error reading matrix\n");
        return -1;
    }

    dbLib = openLib (dbFile, swType == WOZNIAK);
    queryLib = openLib (queryFile, 0);

    /* Only the length of the query is needed on the PPE side; the sequence
       pointer itself is not used here. */
    (void) nextSeqPPE (queryLib, &queryLen);
    if (queryLen <= 0) {
        fprintf (stderr, "Empty query sequence\n");
        closeLib (queryLib);
        closeLib (dbLib);
        return -1;
    }

    /* Publish the query length to every SPE context. */
    for (k = 0; k < NUM_SPES; k++) {
        ctx[k].queryLen = queryLen;
    }

    closeLib (queryLib);
    closeLib (dbLib);

    return 0;
}



/*
 * Build the striped 16-bit query profile and working buffers for a query.
 *
 * querySeq     query residues as codes used to index a matrix row
 *              (assumed to be < ALPHA_SIZE -- not checked here)
 * queryLength  number of residues in querySeq
 * matrix       ALPHA_SIZE x ALPHA_SIZE substitution matrix, row-major
 *
 * Returns a 128-byte aligned SwDataPPE whose pData buffer (also 128-byte
 * aligned) holds, in order: the short query profile (ALPHA_SIZE rows of
 * lenQryShort vectors), then pvH1, pvH2 and pvE (lenQryShort vectors each).
 * The byte profile is not built; pvbQueryProf is set to a null pointer.
 * pSwData->bias receives the negated smallest matrix entry (0 if no entry
 * is negative).
 *
 * Side effect: stores the buffer element count, rounded up to a multiple of
 * 16, in ctx[i].nCount for every SPE context.
 *
 * Exits the process with status -1 if an allocation fails.
 *
 * NOTE: both returned addresses are aligned positions *inside* the blocks
 * obtained from malloc/calloc; the original block addresses are not kept,
 * so neither pointer may be passed to free().
 */
void *
swStripedInit(unsigned char   *querySeq,
              int              queryLength,
              signed char     *matrix)
{
    int i, j, k;

    int segSize;
    int nCount;

    int bias;

    int lenQryShort;

    int weight;

    short *ps;

    signed char *matrixRow;

    size_t tmp;

    SwDataPPE *pSwData;

    lenQryShort = (queryLength + 7) / 8;

    /* Over-allocate by 127 bytes so a 128-byte aligned address fits. */
    pSwData = (SwDataPPE *) malloc (sizeof (SwDataPPE)+127);
    if (!pSwData) {
        fprintf (stderr, "Unable to allocate memory for SW data\n");
        exit (-1);
    }
    /* Round UP to the next 128-byte boundary. The mask has to be 0x7f: with
       0xff the address could be rounded down to below the allocated block. */
    tmp = ((size_t) pSwData + 127) & ~((size_t) 0x7f);
    pSwData = (SwDataPPE *) tmp;

    /* Element count for calloc below (element size 16 bytes). */
    nCount = 64 +                             /* slack */
             lenQryShort * ALPHA_SIZE +       /* query profile short */
             (lenQryShort * 3);               /* vH1, vH2 and vE */

    for (i = 0; i < NUM_SPES; i++) {
        ctx[i].nCount = (nCount + 15) & ~0x0f;
    }

    pSwData->pData = (unsigned char *) calloc (nCount+1, 16);
    if (!pSwData->pData) {
        fprintf (stderr, "Unable to allocate memory for SW data buffers\n");
        exit (-1);
    }
    /* The slack elements leave room for moving the start up by as much as
       127 bytes. */
    tmp = ((size_t) (pSwData->pData) + 127) & ~((size_t) 0x7f);
    pSwData->pData = (unsigned char *) tmp;

    /* pData is 128-byte aligned, hence already suitably aligned for the
       16-byte vectors laid out in it. */
    pSwData->pvbQueryProf = (ppeVector *) 0;
    pSwData->pvsQueryProf = (ppeVector *) pSwData->pData;

    pSwData->pvH1 = pSwData->pvsQueryProf + lenQryShort * ALPHA_SIZE;
    pSwData->pvH2 = pSwData->pvH1 + lenQryShort;
    pSwData->pvE  = pSwData->pvH2 + lenQryShort;

    /* Find the bias to use in the substitution matrix: the smallest entry,
       capped at 0. */
    bias = 127;
    for (i = 0; i < ALPHA_SIZE * ALPHA_SIZE; i++) {
        if (matrix[i] < bias) {
            bias = matrix[i];
        }
    }
    if (bias > 0) {
        bias = 0;
    }

    /* Fill in the short query profile. For each matrix row, query positions
       are emitted in striped order (j, j + segSize, j + 2*segSize, ...);
       positions past the end of the query get weight 0. */
    ps = (short *) pSwData->pvsQueryProf;
    segSize = (queryLength + 7) / 8;
    nCount = segSize * 8;
    for (i = 0; i < ALPHA_SIZE; ++i) {
        matrixRow = matrix + i * ALPHA_SIZE;
        for (j = 0; j < segSize; ++j) {
            for (k = j; k < nCount; k += segSize) {
                if (k >= queryLength) {
                    weight = 0;
                } else {
                    weight = matrixRow[*(querySeq + k)];
                }
                *ps++ = (unsigned short) weight;
            }
        }
    }

    pSwData->bias = (unsigned short) -bias;

    return pSwData;
}


SCORE_LIST *
initList (int count)
{
    int i;

    SCORE_LIST *hdr;
    SCORE_NODE *list;
    SCORE_NODE *prev;

    hdr = (SCORE_LIST *) malloc (sizeof (SCORE_LIST));
    if (hdr == NULL) {
        fprintf (stderr, "Cannot allocate storage for score header\n");
        exit (-1);
    }

    //list = (SCORE_NODE *) calloc (count, sizeof (SCORE_NODE));
    
    if (list == NULL) {
        fprintf (stderr, "Cannot allocate storage for scores\n");
        exit (-1);
    } 
	//printf("list node = %x\n", (int)list);
    /* initialize the scores list */
    hdr->minScore = 0;
    hdr->first = NULL;
    hdr->last = NULL;
    hdr->free = list;
    hdr->buffer = list;

    prev = NULL;
    for (i = 0; i < count; ++i) {
        list[i].name[0] = '\0';//null character - Hieu
        list[i].score = 0;

        if (i == 0) {
            list[i].prev = NULL;
        } else {
            list[i].prev = &list[i-1];
        }

        if (i == count - 1) {
            list[i].next = NULL;
        } else {
            list[i].next = &list[i+1];
        }
    }

    return hdr;
}

/*
 * Release a score list: frees the node array (list->buffer) and then the
 * header itself. Passing NULL is a no-op, mirroring free(NULL).
 */
void freeList (SCORE_LIST *list)
{
    if (list == NULL) {
        return;
    }

    free (list->buffer);
    free (list);
}

/*
 * Insert (score, name) into the list, which is kept sorted by descending
 * score; a new entry is placed after existing entries with an equal score.
 *
 * While free nodes remain, one of them is used. Once the list is full, the
 * current last (lowest) entry is evicted and reused, provided the new score
 * is strictly greater than its score; otherwise nothing is inserted and
 * list->minScore + 1 is returned.
 *
 * `name` is copied and truncated to MAX_SCORE_NAME - 1 characters.
 *
 * Returns list->minScore. It is updated to (lowest stored score + 1)
 * whenever the list has no free nodes left after the insertion.
 */
int insertList (SCORE_LIST *list, int score, char *name)
{
    SCORE_NODE *node;
    SCORE_NODE *ptr;

    if (list->free != NULL) {
        node = list->free;
        list->free = list->free->next;
    } else if (list->last != NULL && score > list->last->score) {
        /* Full list: recycle the lowest-scoring entry. */
        node = list->last;
        list->last = node->prev;
        if (list->last != NULL) {
            list->last->next = NULL;
        } else {
            /* The evicted node was the only entry, so the list is now
               empty; the node is re-linked as first/last below. */
            list->first = NULL;
        }
    } else {
        /* should never happen */
        return list->minScore + 1;
    }

    strncpy (node->name, name, MAX_SCORE_NAME);
    node->name[MAX_SCORE_NAME - 1] = '\0';
    node->score = score;

    /* Find the first entry with a smaller score. The walk starts only after
       a possible eviction, so it never visits the recycled node. */
    ptr = list->first;
    while (ptr && ptr->score >= score) {
        ptr = ptr->next;
    }

    if (list->first == NULL) {
        /* empty list */
        list->first = node;
        list->last = node;
        node->prev = NULL;
        node->next = NULL;
    } else if (ptr == NULL) {
        /* append at the end */
        node->prev = list->last;
        node->next = NULL;
        node->prev->next  = node;
        list->last = node;
    } else {
        /* insert in front of ptr */
        node->prev = ptr->prev;
        node->next = ptr;

        if (node->prev == NULL) {
            list->first = node;
        } else {
            node->prev->next = node;
        }
        ptr->prev = node;
    }

    if (list->free == NULL) {
        list->minScore = list->last->score + 1;
    }

    return list->minScore;
}

/*
 * Print the stored scores to stdout: a header line followed by one
 * "score  name" line per entry, walking the list from first to last.
 */
void printResults (SCORE_LIST *list)
{
    SCORE_NODE *cur;

    printf ("Score  Description\n");

    for (cur = list->first; cur != NULL; cur = cur->next) {
        printf ("%5d  %s\n", cur->score, cur->name);
    }
}
