#include "pseud.h" #include static void CalculateSubsHist(double p, SubsHistPtr subs_hist, int num_step); static void AddSubsHist(double p, double *extra_hist, int num_step, int len); static void SelectAndSubstitute(int index, int num_events, double *PMatEach, SimulStructPtr simul_struct); SubsHistPtr SubsHistFree(SubsHistPtr subs_hist) { int i; double **hist; if(subs_hist==NULL)return NULL; if(subs_hist->hist){ hist = subs_hist->hist; for(i=0;i<(subs_hist->len_range);i++){ hist[i]=MemFree(hist[i]); } hist = MemFree(hist); } if((subs_hist->index>=0)&&(subs_hist->PMatEach)){ subs_hist->PMatEach = MemFree(subs_hist->PMatEach); } subs_hist = MemFree(subs_hist); return subs_hist; } short SetupSubsHist(SubsHistPtr subs_hist, short index, int min_len, int len_range, int num_step, double *PMat) { int i, p; double **hist; double *PMatEach, total; subs_hist->index = index; subs_hist->min_len = min_len; subs_hist->len_range = len_range; hist = (double **)calloc(len_range,sizeof(double *)); if(hist==NULL){printf("hist allocation failed\n");return 1;} for(i=0;ihist = hist; if(index>=0){ subs_hist->PMatEach=(double *)calloc(4,sizeof(double)); if(subs_hist->PMatEach==NULL){printf("PMatEach allocation failed\n");return 1;} PMatEach = subs_hist->PMatEach; PMatEach[0]=0; total=0; p=0; for(i=0;i<4;i++){ if(i!=index){ PMatEach[p+1]=PMat[index*4+i]+PMatEach[p]; total+= PMat[index*4+i]; p++; } } for(i=1;i<4;i++){PMatEach[i]/=total;} } return 0; } SubsMatrixPtr SubsMatrixDestruct(SubsMatrixPtr subs_matrix) { int i; SubsHistPtr PNTR subs_hist_array; if(subs_matrix==NULL){return NULL;} subs_hist_array = subs_matrix->subs_hist_array; for(i=0;i<4;i++){ subs_hist_array[i] = SubsHistFree(subs_hist_array[i]); } subs_hist_array = MemFree(subs_hist_array); subs_matrix = MemFree(subs_matrix); return subs_matrix; } SubsMatrixPtr SubsMatrixNew(int num_step) { int i; SubsMatrixPtr subs_matrix; SubsHistPtr PNTR subs_hist_array; subs_matrix = (SubsMatrixPtr)calloc(1,sizeof(SubsMatrix)); if(subs_matrix==NULL){return NULL;} subs_matrix->num_step = num_step; subs_hist_array = (SubsHistPtr PNTR)calloc(4,sizeof(SubsHistPtr)); if(subs_hist_array==NULL){subs_matrix=MemFree(subs_matrix);return NULL;} for(i=0;i<4;i++){ subs_hist_array[i]=(SubsHistPtr)calloc(1,sizeof(SubsHist)); if(subs_hist_array[i]==NULL){subs_matrix=MemFree(subs_matrix);return NULL;} } subs_matrix->subs_hist_array = subs_hist_array; return subs_matrix; } static void CalculateSubsHist(double p, SubsHistPtr subs_hist, int num_step) { /*Modified on Oct 19, 2000*/ int i, j; int len, l, m; double *temp, **hist; /*temp = len C j * p^j * (1-p)^(len-j) */ hist = subs_hist->hist; temp = (double *)calloc(num_step,sizeof(double)); for(i=0;ilen_range;i++){ len=subs_hist->min_len+i; if(len>0){ for(j=0;j0){temp[j]*=l; temp[j]/=m; temp[j]*=p; l--; m--;} for(l=len;l>j;l--){temp[j] *= (1-p);} } hist[i][0]=0; for(j=0;j0){ /*temp = len C j * p^j * (1-p)^(len-j) */ for(j=0;j0){temp[j]*=l; temp[j]/=m; temp[j]*=p; l--; m--;} for(l=len;l>j;l--){temp[j] *= (1-p);} } extra_hist[0]=0; for(j=0;jsubs_hist_array; for(i=0;i<4;i++){ subs_hist = subs_hist_array[i]; hist = subs_hist->hist; /*printf("index=%d:\n",subs_hist->index); */ fprintf(fsub,"index=%d:\n",subs_hist->index); for(j=0;jlen_range;j++){ /*printf("len=%d:",(subs_hist->min_len+j)); */ fprintf(fsub,"len=%d:",(subs_hist->min_len+j)); for(k=0;k<=subs_matrix->num_step;k++){ /*printf("%1.6f, ",hist[j][k]); */ fprintf(fsub,"%1.6f, ",hist[j][k]); } /*printf("\n"); */ fprintf(fsub,"\n"); } } return; } SubsMatrixPtr CalculateSubsMatrix(OptionsBlkPtr options, char *subsfile) { SubsMatrixPtr subs_matrix; SubsHistPtr PNTR subs_hist_array; FILE *fsub; int i, num[20], range, num_step, min_len; Uint1Ptr orig_seq; double p; double *PMat; short index, failed; num_step = NumStep; subs_matrix = SubsMatrixNew(num_step); if(subs_matrix==NULL){return NULL;} /*count base*/ orig_seq = options->orig_seq; for(i=0;i<20;i++){num[i]=0;} for(i=0;i<(options->orig_len);i++){ num[orig_seq[i]]++; } subs_hist_array = subs_matrix->subs_hist_array; PMat = options->PMat; /*calculate substitution numbers for T,C,A,G*/ for(i=0;i<4;i++){ /*printf("Processing %d\n",i); */ index = (short)i; range = 40; min_len=num[i+1]-range; p = 1-PMat[index*4+index];/*probability of substitution*/ failed = SetupSubsHist(subs_hist_array[i], index, min_len, 2*range+1, num_step, options->PMat); if(failed){subs_matrix = SubsMatrixDestruct(subs_matrix);return NULL;} CalculateSubsHist(p, subs_hist_array[i], num_step); } /*Display SubsMatrix*/ if((fsub = fopen(subsfile,"wt"))==NULL){ printf("Cannot open substitution file(press any key)\n");getch(); return subs_matrix; } DisplaySubsMatrix(subs_matrix, fsub); fclose(fsub); return subs_matrix; } void RecalculateSubsMatrix(SubsMatrixPtr subs_matrix, OptionsBlkPtr options/*, char *subsfile*/) { SubsHistPtr PNTR subs_hist_array; SubsHistPtr subs_hist; /*FILE *fsub;*/ int i, j, k, num[20], range; Uint1Ptr orig_seq; double p, total; double *PMat; double *PMatEach; short index; if(subs_matrix==NULL){printf("subs_matrix is NULL\n"); getch();return;} /*count base*/ orig_seq = options->orig_seq; for(i=0;i<20;i++){num[i]=0;} for(i=0;i<(options->orig_len);i++){ num[orig_seq[i]]++; } subs_hist_array = subs_matrix->subs_hist_array; PMat = options->PMat; /*calculate substitution numbers for T,C,A,G*/ for(i=0;i<4;i++){ /*printf("Processing %d\n",i); */ subs_hist = subs_hist_array[i]; range = (subs_hist->len_range-1)/2; index = (short)i; subs_hist->min_len=num[i+1]-range; p = 1-PMat[index*4+index];/*probability of substitution*/ PMatEach = subs_hist->PMatEach; PMatEach[0]=0; total=0; k=0; for(j=0;j<4;j++){ if(j!=index){ PMatEach[k+1]=PMat[index*4+j]+PMatEach[k]; total+= PMat[index*4+j]; k++; } } for(j=1;j<4;j++){PMatEach[j]/=total;} CalculateSubsHist(p, subs_hist, subs_matrix->num_step); } /*Display SubsMatrix*/ /*if((fsub = fopen(subsfile,"wt"))==NULL){ printf("Cannot open substitution file(press any key)\n");getch(); return; } DisplaySubsMatrix(subs_matrix, fsub); fclose(fsub); */ return; } SimulStructPtr SimulStructFree(SimulStructPtr simul_struct) { int i; short **pos_base; if(simul_struct==NULL){return NULL;} if(simul_struct->simul_seq){ simul_struct->simul_seq = MemFree(simul_struct->simul_seq); } if(simul_struct->num_base){ simul_struct->num_base = MemFree(simul_struct->num_base); } if(simul_struct->pos_base){ pos_base = simul_struct->pos_base; for(i=0;i<4;i++){pos_base[i]=MemFree(pos_base[i]);} pos_base = MemFree(pos_base); } if(simul_struct->pos_record){ simul_struct->pos_record = MemFree(simul_struct->pos_record); } simul_struct = MemFree(simul_struct); return simul_struct; } SimulStructPtr SimulStructAllocate(OptionsBlkPtr options) { SimulStructPtr simul_struct; Uint1Ptr simul_seq; int i, index; int *num_base; short **pos_base; simul_struct = (SimulStructPtr)calloc(1,sizeof(SimulStruct)); if(simul_struct==NULL){return NULL;} simul_struct->seqlen = options->orig_len; simul_struct->simul_seq = (Uint1Ptr)calloc(simul_struct->seqlen+(options->nmax*options->lmax+10),sizeof(Uint1)); if(simul_struct->simul_seq==NULL){simul_struct=MemFree(simul_struct); return NULL;} memcpy(simul_struct->simul_seq, options->orig_seq, (simul_struct->seqlen)*sizeof(Uint1)); simul_struct->num_base = (int *)calloc(4,sizeof(int)); if(simul_struct->num_base==NULL){simul_struct=SimulStructFree(simul_struct); return NULL;} pos_base = (short **)calloc(4,sizeof(short *)); if(pos_base==NULL){simul_struct=SimulStructFree(simul_struct); return NULL;} for(i=0;i<4;i++){ pos_base[i]=(short *)calloc(simul_struct->seqlen+(options->nmax*options->lmax+10), sizeof(short)); if(pos_base[i]==NULL){simul_struct=SimulStructFree(simul_struct); return NULL;} } simul_struct->pos_base = pos_base; /*set up num_base and pos_base*/ num_base = simul_struct->num_base; for(i=0;i<4;i++){num_base[i]=0;} simul_seq = simul_struct->simul_seq; for(i=0;i<(simul_struct->seqlen);i++){ if((simul_seq[i]>0)&&(simul_seq[i]<=4)){ index = simul_seq[i]-1; pos_base[index][num_base[index]]=(short)i; num_base[index]++; } } simul_struct->pos_record = (short *)calloc(NumStep+5,sizeof(short)); if(simul_struct->pos_record==NULL){simul_struct=SimulStructFree(simul_struct); return NULL;} return simul_struct; } void InitializeSimulStruct(SimulStructPtr simul_struct, OptionsBlkPtr options) { int i, index, *num_base; short **pos_base; Uint1Ptr simul_seq; simul_struct->seqlen = options->orig_len; memcpy(simul_struct->simul_seq, options->orig_seq, (simul_struct->seqlen)*sizeof(Uint1)); /*set up num_base and pos_base*/ num_base = simul_struct->num_base; pos_base = simul_struct->pos_base; for(i=0;i<4;i++){num_base[i]=0;} simul_seq = simul_struct->simul_seq; for(i=0;i<(simul_struct->seqlen);i++){ if((simul_seq[i]>0)&&(simul_seq[i]<=4)){ index = simul_seq[i]-1; pos_base[index][num_base[index]]=(short)i; num_base[index]++; } } return; } static void SelectAndSubstitute(int index, int num_events, double *PMatEach, SimulStructPtr simul_struct) { int i, count, pos, base; double r; Uint1Ptr simul_seq; int *num_base; short **pos_base, *pos_record, ok_flag; simul_seq = simul_struct->simul_seq; num_base = simul_struct->num_base; pos_base = simul_struct->pos_base; pos_record = simul_struct->pos_record; count=0; while(count0){ for(i=0;i=PMatEach[i])&&(r=index){base+=1;} simul_seq[pos]=(Uint1)(base+1); pos_record[count]=(short)pos; count++; } } return; } short RecalculateSimulStruct(SimulStructPtr simul_struct) { int i, index, *num_base; short **pos_base; Uint1Ptr simul_seq; num_base = simul_struct->num_base; pos_base = simul_struct->pos_base; for(i=0;i<4;i++){num_base[i]=0;} simul_seq = simul_struct->simul_seq; for(i=0;i<(simul_struct->seqlen);i++){ if((simul_seq[i]>0)&&(simul_seq[i]<=4)){ index = simul_seq[i]-1; pos_base[index][num_base[index]]=(short)i; num_base[index]++; } } if(simul_struct->seqlen<=0){printf("seqlen<=0\n");return 1;} for(i=0;i<4;i++){if(num_base[i]==0){}} return 0; } void SubstituteQuick(SubsMatrixPtr subs_matrix, SimulStructPtr simul_struct, OptionsBlkPtr options) { int i, j, p, num_events; SubsHistPtr PNTR subs_hist_array; SubsHistPtr subs_hist; double r, **hist, *extra_hist; int *num_base; double *PMat; num_base = simul_struct->num_base; subs_hist_array = subs_matrix->subs_hist_array; /*the number of substitutions*/ for(i=0;i<4;i++){ subs_hist = subs_hist_array[i]; hist = subs_hist->hist; r = rndu(); j = num_base[i]-(subs_hist->min_len); /*printf("j=%d, r=%1.6f, ",j, r); */ if((j<0)||(j>=subs_hist->len_range)){ /*printf("Warning!i=%d, j=%d\n",i, j); */ if((j+subs_hist->min_len)<=0){num_events=0;} else{ extra_hist = (double *)calloc(subs_matrix->num_step+1,sizeof(double)); if(extra_hist==NULL){ printf("no memory for extra_hist\n");getch(); num_events=0; } else{ PMat = options->PMat; /*printf("Calculating extra_hist\n");*/ AddSubsHist(1-PMat[i*4+i], extra_hist, subs_matrix->num_step, subs_hist->min_len+j); r = rndu(); for(p=0;p<(subs_matrix->num_step);p++){ if((r>=extra_hist[p])&&(rnum_step);p++){ if((r>=hist[j][p])&&(r0){ SelectAndSubstitute(i, num_events, subs_hist->PMatEach, simul_struct); RecalculateSimulStruct(simul_struct); /*printf("substitute done\n"); */ } } return; }