/* This file consists of functions that calculates the prior probability and the likelihood for a given (c,t,p). */ #include "pseud.h" #include static void CalculateProbChainT(CandStructPtr new_cand, double *lnL, OptionsBlkPtr options); static void ModCalculateProbChainT(CandStructPtr new_cand, double *lnL, OptionsBlkPtr options); static void AdjustCalculateProbChainT(CandStructPtr new_cand, double *lnL, OptionsBlkPtr options); static void AdjustCalculateProbChainTF9(CandStructPtr new_cand, double *lnL, OptionsBlkPtr options); static int SetupRange(short position, double *hot_seq, double *range_hot, int range_length, int seqlen, short indel); static double PossibilityOfSelectedRange(double *hot_seq, int seqlen, double *range_hot, int range_length); static void CalculateProbChainT(CandStructPtr new_cand, double *lnL, OptionsBlkPtr options) { int t_unit, i, seqlen, num_t; /*exp(-lambda*t*Kn)*(1-exp(-labmda*t))^(ni+nd)*Kid*/ int Kn; double Kid, lambda, t, pid; short *id_array, l, lmax, nsum; double *pID, sum_pID; double temp; id_array = new_cand->id_array; seqlen = options->orig_len; num_t = options->num_t; lmax = options->lmax; lambda = options->lambda; t = options->branch_length; pID = options->pID; sum_pID = 0; for(i=0;i<2*lmax;i++){sum_pID += pID[i];} Kn=0; Kid=1.0; nsum = 0; for(t_unit=0;t_unit0){ l = (short)(id_array[t_unit]-1); Kid *= (seqlen+1); } if(id_array[t_unit]<0){ l = (short)(lmax-id_array[t_unit]-1); Kid *= (seqlen+1+id_array[t_unit]); } nsum =(short)(nsum+1); if((l<0)||(l>2*lmax)){printf("Waring! l=%d\n",l);getch();} Kid *= (pID[l]/sum_pID); Kn += seqlen; } seqlen += id_array[t_unit]; } /*prob = exp(-lambda*t*Kn)*(1-exp(-lambda*t))^nsum*Kid; log10(x)=log(x)/log(10) log10(prob)={((-lambda*t*Kn))+nsum*log(1-exp(-lambda*t))+log(Kid)}/log10 */ /*Kid*/ if(Kid<=0){(*lnL)=MinlnL; return;} temp=0; temp += log(Kid); /*pid=(1-exp(-lambda*t))^nsum*/ pid = exp(-lambda*t); pid = 1-pid; if(pid<=0){printf("prob=0?\n");(*lnL)=MinlnL; return;} temp += nsum*log(pid); /*exp(-lambda*t*Kn)*/ temp += (-lambda*t*Kn); temp /= log(10.0); /*printf("lnL=%5.1f\n",temp); */ (*lnL) = temp; return; } static void ModCalculateProbChainT(CandStructPtr new_cand, double *lnL, OptionsBlkPtr options) { /*t=branch_length pI = t*lambda*(sum(pI)/sum(pID)) =0.01*t pD = t*lambda*(sum(pD)/sum(pID)) =0.025*t indel==0:TTexp(-Eindel(any)) indel>0 :1-exp(-Eins(n)), Eins(n)=r*lambda*(pI[n]/sum_pID)*(seqlen+1) indel<0 :1-exp(-Edel(n)), Edel(n)=r*lambda*(pD[n]/sum_pID)*(seqlen+indel+1) */ int t_unit, i, seqlen, num_t; double lambda, t, Eins, Edel, pre_Eindel; short *id_array, l, lmax; double *pID, sum_pID; double temp; id_array = new_cand->id_array; num_t = options->num_t; lmax = options->lmax; t = options->branch_length; lambda = options->lambda; seqlen = options->orig_len; pID = options->pID; sum_pID = 0; for(i=0;i<(2*lmax);i++){sum_pID += pID[i];} pre_Eindel = t*lambda; temp = 1.0; for(t_unit=0;t_unit0){ l = (short)(id_array[t_unit]-1); Eins = t*lambda*(pID[l]/sum_pID)*(seqlen+1); temp *= (1.0-exp(-1.0*Eins)); seqlen += id_array[t_unit]; } if(id_array[t_unit]<0){ l = (short)(lmax-id_array[t_unit]-1); Edel = t*lambda*(pID[l]/sum_pID)*(seqlen+id_array[t_unit]+1); temp *= (1.0-exp(-1.0*Edel)); seqlen += id_array[t_unit]; } } } if(temp<=0){temp = MinlnL;} else{temp = log10(temp); } (*lnL) = temp; return; } static void AdjustCalculateProbChainT(CandStructPtr new_cand, double *lnL, OptionsBlkPtr options) { /*t=branch_length pI = t*lambda*(sum(pI)/sum(pID)) =0.01*t pD = t*lambda*(sum(pD)/sum(pID)) =0.025*t indel==0:exp(-Eindel(any)) indel>0 :1-exp(-AjustEins(n)), Eins(n)=r*lambda*(pI[n]/sum_pID)*(seqlen+1) AjustEins(n)=Eins(n)*{1+(MRC[0][n-1]*options->y)/(seqlen+1)} indel<0 :1-exp(-AjustEdel(n)), Edel(n)=r*lambda*(pD[n]/sum_pID)*(seqlen+indel+1) AdjustEdel(n)=Edel(n)*{1+(MRC[1][n-1]*options->z+2*MRC[2][n-1]*options->z)/(seqlen-n+1)} F = TT{1-AjustEins(n)}*TT{1-AdjustEdel(n)} prob(chain-t) *=(F)^10* */ int t_unit, i, seqlen, num_t; double lambda, t, Eins, Edel, F/*, pre_Eindel*/; short *id_array, l, lmax; double *pID, sum_pID; double temp, adjust; int **MRC; id_array = new_cand->id_array; num_t = options->num_t; lmax = options->lmax; t = options->branch_length; lambda = options->lambda; seqlen = options->orig_len; MRC = options->MRC; pID = options->pID; sum_pID = 0; for(i=0;i<(2*lmax);i++){sum_pID += pID[i];} /*F = TT{1-AjustEins(n)}*TT{1-AdjustEdel(n)}*/ F = 1.0; for(i=0;iy)/(seqlen+1)} */ adjust = MRC[0][i]*(options->y); adjust /= (seqlen+1); adjust += 1.0; Eins *= adjust; F *= (1-Eins); } for(i=lmax;i<(2*lmax);i++){ Edel = t*lambda*(pID[i]/sum_pID)*(seqlen-(i-lmax+1)+1); /*AdjustEdel(n)=Edel(n)*{1+(MRC[1][n-1]*options->z+2*MRC[2][n-1]*options->z)/(seqlen-n+1)}*/ adjust = MRC[1][i-lmax]*(options->z); adjust += 2*MRC[2][i-lmax]*(options->z); adjust /= (seqlen-(i-lmax+1)+1); adjust += 1.0; Edel *= adjust; F *= (1-Edel); } /*pre_Eindel = t*lambda;*/ temp = 1.0; for(t_unit=0;t_unit0){ l = (short)(id_array[t_unit]-1); Eins = t*lambda*(pID[l]/sum_pID)*(seqlen+1); /*AjustEins(n)=Eins(n)*{1+(MRC[0][n-1]*options->y)/(seqlen+1)} */ adjust = MRC[0][id_array[t_unit]-1]*(options->y); adjust /= (seqlen+1); adjust += 1.0; Eins *= adjust; temp *= (1.0-exp(-1.0*Eins)); seqlen += id_array[t_unit]; } if(id_array[t_unit]<0){ l = (short)(lmax-id_array[t_unit]-1); Edel = t*lambda*(pID[l]/sum_pID)*(seqlen+id_array[t_unit]+1); /*AdjustEdel(n)=Edel(n)*{1+(MRC[1][n-1]*options->z+2*MRC[2][n-1]*options->z)/(seqlen-n+1)}*/ adjust = MRC[1][-id_array[t_unit]-1]*(options->z); adjust += 2*MRC[2][-id_array[t_unit]-1]*(options->z); adjust /= (seqlen+id_array[t_unit]+1); adjust += 1.0; Edel *= adjust; temp *= (1.0-exp(-1.0*Edel)); seqlen += id_array[t_unit]; } /*}*/ temp *= F; } if(temp<=0){temp = MinlnL;} else{temp = log10(temp); } (*lnL) = temp; return; } /* This calculates the prior probability. */ static void AdjustCalculateProbChainTF9(CandStructPtr new_cand, double *lnL, OptionsBlkPtr options) { /*modified on April 9, 2001 t=branch_length pI = t*lambda*(sum(pI)/sum(pID)) =0.01*t pD = t*lambda*(sum(pD)/sum(pID)) =0.025*t indel==0:TTexp(-AjustEindel(any)) indel>0 :1-exp(-AjustEins(n)), Eins(n)=r*lambda*(pI[n]/sum_pID)*(seqlen+1) AjustEins(n)=Eins(n)*{1+(MRC[0][n-1]*options->y)/(seqlen+1)} indel<0 :1-exp(-AjustEdel(n)), Edel(n)=r*lambda*(pD[n]/sum_pID)*(seqlen+indel+1) AdjustEdel(n)=Edel(n)*{1+(MRC[1][n-1]*options->z+2*MRC[2][n-1]*options->z)/(seqlen-n+1)} F[n-1]=exp(-AdjustEins(n)),(n=1~lmax), F[lmax-n-1]=exp(-AdjustEdel(n))(n=-1~-lmax) F[2*lmax] = TT{exp(-AdjustEins(n))}*TT{exp(-AdjustEdel(n))} time-unit with no insertion nor deletion:prob(chain-t) *= F time-unit with insertion or deletion:prob(chain-t) *= F/(exp(-AdjustEins(n))) or F/(exp(-AdjustEdel(n))) */ int t_unit, i, seqlen, num_t; double lambda, t, Eins, Edel, *F/*, pre_Eindel*/; short *id_array, l, lmax; double *pID, sum_pID; double temp, adjust; int **MRC; id_array = new_cand->id_array; num_t = options->num_t; lmax = options->lmax; t = options->branch_length; lambda = options->lambda; seqlen = options->orig_len; MRC = options->MRC; F = (double *)calloc(2*lmax+1,sizeof(double)); if(F==NULL){printf("no memory for F\n");getch();} pID = options->pID; sum_pID = 0; for(i=0;i<(2*lmax);i++){sum_pID += pID[i];} /*F = TT{exp(-AjustEins(n))}*TT{exp(-AdjustEdel(n))}*/ F[2*lmax] = 1.0; for(i=0;iy)/(seqlen+1)} */ adjust = MRC[0][i]*(options->y); adjust /= (seqlen+1); adjust += 1.0; Eins *= adjust; F[i]=exp(-1.0*Eins); F[2*lmax] *= F[i]; } for(i=lmax;i<(2*lmax);i++){ Edel = t*lambda*(pID[i]/sum_pID)*(seqlen-(i-lmax+1)+1); /*AdjustEdel(n)=Edel(n)*{1+(MRC[1][n-1]*options->z+2*MRC[2][n-1]*options->z)/(seqlen-n+1)}*/ adjust = MRC[1][i-lmax]*(options->z); adjust += 2*MRC[2][i-lmax]*(options->z); adjust /= (seqlen-(i-lmax+1)+1); adjust += 1.0; Edel *= adjust; F[i] = exp(-1.0*Edel); F[2*lmax] *= F[i]; } /*pre_Eindel = t*lambda;*/ temp = 1.0; for(t_unit=0;t_unit0){ l = (short)(id_array[t_unit]-1); Eins = t*lambda*(pID[l]/sum_pID)*(seqlen+1); /*AjustEins(n)=Eins(n)*{1+(MRC[0][n-1]*options->y)/(seqlen+1)} */ adjust = MRC[0][id_array[t_unit]-1]*(options->y); adjust /= (seqlen+1); adjust += 1.0; Eins *= adjust; temp *= (1.0-exp(-1.0*Eins)); temp /= F[l]; /*F(any)/F(ins[n])*/ seqlen += id_array[t_unit]; } if(id_array[t_unit]<0){ l = (short)(lmax-id_array[t_unit]-1); Edel = t*lambda*(pID[l]/sum_pID)*(seqlen+id_array[t_unit]+1); /*AdjustEdel(n)=Edel(n)*{1+(MRC[1][n-1]*options->z+2*MRC[2][n-1]*options->z)/(seqlen-n+1)}*/ adjust = MRC[1][-id_array[t_unit]-1]*(options->z); adjust += 2*MRC[2][-id_array[t_unit]-1]*(options->z); adjust /= (seqlen+id_array[t_unit]+1); adjust += 1.0; Edel *= adjust; temp *= (1.0-exp(-1.0*Edel)); temp /= F[l]; /*F(any)/F(del[n])*/ seqlen += id_array[t_unit]; } /*}*/ temp *= F[2*lmax]; } if(temp<=0){temp = MinlnL;} else{temp = log10(temp); } (*lnL) = temp; return; } /* This is currently not used .*/ short SubstituteSimulseq(Uint1Ptr simul_seq, int seqlen, double *PMat) { int i, j, index; double r, limit[5]; for(i=0;i=limit[j])&&(r0){ for(i=0;i<=seqlen;i++){hot_seq[i]=1.0;} for(i=0;i=seqlen){ok_flag=0;break;} if(simul_seq[i+j]!=simul_seq[i+j+indel]){ok_flag=0;break;} } if(ok_flag){hot_seq[i+indel] += (options->y);} } } if(indel<0){ for(i=0;i<=(seqlen+indel);i++){hot_seq[i]=1.0;} for(i=0;i=seqlen){ok_flag=0;break;} if(simul_seq[i+j]!=simul_seq[i+j-indel]){ok_flag=0;break;} } if(ok_flag){hot_seq[i]+=(options->z); hot_seq[i-indel]+=(options->z);} } for(i=seqlen;i>seqlen+indel;i--){hot_seq[i]=0.0;} } return; } /* In the following 'hot_seq' is an array representing the extra probability (if any) for insertion/deletion endowed to each position. Unlike the FindModuloCenter (above), the following takes into account the filters. */ void FindModuloCenterFilter(Uint1Ptr simul_seq, double *hot_seq, int seqlen, short indel, OptionsBlkPtr options, int filter) { int i, j; short ok_flag; short *mrc_pos, *check, mrc_width; mrc_pos = (short *)calloc(seqlen, sizeof(short)); if(mrc_pos==NULL){ printf("no memory for mrc_pos\n");getch(); for(i=0;i<=seqlen;i++){hot_seq[i]=1.0;} return; } /*find modulo rep center*/ if(indel>0){mrc_width = indel;} else{mrc_width = (short)(-indel);} for(i=0;i=seqlen){ok_flag=0;break;} if(simul_seq[i+j]!=simul_seq[i+j+mrc_width]){ok_flag=0;break;} } if(ok_flag){mrc_pos[i+mrc_width] = 1;} } /*filter>=1*/ if(filter>=1){ check = (short *)calloc(seqlen,sizeof(short)); if(check==NULL){ printf("no memory for mrc_pos\n");getch(); for(i=0;i<=seqlen;i++){hot_seq[i]=1.0;} return; } /*filter 1*/ if((filter==1)&&(mrc_width==1)){ for(i=0;i<(seqlen-mrc_width);i++){ if((mrc_pos[i]+mrc_pos[i+mrc_width])>=2){check[i]=(short)(check[i]+1);check[i+mrc_width]=(short)(check[i+mrc_width]+1);} } for(i=0;i=1){mrc_pos[i]=1;}else{mrc_pos[i]=0;}} } /*filter 2*/ if(filter==2){ if(mrc_width==1){ for(i=0;i<(seqlen-2*mrc_width);i++){ if((mrc_pos[i]+mrc_pos[i+mrc_width]+mrc_pos[i+2*mrc_width])>=3){check[i]=(short)(check[i]+1);check[i+mrc_width]=(short)(check[i+mrc_width]+1);check[i+2*mrc_width]=(short)(check[i+2*mrc_width]+1);} } for(i=0;i=1){mrc_pos[i]=1;}else{mrc_pos[i]=0;}} } if(mrc_width==2){ for(i=0;i<(seqlen-mrc_width);i++){ if((mrc_pos[i]+mrc_pos[i+mrc_width])>=2){check[i]=(short)(check[i]+1);check[i+mrc_width]=(short)(check[i+mrc_width]+1);} } for(i=0;i=1){mrc_pos[i]=1;}else{mrc_pos[i]=0;}} } } free(check); } /*hot_seq*/ if(indel>0){ for(i=0;i=1){hot_seq[i]=1.0+(options->y);} else{hot_seq[i]=1.0;} } hot_seq[seqlen]=1.0; } if(indel<0){ for(i=0;i<(seqlen+indel);i++){ hot_seq[i]=1.0; if(mrc_pos[i]>=1){hot_seq[i] += (options->z); hot_seq[i+indel] += (options->z);} } hot_seq[seqlen+indel]=1.0; for(i=seqlen;i>(seqlen+indel);i--){hot_seq[i]=0.0;} } free(mrc_pos); return; } int SelectPositions(double *hot_seq, int max_pos) { int i, pos; double chance; double *limit_hot; if(max_pos==0){return -1;} if(max_pos==1){pos=0;return pos;} limit_hot = (double *)calloc(max_pos+2,sizeof(double)); if(limit_hot==NULL){printf("out of memory\n");return -1;} limit_hot[0]=0; for(i=0;i=limit_hot[i])&&(chance=step1)&&(r<(step1+pi[p]))){base = (Uint1)(p+1);simul_seq[pos+i]=base;break;} step1 += pi[p]; } if(base==(NASize+1)){printf("Select base failed(press any key)\n");getch();} } return; } void DupBase(int pos, short indel, Uint1Ptr simul_seq, int seqlen) { int i; double r; /*duplication of neighbouring base*/ if(pos=(seqlen-indel)){ for(i=0;i=indel)&&(pos<(seqlen-indel))){ r = rndu(); if(r<0.5){ for(i=0;ifilter); /*SelectPositions*/ if(indel>0) selected_pos = SelectPositions(hot_seq, seqlen+1); if(indel<0) selected_pos = SelectPositions(hot_seq, seqlen+indel+1); if(selected_pos==-1){return -1;} /*check segment*/ if(selected_pos!=position){ hot_seq = MemFree(hot_seq); return 0; } /*insertion*/ if(indel>0){ for(i=seqlen-1;i>=selected_pos;i--){simul_seq[i+indel]=simul_seq[i];} /*simul_seq[seqlen+indel]='\0'; */ if(hot_seq[selected_pos]<=1.0){/*selected_pos is not a modulo center*/ chance = rndu(); if((selected_pos==0)||(selected_pos==seqlen)){ if(chance<(1-(options->x)/2.0)){ RandomInsert(selected_pos, indel, options->pi, simul_seq); } else{ DupBase(selected_pos, indel, simul_seq, seqlen); } } else{ if(chance<(1-(options->x))){ RandomInsert(selected_pos, indel, options->pi, simul_seq); } else{ DupBase(selected_pos, indel, simul_seq, seqlen); } } } if(hot_seq[selected_pos]>1.0){ /*chance = rndu()*(1+options->y); */ chance = rndu()*hot_seq[selected_pos]; if(chance<(1-(options->x))){ RandomInsert(selected_pos, indel, options->pi, simul_seq); } else{ DupModulo(selected_pos, indel, simul_seq); } } } /*deletion*/ if(indel<0){ for(i=selected_pos;i<(seqlen+indel);i++){simul_seq[i]=simul_seq[i-indel];} simul_seq[seqlen+indel]='\0'; } hot_seq = MemFree(hot_seq); return 1; } double EstimateAtPosvector(OptionsBlkPtr options, CandStructPtr new_cand, ScoreBlkPtr score_block, FILE *fout) { long int iter, max_iter; int t, i, p, seqlen, num_t; Uint1Ptr orig_seq; Uint1Ptr target_seq; Uint1Ptr simul_seq; double *PMat; /*P(t)ij*/ short *pos_vector; short *id_array; short ok_flag, end_flag; int count; double prob_pos; num_t = options->num_t; PMat = options->PMat; id_array = new_cand->id_array; seqlen = options->orig_len; orig_seq = options->orig_seq; target_seq = options->target_seq; simul_seq = (Uint1Ptr)calloc(seqlen+(options->nmax)*(options->lmax)+10,sizeof(Uint1)); if(simul_seq==NULL){ printf("simul_seq allocation failure\n");getch(); new_cand->lnL=MinlnL; return 0; } pos_vector = new_cand->pos_vector; /*iteration cycle*/ max_iter=1000000; count = 0; end_flag=0; Initializelkl(score_block); for(iter=0;iterorig_len; for(i=0;icount_per_cycle))){ /*printf("count=%d, calculate score_block\n",count); */ end_flag=CalculateScoreBlock(score_block, options, fout); Initializelkl(score_block); } if(end_flag){break;} } } /*fprintf(fout,"count=%d\n",count); */ if((count<=0)||(iter<=0)){prob_pos=log10(1.0/max_iter);} else{ prob_pos = (double)count/iter; prob_pos = log10(prob_pos); } if(countcount_per_cycle){ end_flag=CalculateScoreBlock(score_block, options, fout); /*score_block->MP = (score_block->target_seqlen)*-1.0; score_block->VP = 10;*/ } simul_seq = MemFree(simul_seq); return prob_pos; } static int SetupRange(short position, double *hot_seq, double *range_hot, int range_length, int seqlen, short indel) { int i, range_start, half1, half2; half1 = range_length/2; half2 = range_length-half1; if(indel>0){ if((position>=half1)&&(position<(seqlen+1-half2))){range_start = position-half1;} else{ if(position=half1)&&(position<(seqlen+1+indel-half2))){range_start = position-half1;} else{ if(positionsimul_seq; seqlen = simul_struct->seqlen; hot_seq = (double *)calloc(seqlen+10,sizeof(double)); if(hot_seq==NULL){printf("memory over for hot_seq\n");getch();return 0;} range_length = 1; range_hot = (double *)calloc(range_length+2,sizeof(double)); if(range_hot==NULL){printf("memory over for range_hot\n");getch();return 0;} /*FindModuloCenter(simul_seq, hot_seq, seqlen, indel, options);*/ FindModuloCenterFilter(simul_seq, hot_seq, seqlen, indel, options, options->filter); /*Setup range*/ range_start = SetupRange(position, hot_seq, range_hot, range_length, seqlen, indel); if(range_start<0){ range_start=0; if(indel>0) range_length=seqlen; else range_length=seqlen+indel; for(i=0;i<=range_length;i++){range_hot[i]=hot_seq[i];} } /*SelectPositions*/ if(range_length<=0){hot_seq = MemFree(hot_seq); range_hot = MemFree(range_hot); return -1;} selected_pos = SelectPositions(range_hot, range_length); selected_pos += range_start; /*check segment*/ if(selected_pos!=position){ hot_seq = MemFree(hot_seq); range_hot = MemFree(range_hot); return 0; } /*range_p*/ (*range_p) *= PossibilityOfSelectedRange(hot_seq, seqlen, range_hot, range_length); /*insertion*/ if(indel>0){ for(i=seqlen-1;i>=selected_pos;i--){simul_seq[i+indel]=simul_seq[i];} /*simul_seq[seqlen+indel]='\0'; */ if(hot_seq[selected_pos]<=1.0){/*selected_pos is not a modulo center*/ chance = rndu(); if((selected_pos==0)||(selected_pos==seqlen)){ if(chance<(1-(options->x)/2.0)){ RandomInsert(selected_pos, indel, options->pi, simul_seq); } else{ DupBase(selected_pos, indel, simul_seq, seqlen); } } else{ if(chance<(1-(options->x))){ RandomInsert(selected_pos, indel, options->pi, simul_seq); } else{ DupBase(selected_pos, indel, simul_seq, seqlen); } } } if(hot_seq[selected_pos]>1.0){ /*chance = rndu()*(1+options->y); */ chance = rndu()*hot_seq[selected_pos]; if(chance<(1-(options->x))){ RandomInsert(selected_pos, indel, options->pi, simul_seq); } else{ DupModulo(selected_pos, indel, simul_seq); } } } /*deletion*/ if(indel<0){ for(i=selected_pos;i<(seqlen+indel);i++){simul_seq[i]=simul_seq[i-indel];} simul_seq[seqlen+indel]='\0'; } hot_seq = MemFree(hot_seq); range_hot = MemFree(range_hot); return 1; } /* This performs simulation runs and calculates the likelihood of the (c,t,p).*/ double EstimateAtPosvectorQuick(OptionsBlkPtr options, CandStructPtr new_cand, ScoreBlkPtr score_block, SubsMatrixPtr subs_matrix, FILE *fout) { long int iter, max_iter; int t, /*i, */p, /*seqlen, */num_t; SimulStructPtr simul_struct; Uint1Ptr target_seq; short *pos_vector; short *id_array; short ok_flag, end_flag, failed; int count; double prob_pos, range_p, ave_range_p; num_t = options->num_t; id_array = new_cand->id_array; pos_vector = new_cand->pos_vector; /*printf("start estimation\n"); */ target_seq = options->target_seq; simul_struct = SimulStructAllocate(options); if(simul_struct==NULL){ printf("Warning! simul_struct allocation failed\n");getch(); new_cand->lnL=MinlnL; return 0; } /*printf("simul struct allocated\n");*/ /*iteration cycle*/ max_iter=1000000; count = 0; end_flag=0; ave_range_p = 0; Initializelkl(score_block); for(iter=0;iterseqlen += id_array[t]; failed=RecalculateSimulStruct(simul_struct); if(failed){ printf("IndelSimulSeqQuick failed\n"); prob_pos = MinlnL; simul_struct = SimulStructFree(simul_struct); return prob_pos; } /*printf("seqlen = %d\n",(simul_struct->seqlen));*/ /*fprintf(fout,"id_array[%d]=%d, pos=%d\n",t, id_array[t], pos_vector[p]); for(i=0;isimul_seq, score_block); if(!(count%(score_block->count_per_cycle))){ /*printf("count=%d, calculate score_block\n",count); */ end_flag=CalculateScoreBlock(score_block, options, fout); Initializelkl(score_block); } if(end_flag){break;} } } /*fprintf(fout,"count=%d\n",count); */ if((count<=0)||(ave_range_p<=0)||(iter<=0)){prob_pos=log10(1.0/max_iter);} else{ prob_pos = ave_range_p/iter; /*prob_pos = (double)count/iter; ave_range_p /= count; prob_pos *= ave_range_p; */ if(prob_pos<=0){printf("prob_pos=%1.3e(press any key)\n");getch();prob_pos = MinlnL;} else{prob_pos = log10(prob_pos);} } if(countcount_per_cycle){ end_flag=CalculateScoreBlock(score_block, options, fout); /*score_block->MP = (score_block->target_seqlen)*-1.0; score_block->VP = 10;*/ } /*printf("trial done\n"); */ simul_struct = SimulStructFree(simul_struct); /*printf("simul struct free done\n"); */ return prob_pos; } void TrialOfNewCandidate(CandStructPtr new_cand, OptionsBlkPtr options, SubsMatrixPtr subs_matrix, FILE *fout) { double lnL, lnP_pos; ScoreBlkPtr score_block; int count_per_cycle; /*printf("processing estimation\n"); */ /*Calculate probability of chain-t*/ lnL=0; /*CalculateProbChainT(new_cand, &lnL, options); */ /*ModCalculateProbChainT(new_cand, &lnL, options); */ /*AdjustCalculateProbChainT(new_cand, &lnL, options); */ AdjustCalculateProbChainTF9(new_cand, &lnL, options); /*printf("ln(probability of chain-t)=%2.4f\n",lnL); fprintf(fout,"ln(prob chain-t)=%2.4f, ",lnL); */ count_per_cycle = 10000b; score_block = SetupScoreblock(options, new_cand, count_per_cycle); /*printf("set up score block done\n");*/ /*lnP_pos = EstimateAtPosvector(options, new_cand, score_block, fout); */ lnP_pos = EstimateAtPosvectorQuick(options, new_cand, score_block, subs_matrix, fout); if(lnP_pos==MinlnL){new_cand->lnL = MinlnL; score_block = ScoreBlkFree(score_block); return;} /*fprintf(fout,"ln(prob_pos)=%2.4f\n",lnP_pos);*/ lnL += lnP_pos; /*fprintf(fout,"MP=%2.4f, VP=%2.4f\n",score_block->MP, score_block->VP); */ lnL += score_block->MP; new_cand->lnL = lnL; /*fprintf(fout,"lnL = %2.4f\n",new_cand->lnL); */ score_block = ScoreBlkFree(score_block); return; }