[med-svn] [Git][med-team/tantan][upstream] New upstream version 18

Mon Dec 17 13:01:49 GMT 2018

Andreas Tille pushed to branch upstream at Debian Med / tantan


Commits:
e07c7bf4 by Andreas Tille at 2018-12-17T12:52:39Z
New upstream version 18
- - - - -


20 changed files:

- ChangeLog.txt
- README.html
- README.txt
- − src/CA_code/lambda_calculator.c
- − src/CA_code/lambda_calculator.h
- − src/CA_code/lubksb.c
- − src/CA_code/ludcmp.c
- − src/CA_code/nrutil.c
- − src/CA_code/nrutil.h
- + src/LambdaCalculator.cc
- src/mcf_score_matrix_probs.hh → src/LambdaCalculator.hh
- src/Makefile
- − src/mcf_score_matrix_probs.cc
- src/mcf_tantan_options.cc
- src/mcf_tantan_options.hh
- src/tantan.cc
- src/tantan_app.cc
- src/version.hh
- test/tantan_test.out
- test/tantan_test.sh


Changes:

=====================================
ChangeLog.txt
=====================================
@@ -1,8 +1,36 @@
+2018-12-10  Martin C. Frith  <Martin C. Frith>
+
+	* README.txt, src/mcf_tantan_options.cc, src/mcf_tantan_options.hh,
+	src/tantan_app.cc, test/tantan_test.out, test/tantan_test.sh:
+	Add match score, mismatch cost options
+	[2383404c795a] [tip]
+
+	* src/tantan.cc:
+	Refactor
+	[d61af119db30]
+
+	* src/tantan.cc:
+	Refactor
+	[81bdfe23217d]
+
+	* src/CA_code/lambda_calculator.c, src/CA_code/lambda_calculator.h,
+	src/CA_code/lubksb.c, src/CA_code/ludcmp.c, src/CA_code/nrutil.c,
+	src/CA_code/nrutil.h, src/LambdaCalculator.cc,
+	src/LambdaCalculator.hh, src/Makefile,
+	src/mcf_score_matrix_probs.cc, src/mcf_score_matrix_probs.hh,
+	src/tantan_app.cc:
+	Use Konta-san's code for matrix lambda
+	[fccc8e5e9c1b]
+
+	* src/tantan.cc, src/tantan_app.cc, test/tantan_test.sh:
+	Refactor
+	[f9a9da99553d]
+
 2012-10-16  Martin C. Frith  <Martin C. Frith>
 
 	* README.txt:
 	Expanded the documentation: installation & FAQ
-	[946a951b1a06] [tip]
+	[946a951b1a06]
 
 	* src/CA_code/nrutil.c:
 	Just fixed a compiler warning


=====================================
README.html
=====================================
@@ -465,6 +465,12 @@ repeats, so it's easy to lift the masking after determining homology.</p>
 <kbd><span class="option">-d</span></kbd></td>
 <td>probability decay per period (period-(i+1) / period-i)</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-i</span></kbd></td>
+<td>match score</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">-j</span></kbd></td>
+<td>mismatch cost</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-a</span></kbd></td>
 <td>gap existence cost</td></tr>
 <tr><td class="option-group">


=====================================
README.txt
=====================================
@@ -141,6 +141,8 @@ Options
 -e  probability of a repeat ending per position
 -w  maximum tandem repeat period to consider
 -d  probability decay per period (period-(i+1) / period-i)
+-i  match score
+-j  mismatch cost
 -a  gap existence cost
 -b  gap extension cost
 -s  minimum repeat probability for masking


=====================================
src/CA_code/lambda_calculator.c deleted
=====================================
@@ -1,465 +0,0 @@
-// Copyright 2008 Michiaki Hamada
-// Adapted from public domain code by Yi-Kuo Yu, NCBI
-
-/**
- * See lambda_calculator.h
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "nrutil.h"
-int Alphsize;
-#include "nrutil.c"
-#include "ludcmp.c"
-#include "lubksb.c"
-#include "lambda_calculator.h"
-#define Epsilon 1.0e-36
-#define E_bound 1.0e-12 
-#define Infty   1000000.0
-#define min(A,B) ((A) > (B) ? (B) : (A) )
-#define max(A,B) ((A) > (B) ? (A) : (B) )
-#define bool int
-#define true 1
-#define false 0
-double Lambda_UB; //lambda upper bound
-double r_max_m, c_max_m; //min of each row's (column's) max
-
-void makematrix( const double** mat_b, double **a, double lambda);
-typedef struct Lambda {
-  double min;
-  double max;
-  int flag;    // 1 means there is a range, -1 means no solution possible.
-} Lambda;
-typedef struct Sum {
-  double value;
-  int flag;   // 1 means no negative bg_freq, -1 means there is negative bg_freq
-} Sum;
-Lambda Find_JP(const double** mat_b, double la_min, double la_max, double **JP, double *p_in, double *q_in);
-Sum Check_root(const double** mat_b, double **a, double lambda, double *p, double *q);
-double Check_det(const double** mat_b, double **a, double lambda);
-Sum Nail_lambda(const double** mat_b, int flag_sign, double lambda_min, double lambda_max, double *p, double *q, double *la_add);
-double Nail_det(const double** mat_b, int flag_sign, double lambda_min, double lambda_max);
-bool Check_range( const double** mat_b );
-
-double *Locate_det_zero( const double** mat_b, int * ); //pointer to root list are returned with how many of them by int 
-
-
-double calculate_lambda ( const double** mat_b, int alpha_size,
-                          double* p, double* q )
-{
-  double **JP/*, *q, *p*/;
-  int k;
-  double *root_location;
-  int N_root; 
-  Lambda Lambda_local;
-
-  Alphsize = alpha_size;
-
-  if( ! Check_range( mat_b ) ) return -1.0;
-
-  root_location = Locate_det_zero(mat_b, &N_root);
-  if( root_location == NULL && N_root > 0 ) return -1.0;
-
-  //q=dvector(1,Alphsize);
-  //p=dvector(1,Alphsize);
-  JP = dmatrix(1,Alphsize,1,Alphsize);
-
-  if (N_root == 0){
-    Lambda_local = Find_JP(mat_b, 0, Lambda_UB, JP, p, q);
-    if (1 == Lambda_local.flag) { // sensible solution found  
-      // Remember to find the right place to free the vectors
-      //free_dvector(p, 1,Alphsize);
-      //free_dvector(q, 1,Alphsize);
-      free( root_location );
-      free_dmatrix( JP, 1, Alphsize, 1, Alphsize );
-      return (Lambda_local.min + Lambda_local.max) / 2.0;
-    }
-    else if (-1 == Lambda_local.flag) {
-      //printf("matrix pass first screening but no sensible solution found. :-( \n");
-    }
-  } 
-  else if (N_root > 0) {
-    //printf("N_root = %d for this matirx \n", N_root);
-    //for (i=0;i<N_root;i++) printf("N_root[%d] = %lf\n",i,root_location[i]);
-    for (k=0; k<= N_root; k++){
-      if (k==0) {Lambda_local.min =0; Lambda_local.max = root_location[0];}
-      else if (k== N_root) {Lambda_local.min = root_location[N_root-1]; Lambda_local.max = Lambda_UB+Epsilon;}
-      else {Lambda_local.min = root_location[k-1]; Lambda_local.max = root_location[k]; }
-      Lambda_local = Find_JP(mat_b, Lambda_local.min, Lambda_local.max, JP, p, q);
-      if (1 == Lambda_local.flag) { // sensible solution found  
-	//free_dvector(p, 1,Alphsize);
-	//free_dvector(q, 1,Alphsize);
-	free( root_location );
-      free_dmatrix( JP, 1, Alphsize, 1, Alphsize );
-	return (Lambda_local.min + Lambda_local.max) / 2.0;
-      }
-      else if (-1 == Lambda_local.flag) {
-	//printf("matrix pass first screening but still no sensible solution found. :-( \n");
-      }
-    }
-  } 
-  // Remember to find the right place to free the vectors
-  //free_dvector(p, 1,Alphsize);
-  //free_dvector(q, 1,Alphsize);
-  free( root_location );
-  free_dmatrix( JP, 1, Alphsize, 1, Alphsize );
-  return -1.0;
-}
-
-bool Check_range( const double** mat_b )
-{
-  int i,j;
-  int pos_flag_r, neg_flag_r;
-  int pos_flag_c, neg_flag_c;
-  double r_max, c_max; //max of each row (or column)
-
-  int L_r=0, L_c=0; // number of zero-score rows (columns)
-
-  // First make sure each row and column have both pos and neg entries
-  r_max_m = c_max_m = 100000000000.0;
-  for (i=1;i<=Alphsize; i++) {
-    r_max = 0; c_max = 0;
-    pos_flag_r = -1; neg_flag_r = -1; pos_flag_c = -1; neg_flag_c = -1;
-    for (j = 1; j<= Alphsize; j++){
-      if (mat_b[i][j] > 0) {
-	if (mat_b[i][j]> r_max) r_max = mat_b[i][j];
-	pos_flag_r = 1;
-      }
-      else if (mat_b[i][j] < 0) neg_flag_r = 1;
-      if (mat_b[j][i] > 0) {
-	if (mat_b[j][i] > c_max) c_max = mat_b[j][i];
-	pos_flag_c = 1;
-      }
-      else if (mat_b[j][i] < 0) neg_flag_c = 1;
-    }
-    if ((pos_flag_r == -1)||(neg_flag_r == -1)||(pos_flag_c == -1)||(neg_flag_c == -1)) {
-      if ((pos_flag_r == -1)&&(neg_flag_r == -1)){
-	printf ("only zero score at row  %d\n", i);
-	L_r++;
-      }
-      else if ((pos_flag_c == -1)&& (neg_flag_c == -1)){
-	printf ("only zero score at column %d\n", i);
-	L_c++;
-      }
-      else{
-	//printf("all positive or all negative at row or column %d\n", i);
-	//printf("therefore invalid matrix. exit now. \n");
-	return false;
-	//exit(1);
-      }
-    }
-    if ((r_max < r_max_m)&&(r_max > 0)) r_max_m = r_max;
-    if ((c_max < c_max_m)&&(c_max > 0)) c_max_m = c_max;
-  }
-
-
-  // Find the upper bound for lambda
-  if (r_max_m > c_max_m) {
-    Lambda_UB = 1.1*log(1.0*Alphsize-L_r)/r_max_m ;
-  }
-  else {
-    Lambda_UB = 1.1*log(1.0*Alphsize-L_c)/c_max_m;
-  }
-  //printf("the upper bound for lambda is %lf\n", Lambda_UB);
-  return true;
-}
-
-
-
-double Check_det(const double** mat_b, double **a, double lambda)
-{
-  double d;
-  int i, /*j,*/ *indx;
-
-  indx = ivector(1,Alphsize);
-  makematrix(mat_b, a,lambda);
-  ludcmp(a,Alphsize,indx,&d);
-  for(i=1;i<=Alphsize;i++) d *= a[i][i];
-  free_ivector(indx,1,Alphsize);
-  return d;  //returning the determinant
-}
-
-
-Sum Check_root(const double** mat_b, double **a, double lambda, double *p, double *q)
-{
-  double **y, /* *col,*/ d;
-  //double sum = 0.0;
-  int i,j;//, *indx;
-  Sum Sum_here; 
-
-  y=dmatrix(1,Alphsize,1,Alphsize);
-  //indx = ivector(1,Alphsize);
-  int indx[Alphsize+1];
-  //col = dvector(1,Alphsize);
-  double col[Alphsize+1];
-
-  makematrix(mat_b, a,lambda);
-  ludcmp(a,Alphsize,indx,&d);
-  Sum_here.value = 0.0;
-  for(i=1;i<=Alphsize;i++) q[i]=0.0;
-  for(j=1;j<=Alphsize;j++){
-    for (i=1;i<=Alphsize;i++) col[i]=0.0;
-    col[j]=1.0;
-    lubksb(a,Alphsize,indx,col);
-    p[j]=0.0;
-    for (i=1;i<=Alphsize;i++) {
-      y[i][j]=col[i]; Sum_here.value += y[i][j]; 
-      p[j]+=y[i][j]; q[i]+=y[i][j]; 
-    }
-  }
-  
-  Sum_here.flag = 1;
-  for (i=1;i<Alphsize; i++) {
-    if ((p[i] < 0) || (q[i] < 0)) {
-      Sum_here.flag = -1;
-      //printf("problematic freq. p[%d] = %.4f q[%d]=%.4f\n",i,p[i],i,q[i]);
-    }
-  }
-  free_dmatrix(y,1,Alphsize,1,Alphsize);
-  return Sum_here; 
-}
-
-
-
-double * Locate_det_zero(const double** mat_b, int *N_root_add)
-{
-  double **a/*,  *q, *p */; // a is the exponentiated matrix of socres, p and q are bg_freqs
-  int i/*,j,k*/;
-  int N;  // number of points for first round
-  int flag_sign; 
-  double lambda/*, l_tmp, sum,  sum_min, sum_max */;
-  double lambda_root, dlambda /*, dsum=0.5 */; 
-  //double *l_here, *s_here;
-  double root[5000];
-  double *root_temp; 
-  //double error=0.000000000001;
-  int zero_monitor = 0;  // record number of zeros found in the range 
-  //int flag;  
-
-  a=dmatrix(1,Alphsize,1,Alphsize);
-  //Sum_local = (Sum *)malloc(sizeof(Sum));
-  //Lambda_local = (Lambda *)malloc(sizeof(Lambda));
-  
-  N = 2 + max(400, ((int) (Lambda_UB-0)/0.005)); 
-  //printf("N = %d in Locate_det_zero\n", N);
-  dlambda = (Lambda_UB)/(N*1.0); 
-  //l_here = (double *)malloc((N+1)*sizeof(double));
-  //s_here = (double *)malloc((N+1)*sizeof(double));
-  double l_here[N+1];
-  double s_here[N+1];
-
-  for (i=0; i< N ; i++){
-    lambda = (i+1)*dlambda; 
-    s_here[i] = Check_det(mat_b, a,lambda); 
-    l_here[i] = lambda; 
-  }
-
-  if (s_here[0] < 0.0) flag_sign = -1;
-  if (s_here[0] > 0.0 ) flag_sign = 1;
-  if (fabs(s_here[0])/exp(l_here[0]*(r_max_m+c_max_m)/2.0) <= Epsilon) {
-    root[zero_monitor++] = l_here[0];
-    flag_sign = 0;
-  }
-  
-  for (i=1;i<N; i++){
-    if ((flag_sign != 0)&& (fabs(s_here[i])>Epsilon)) {
-      if (s_here[i-1]*s_here[i] < 0){
-	//printf("occurring at regular places\n");
-	lambda_root = Nail_det(mat_b, flag_sign, l_here[i-1], l_here[i]);
-	root[zero_monitor++] = lambda_root;
-	flag_sign = -flag_sign;  // the flag switch sign after one sol found 
-	//printf("a (regular) root of det found at %12.10f, i= %d\n", lambda_root,i);
-      }
-    }
-    else {
-      if (s_here[i] < 0.0) flag_sign = -1;
-      if (s_here[i] > 0.0 ) flag_sign = 1;
-      if (fabs(s_here[i])/exp(l_here[i]*(r_max_m+c_max_m)/2.0) <= Epsilon) {
-	root[zero_monitor++] = l_here[i];
-      }
-    }
-  }
-  //printf("total number of solution found in range is %d\n", i_monitor);
-  root_temp = (double *)malloc(zero_monitor*sizeof(double));
-  *N_root_add = zero_monitor; 
-  if (zero_monitor > 0) {
-    if (zero_monitor >= N/4) {
-      //printf("It is likely that uniform zero determinant is occurring.\n");
-      //printf("number of small det points = %d out of %d, exit now....\n",zero_monitor, N);
-      free( root_temp );
-      return NULL;
-      //exit(1);
-    }
-    for (i = 0; i < zero_monitor; i++){
-      root_temp[i] = root[i];
-      //printf("root_location[%d] = %lf\n",i,root_temp[i]);
-    }
-  }
-  free_dmatrix( a, 1, Alphsize, 1, Alphsize );
-  return root_temp;  
-
-} 
-
-
-Lambda Find_JP(const double** mat_b, double la_min, double la_max, double **JP, double *p_in, double *q_in)
-{
-  double **a, *q, *p; // a is the exponentiated matrix of socres, p and q are bg_freqs
-  int i,j/*,k*/;
-  int N;  // number of points for first round
-  double lambda/*, l_tmp, sum, sum_min, sum_max*/;
-  double lambda_max, lambda_min, lambda_final, dlambda/*, dsum=0.5*/; 
-  //double *l_here, *s_here;
-  //double error=0.000000000000001;
-  //int validity_flag; // 1 means valid, -1 means not valid.
-  int flag_sign;       // 1 means small lambda sum > 1, -1 means otherwise
-  int flag_done = -1;       // 1 means find sensible solution, -1 means sensible not found 
-  int i_monitor = 0;          // record number of solution found in the range, including nonsense ones 
-  int j_monitor;
-
-  Lambda Lambda_local;
-  //Sum *Sum_local;   
-  Sum Sum_local;   
-
-  lambda_min = la_min; 
-  lambda_max = la_max;
-  q = q_in;
-  p = p_in;
-  a=dmatrix(1,Alphsize,1,Alphsize);
-  //Sum_local = (Sum *)malloc(sizeof(Sum));
-  //Lambda_local = (Lambda *)malloc(sizeof(Lambda));
-  
-  N = 2 + max(400, ((int) (lambda_max-lambda_min)/0.005)); 
-  //printf("N = %d in Find_JP\n", N);
-  dlambda = (lambda_max-lambda_min)/(N*1.0); 
-  //l_here = (double *)malloc((N+1)*sizeof(double));
-  //s_here = (double *)malloc((N+1)*sizeof(double));
-  double l_here[N+1];
-  double s_here[N+1];
-  //printf("lambda_min enter = %12.10e, lambda_max = %12.10f\n", lambda_min, lambda_max);
-  for (i=0; i< N-1 ; i++){
-    lambda = lambda_min + (i+1)*dlambda; 
-    makematrix(mat_b, a,lambda);
-    Sum_local = Check_root(mat_b, a,lambda,p,q); 
-    l_here[i] = lambda; 
-    s_here[i] = Sum_local.value - 1.0;
-    //printf("scan %d th time in Find_JP\n",i );
-  }
-  //printf("finish first time scanining in Find_JP\n");
-  if (s_here[0] < 0.0) flag_sign = -1;
-  else if (s_here[0] > 0.0 ) flag_sign = 1;
-  else if (s_here[0] == 0.0) {  //needs refined definition on flag_sign
-    printf("enter the exact hit, rarely occurs other than when lambda = 0 \n"); 
-    j_monitor = 1;
-    flag_sign = 0;
-    while ((flag_sign == 0) &&(j_monitor < N)){
-      Sum_local = Check_root(mat_b, a,l_here[0]+ j_monitor*dlambda/N,p,q);
-      if (Sum_local.value > 1.0) {
-	flag_sign  = 1;
-      }
-      else if (Sum_local.value <1.0){
-	flag_sign = -1;
-      }
-      j_monitor++; 
-    }
-  }
-  
-  for (i=1;i<N; i++){  // should be N-1 ???
-    if (flag_sign == 0) {printf("flag_sign = 0 \n"); exit(1);}
-    if (s_here[i-1]*s_here[i] < 0){
-      lambda_min = l_here[i-1];
-      lambda_max = l_here[i];
-      Sum_local = Nail_lambda(mat_b, flag_sign, lambda_min, lambda_max, p,q, &lambda_final);
-      if (Sum_local.flag == 1) {
-	i =N; flag_done = 1; Lambda_local.flag = 1;
-	Lambda_local.min = lambda_final, Lambda_local.max = lambda_final;
-      }
-      flag_sign = -flag_sign;  // the flag switch sign after one sol found 
-      i_monitor++ ;
-    }
-  }
-
-  if (flag_done == 1){
-    // Write correct JP to the matrix
-    makematrix(mat_b, a,lambda_final);
-    for (i=1;i<=Alphsize;i++){
-      for (j=1;j<=Alphsize;j++){
-	JP[i][j] =  a[i][j]*p[i]*q[j];
-      }
-    }     
-    free_dmatrix(a,1,Alphsize,1,Alphsize);
-    return Lambda_local;
-  }
-  else if (flag_done == -1){
-    //printf("no sensible solution in the plausible x range: (%lf,%lf)\n", la_min, la_max);
-    Lambda_local.flag = -1; Lambda_local.min = 0; Lambda_local.max = Infty;
-    return Lambda_local; 
-  }
-  // never come here
-  return Lambda_local;
-} 
-
-
-Sum Nail_lambda(const double** mat_b, int flag_sign, double lambda_min, double lambda_max, double *p, double *q, double *lam_add)
-{
-  double **a;
-  double lambda;
-
-  //Sum *Sum_local;
-  Sum Sum_local;
-  a = dmatrix(1,Alphsize,1,Alphsize);
-  //Sum_local = (Sum *)malloc(sizeof(Sum));
-
-  lambda = (lambda_min+lambda_max)/2.0;
-  Sum_local = Check_root(mat_b, a, lambda, p, q);  
-  while (fabs(Sum_local.value-1.0)>E_bound){
-    if (flag_sign*(Sum_local.value-1.0)<0) lambda_max = lambda;
-    else if (flag_sign*(Sum_local.value-1.0)> 0) lambda_min = lambda;
-
-    // Added by MCF to avoid infinite loop:
-    if (lambda == (lambda_min+lambda_max)/2.0){
-      Sum_local.flag = -1;
-      break;
-    }
-
-    lambda = (lambda_min+lambda_max)/2.0;
-    Sum_local = Check_root(mat_b, a, lambda, p, q); 
-  }
-  free_dmatrix(a,1,Alphsize,1,Alphsize);
-  *lam_add = lambda; 
-  return Sum_local;
-}
-
-
-double Nail_det(const double** mat_b, int flag_sign, double lambda_min, double lambda_max)
-{
-  double **a;
-  double lambda;
-  double value;
-
-  a = dmatrix(1,Alphsize,1,Alphsize);
-
-  lambda = (lambda_min+lambda_max)/2.0;
-  value = Check_det(mat_b, a, lambda);  
-  while ((fabs(value)>E_bound)&&(lambda > 0)){
-    if (flag_sign*(value)<0) lambda_max = lambda;
-    else if (flag_sign*(value)> 0) lambda_min = lambda;
-    lambda = (lambda_min+lambda_max)/2.0;
-    value = Check_det(mat_b, a, lambda); 
-  }
-  free_dmatrix(a,1,Alphsize,1,Alphsize);
-  return lambda; 
-}
-
-void makematrix(const double** mat_b, double **a, double lambda)
-{
-
-  int i,j;
- 
-  for (i=1;i<=Alphsize;i++)
-    for(j=1;j<=Alphsize;j++){
-      *(*(a+i)+j)=exp(lambda*mat_b[i][j]);       
-    }
-}
-
-
-


=====================================
src/CA_code/lambda_calculator.h deleted
=====================================
@@ -1,10 +0,0 @@
-// Copyright 2008 Michiaki Hamada
-
-#ifndef __H_INCLUDE_LAMBDA_CALCULATOR_HH
-#define __H_INCLUDE_LAMBDA_CALCULATOR_HH
-
-// These pointers are 1-based!
-double calculate_lambda( const double** mat_b, int alpha_size,
-                         double* p, double* q );
-
-#endif // __H_INCLUDE_LAMBDA_CALCULATOR_HH


=====================================
src/CA_code/lubksb.c deleted
=====================================
@@ -1,24 +0,0 @@
-// Public domain code from Yi-Kuo Yu & Stephen Altschul, NCBI
-
-void lubksb(a,n,indx,b)
-double **a,b[];
-int n,*indx;
-{
-	int i,ii=0,ip,j;
-	double sum;
-
-	for (i=1;i<=n;i++) {
-		ip=indx[i];
-		sum=b[ip];
-		b[ip]=b[i];
-		if (ii)
-			for (j=ii;j<=i-1;j++) sum -= a[i][j]*b[j];
-		else if (sum) ii=i;
-		b[i]=sum;
-	}
-	for (i=n;i>=1;i--) {
-		sum=b[i];
-		for (j=i+1;j<=n;j++) sum -= a[i][j]*b[j];
-		b[i]=sum/a[i][i];
-	}
-}


=====================================
src/CA_code/ludcmp.c deleted
=====================================
@@ -1,61 +0,0 @@
-// Public domain code from Yi-Kuo Yu & Stephen Altschul, NCBI
-
-#include <math.h>
-
-#define TINY 1.0e-20;
-
-void ludcmp(a,n,indx,d)
-int n,*indx;
-double **a,*d;
-{
-	int i,imax,j,k;
-	double big,dum,sum,temp;
-	double *vv,*dvector();
-	void nrerror(),free_dvector();
-
-	vv=dvector(1,n);
-	*d=1.0;
-	for (i=1;i<=n;i++) {
-		big=0.0;
-		for (j=1;j<=n;j++)
-			if ((temp=fabs(a[i][j])) > big) big=temp;
-		if (big == 0.0) nrerror("Singular matrix in routine LUDCMP");
-		vv[i]=1.0/big;
-	}
-	for (j=1;j<=n;j++) {
-		for (i=1;i<j;i++) {
-			sum=a[i][j];
-			for (k=1;k<i;k++) sum -= a[i][k]*a[k][j];
-			a[i][j]=sum;
-		}
-		big=0.0;
-		for (i=j;i<=n;i++) {
-			sum=a[i][j];
-			for (k=1;k<j;k++)
-				sum -= a[i][k]*a[k][j];
-			a[i][j]=sum;
-			if ( (dum=vv[i]*fabs(sum)) >= big) {
-				big=dum;
-				imax=i;
-			}
-		}
-		if (j != imax) {
-			for (k=1;k<=n;k++) {
-				dum=a[imax][k];
-				a[imax][k]=a[j][k];
-				a[j][k]=dum;
-			}
-			*d = -(*d);
-			vv[imax]=vv[j];
-		}
-		indx[j]=imax;
-		if (a[j][j] == 0.0) a[j][j]=TINY;
-		if (j != n) {
-			dum=1.0/(a[j][j]);
-			for (i=j+1;i<=n;i++) a[i][j] *= dum;
-		}
-	}
-	free_dvector(vv,1,n);
-}
-
-#undef TINY


=====================================
src/CA_code/nrutil.c deleted
=====================================
@@ -1,239 +0,0 @@
-// Public domain code from Yi-Kuo Yu & Stephen Altschul, NCBI
-
-//#include <malloc.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-void nrerror(error_text)
-char error_text[];
-{
-	void exit();
-
-	fprintf(stderr,"Numerical Recipes run-time error...\n");
-	fprintf(stderr,"%s\n",error_text);
-	fprintf(stderr,"...now exiting to system...\n");
-	exit(1);
-}
-
-float *vector(nl,nh)
-int nl,nh;
-{
-	float *v;
-
-	v=(float *)malloc((unsigned) (nh-nl+1)*sizeof(float));
-	if (!v) nrerror("allocation failure in vector()");
-	return v-nl;
-}
-
-int *ivector(nl,nh)
-int nl,nh;
-{
-	int *v;
-
-	v=(int *)malloc((unsigned) (nh-nl+1)*sizeof(int));
-	if (!v) nrerror("allocation failure in ivector()");
-	return v-nl;
-}
-
-double *dvector(nl,nh)
-int nl,nh;
-{
-	double *v;
-
-	v=(double *)malloc((unsigned) (nh-nl+1)*sizeof(double));
-	if (!v) nrerror("allocation failure in dvector()");
-	return v-nl;
-}
-
-float **matrix(nrl,nrh,ncl,nch)
-int nrl,nrh,ncl,nch;
-{
-	int i;
-	float **m;
-
-	m=(float **) malloc((unsigned) (nrh-nrl+1)*sizeof(float *));
-	if (!m) nrerror("allocation failure 1 in matrix()");
-	m -= nrl;
-
-	for(i=nrl;i<=nrh;i++) {
-		m[i]=(float *) malloc((unsigned) (nch-ncl+1)*sizeof(float));
-		if (!m[i]) nrerror("allocation failure 2 in matrix()");
-		m[i] -= ncl;
-	}
-	return m;
-}
-
-double **dmatrix(nrl,nrh,ncl,nch)
-int nrl,nrh,ncl,nch;
-{
-	int i;
-	double **m;
-
-	m=(double **) malloc((unsigned) (nrh-nrl+1)*sizeof(double *));
-	if (!m) nrerror("allocation failure 1 in dmatrix()");
-	m -= nrl;
-
-	for(i=nrl;i<=nrh;i++) {
-		m[i]=(double *) malloc((unsigned) (nch-ncl+1)*sizeof(double));
-		if (!m[i]) nrerror("allocation failure 2 in dmatrix()");
-		m[i] -= ncl;
-	}
-	return m;
-}
-
-double ***darray3(n1l,n1h,n2l,n2h,n3l,n3h)
-int n1l,n1h,n2l,n2h,n3l,n3h;
-{
-  int i;
-  double ***m;
-
-  m=(double ***) malloc((unsigned) (n1h-n1l+1)*sizeof(double **));
-  if (!m) nrerror("allocation failure 1 in darray3()");
-  m -= n1l;
-
-  for(i=n1l;i<=n1h;i++)
-  {
-    m[i]=dmatrix(n2l,n2h,n3l,n3h);
-  }
-  return m;
-}
-
-double ****darray4(n1l,n1h,n2l,n2h,n3l,n3h,n4l,n4h)
-int n1l,n1h,n2l,n2h,n3l,n3h,n4l,n4h;
-{
-  int i;
-  double ****m;
-
-  m=(double ****) malloc((unsigned) (n1h-n1l+1)*sizeof(double ***));
-  if (!m) nrerror("allocation failure 1 in darray4()");
-  m -= n1l;
-
-  for(i=n1l;i<=n1h;i++)
-  {
-    m[i]=darray3(n2l,n2h,n3l,n3h,n4l,n4h);
-  }
-  return m;
-}
-
-int **imatrix(nrl,nrh,ncl,nch)
-int nrl,nrh,ncl,nch;
-{
-	int i,**m;
-
-	m=(int **)malloc((unsigned) (nrh-nrl+1)*sizeof(int *));
-	if (!m) nrerror("allocation failure 1 in imatrix()");
-	m -= nrl;
-
-	for(i=nrl;i<=nrh;i++) {
-		m[i]=(int *)malloc((unsigned) (nch-ncl+1)*sizeof(int));
-		if (!m[i]) nrerror("allocation failure 2 in imatrix()");
-		m[i] -= ncl;
-	}
-	return m;
-}
-
-float **submatrix(a,oldrl,oldrh,oldcl,oldch,newrl,newcl)
-float **a;
-int oldrl,oldrh,oldcl,oldch,newrl,newcl;
-{
-	int i,j;
-	float **m;
-
-	m=(float **) malloc((unsigned) (oldrh-oldrl+1)*sizeof(float *));
-	if (!m) nrerror("allocation failure in submatrix()");
-	m -= newrl;
-
-	for(i=oldrl,j=newrl;i<=oldrh;i++,j++) m[j]=a[i]+oldcl-newcl;
-
-	return m;
-}
-
-void free_vector(v,nl,nh)
-float *v;
-int nl,nh;
-{
-	free((char*) (v+nl));
-}
-
-void free_ivector(v,nl,nh)
-int *v,nl,nh;
-{
-	free((char*) (v+nl));
-}
-
-void free_dvector(v,nl,nh)
-double *v;
-int nl,nh;
-{
-	free((char*) (v+nl));
-}
-
-void free_matrix(m,nrl,nrh,ncl,nch)
-float **m;
-int nrl,nrh,ncl,nch;
-{
-	int i;
-
-	for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl));
-	free((char*) (m+nrl));
-}
-
-void free_dmatrix(m,nrl,nrh,ncl,nch)
-double **m;
-int nrl,nrh,ncl,nch;
-{
-	int i;
-
-	for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl));
-	free((char*) (m+nrl));
-}
-
-void free_imatrix(m,nrl,nrh,ncl,nch)
-int **m;
-int nrl,nrh,ncl,nch;
-{
-	int i;
-
-	for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl));
-	free((char*) (m+nrl));
-}
-
-void free_darray3(m,n1l,n1h,n2l,n2h,n3l,n3h)
-double ***m;
-int n1l,n1h,n2l,n2h,n3l,n3h;
-{
-	int i;
-
-	for(i=n1h;i>=n1l;i--) free_dmatrix(m[i],n2l,n2h,n3l,n3h);
-	free((char*) (m+n1l));
-}
-
-void free_submatrix(b,nrl,nrh,ncl,nch)
-float **b;
-int nrl,nrh,ncl,nch;
-{
-	free((char*) (b+nrl));
-}
-
-float **convert_matrix(a,nrl,nrh,ncl,nch)
-float *a;
-int nrl,nrh,ncl,nch;
-{
-	int i,j,nrow,ncol;
-	float **m;
-
-	nrow=nrh-nrl+1;
-	ncol=nch-ncl+1;
-	m = (float **) malloc((unsigned) (nrow)*sizeof(float*));
-	if (!m) nrerror("allocation failure in convert_matrix()");
-	m -= nrl;
-	for(i=0,j=nrl;i<=nrow-1;i++,j++) m[j]=a+ncol*i-ncl;
-	return m;
-}
-
-void free_convert_matrix(b,nrl,nrh,ncl,nch)
-float **b;
-int nrl,nrh,ncl,nch;
-{
-	free((char*) (b+nrl));
-}


=====================================
src/CA_code/nrutil.h deleted
=====================================
@@ -1,19 +0,0 @@
-// Public domain code from Yi-Kuo Yu & Stephen Altschul, NCBI
-
-float *vector();
-float **matrix();
-float **convert_matrix();
-double *dvector();
-double **dmatrix();
-int *ivector();
-int **imatrix();
-float **submatrix();
-void free_vector();
-void free_dvector();
-void free_ivector();
-void free_matrix();
-void free_dmatrix();
-void free_imatrix();
-void free_submatrix();
-void free_convert_matrix();
-void nrerror();


=====================================
src/LambdaCalculator.cc
=====================================
@@ -0,0 +1,408 @@
+// Copyright 2015 Yutaro Konta
+
+#include "LambdaCalculator.hh"
+#include <vector>
+#include <cassert>
+#include <cstdio>  // sprintf
+#include <cstdlib>
+#include <cfloat>
+#include <cmath>
+//#include <iostream>
+using namespace std;
+
+static double roundToFewDigits(double x)
+{
+  // This rounding fixes some inaccuracies, e.g. for DNA with a simple
+  // match/mismatch matrix it's likely to make all the probabilities
+  // exactly 0.25, as they should be.
+  char buffer[32];
+  sprintf(buffer, "%g", x);
+  return atof(buffer);
+}
+
+static double** makematrix(int m, int n, double val)
+{
+  double** mat = new double* [m];
+  for (int i=0; i<m; i++)
+    {
+      mat[i] = new double [n];
+      for (int j=0; j<n; j++)
+        mat[i][j] = val;
+    }
+  return mat;
+}
+
+static void deletematrix(double** a, int m)
+{
+  for (int i=0; i<m; i++)
+    delete[] a[i];
+  delete[] a;
+}
+
+static double summatrix(double** a, int m, int n)
+{
+  double s = 0;
+  for (int i=0; i<m; i++)
+    for (int j=0; j<n; j++)
+      s += a[i][j];
+  return s;
+}
+
+static int max_index(double** a, int n, int i)
+{
+  double max = -DBL_MAX;
+  int maxindex = -1;
+
+  for (int j=i; j<n; j++)
+    {
+      if (fabs(a[j][i]) > max)
+        {
+          max = fabs(a[j][i]);
+          maxindex = j;
+        }
+    }
+  return maxindex;
+}
+
+static void swap_matrix(double** a, int i, int j)
+{
+  double* v = a[i];
+  a[i] = a[j];
+  a[j] = v;
+}
+
+static void swap_vector(int* a, int i, int j)
+{
+  int v = a[i];
+  a[i] = a[j];
+  a[j] = v;
+}
+
+static bool lu_pivoting(double** a, int* idx, int n)
+{
+  int v;
+
+  for (int i=0; i<n; i++)
+    idx[i] = i;
+
+  for (int i=0; i<n; i++)
+    {
+      v = max_index(a, n, i);
+      assert(v >= 0);
+      if (fabs(a[v][i]) < 1e-10)
+        {
+          return false; // singular matrix!
+        }
+
+      swap_matrix(a, i, v);
+      swap_vector(idx, i, v);
+
+      a[i][i] = 1.0/a[i][i];
+      for (int j=i+1; j<n; j++)
+        {
+          a[j][i] = a[j][i] * a[i][i];
+          for (int k=i+1; k<n; k++)
+            a[j][k] = a[j][k] - a[j][i] * a[i][k];
+        }
+    }
+  return true;
+}
+
+static void solvep(double **a, double *x, double *b, int n)
+{
+  double *y = new double [n];
+
+  for (int i=0; i<n; i++)
+    {
+      y[i] = b[i];
+      for (int j=0; j<i; j++)
+        y[i] -= a[i][j] * y[j];
+    }
+
+  for (int i=n-1; i>=0; i--)
+    {
+      x[i] = y[i];
+      for (int j=i+1; j<n; j++)
+        x[i] -= a[i][j] * x[j];
+      x[i] *= a[i][i]; // needed because a[i][i] is inverted
+    }
+  delete[] y;
+}
+
+static void transpose(double **a, int n)
+{
+  double v;
+  for (int i=0; i<n; i++)
+    {
+      for (int j=0; j<i; j++)
+        {
+          v = a[i][j];
+          a[i][j] = a[j][i];
+          a[j][i] = v;
+        }
+    }
+}
+
+static bool invert(double **a, double **inv, int n)
+{
+  int* idx = new int [n];
+
+  double **e = makematrix(n,n,0);
+
+  if(!lu_pivoting(a, idx, n))
+    return false;
+
+  for (int i=0; i<n; i++)
+    e[idx[i]][i] = 1; // transposed
+
+  delete[] idx;
+
+  for (int i=0; i<n; i++)
+    solvep(a, inv[i], e[i], n);
+
+  deletematrix(e, n);
+  transpose(inv, n); // transpose inv to make the inverted matrix of a
+  return true;
+}
+
+static bool calculate_inv_sum(double **matrix, int alpha_size, double tau, double* inv_sum)
+{
+  double **m = makematrix(alpha_size, alpha_size, 0);
+  double **y = makematrix(alpha_size, alpha_size, 0);
+
+  for (int i=0; i<alpha_size; i++)
+    for (int j=0; j<alpha_size; j++)
+      m[i][j] = exp(tau * matrix[i][j]);
+
+  if(!invert(m, y, alpha_size))
+    return false;
+
+  *inv_sum = summatrix(y, alpha_size, alpha_size);
+
+  deletematrix(m, alpha_size);
+  deletematrix(y, alpha_size);
+  return true;
+}
+
+namespace cbrc{
+
+void LambdaCalculator::setBad(){
+  lambda_ = -1;
+  letterProbs1_.clear();
+  letterProbs2_.clear();
+}
+
+bool LambdaCalculator::find_ub(double **matrix, int alpha_size, double *ub)
+{
+  double r_max_min = DBL_MAX;
+  double c_max_min = DBL_MAX;
+  double r_max;
+  double c_max;
+
+  double r_min;
+  double c_min;
+
+  int l_r = 0;
+  int l_c = 0;
+
+  for (int i=0; i<alpha_size; i++)
+    {
+      r_max = -DBL_MAX;
+      r_min = DBL_MAX;
+      for (int j=0; j<alpha_size; j++)
+        {
+          if (matrix[i][j] > r_max)
+            r_max = matrix[i][j];
+
+          if (matrix[i][j] < r_min)
+            r_min = matrix[i][j];
+        }
+      if (r_max == 0 && r_min == 0)
+        l_r++;
+      else if (r_max <= 0 || r_min >= 0)
+        return false;
+      else if (r_max < r_max_min)
+        r_max_min = r_max;
+    }
+
+  for (int j=0; j<alpha_size; j++)
+    {
+      c_max = -DBL_MAX;
+      c_min = DBL_MAX;
+      for (int i=0; i<alpha_size; i++)
+        {
+          if (matrix[i][j] > c_max)
+            c_max = matrix[i][j];
+
+          if (matrix[i][j] < c_min)
+            c_min = matrix[i][j];
+        }
+      if (c_max == 0 && c_min == 0)
+        l_c++;
+      else if (c_max <= 0 || c_min >= 0)
+        return false;
+      else if (c_max < c_max_min)
+        c_max_min = c_max;
+    }
+
+  if (l_r == alpha_size) return false;
+
+  // the multiplication by 1.1 is sometimes necessary, presumably to
+  // prevent the upper bound from being too tight:
+  if (r_max_min > c_max_min)
+    *ub = 1.1 * log(1.0 * (alpha_size - l_r))/r_max_min;
+  else
+    *ub = 1.1 * log(1.0 * (alpha_size - l_c))/c_max_min;
+
+  return true;
+}
+
+bool LambdaCalculator::binary_search(double** matrix, int alpha_size, double lb, double ub, vector<double>& letprob1, vector<double>& letprob2, double* lambda, int maxiter)
+{
+  double l=0;
+  double r=0;
+  double l_sum=0;
+  double r_sum=0;
+  int iter=0;
+
+  while (iter < maxiter && (l>=r || (l_sum < 1.0 && r_sum < 1.0) || (l_sum > 1.0 && r_sum > 1.0)))
+    {
+      l = lb + (ub - lb)*rand()/RAND_MAX;
+      r = lb + (ub - lb)*rand()/RAND_MAX;
+
+      if (!calculate_inv_sum(matrix, alpha_size, l, &l_sum) || !calculate_inv_sum(matrix, alpha_size, r, &r_sum))
+        {
+          l = 0;
+          r = 0;
+        }
+      iter++;
+    }
+
+  if (iter >= maxiter)
+    return false;
+
+  while (l_sum != 1.0 && r_sum != 1.0 && (l+r)/2.0 != l && (l+r)/2.0 != r)
+    {
+      double mid = (l + r)/2.0;
+      double mid_sum;
+      if (!calculate_inv_sum(matrix, alpha_size, mid, &mid_sum))
+        return false;
+
+      if (fabs(mid_sum) >= DBL_MAX)
+        return false;
+
+      if ((l_sum < 1.0 && mid_sum >= 1.0) || (l_sum > 1.0 && mid_sum <= 1.0))
+        {
+          r = mid;
+          r_sum = mid_sum;
+        }
+
+      else if ((r_sum < 1.0 && mid_sum >= 1.0) || (r_sum > 1.0 && mid_sum <= 1.0))
+        {
+          l = mid;
+          l_sum = mid_sum;
+        }
+
+      else
+        return false;
+    }
+
+  if (fabs(l_sum - 1.0) < fabs(r_sum - 1.0))
+    {
+      if (check_lambda(matrix, l, alpha_size, letprob1, letprob2))
+        {
+          *lambda = l;
+          return true;
+        }
+      return false;
+    }
+
+  if (check_lambda(matrix, r, alpha_size, letprob1, letprob2))
+    {
+      *lambda = r;
+      return true;
+    }
+  return false;
+}
+
+double LambdaCalculator::calculate_lambda(double** matrix, int alpha_size, vector<double>& letprob1, vector<double>& letprob2, int maxiter, int max_boundary_search_iter, double lb_ratio)
+{
+  double ub;
+
+  if (!find_ub(matrix, alpha_size, &ub))
+    return -1;
+
+  double lb = ub*lb_ratio;
+  double lambda = -1;
+  int iter = 0;
+  bool flag = false;
+
+  while (!flag && iter < maxiter)
+    {
+      flag = binary_search(matrix, alpha_size, lb, ub, letprob1, letprob2, &lambda, max_boundary_search_iter);
+      iter++;
+    }
+
+  return lambda;
+}
+
+bool LambdaCalculator::check_lambda(double** matrix, double lambda, int alpha_size, vector<double>& letprob1, vector<double>& letprob2)
+{
+  double **m = makematrix(alpha_size, alpha_size, 0);
+  double **y = makematrix(alpha_size, alpha_size, 0);
+
+  for (int i=0; i<alpha_size; i++)
+    for (int j=0; j<alpha_size; j++)
+      m[i][j] = exp(lambda * matrix[i][j]);
+
+  invert(m, y, alpha_size);
+
+  for (int i=0; i<alpha_size; i++)
+    {
+      double p = 0;
+      for (int j=0;j<alpha_size; j++)
+        p += y[i][j];
+      if (p < 0 || p > 1)
+        {
+          letprob2.clear();
+          return false;
+        }
+      letprob2.push_back(roundToFewDigits(p));
+    }
+
+  for (int j=0; j<alpha_size; j++)
+    {
+      double q = 0;
+      for (int i=0; i<alpha_size; i++)
+        q += y[i][j];
+      if (q < 0 || q > 1)
+        {
+          letprob2.clear();
+          letprob1.clear();
+          return false;
+        }
+      letprob1.push_back(roundToFewDigits(q));
+    }
+
+  deletematrix(m, alpha_size);
+  deletematrix(y, alpha_size);
+
+  return true;
+}
+
+void LambdaCalculator::calculate(const const_int_ptr *matrix, int alphSize) {
+  assert(alphSize >= 0);
+  setBad();
+
+  int maxiter = 1000;
+  int max_boundary_search_iter = 100;
+  double lb_ratio = 1e-6;
+
+  double** mat = makematrix(alphSize, alphSize, 0);
+  for (int i=0; i<alphSize; i++)
+    for (int j=0; j<alphSize; j++)
+      mat[i][j] = matrix[i][j];
+  lambda_ = calculate_lambda(mat, alphSize, letterProbs1_, letterProbs2_, maxiter, max_boundary_search_iter, lb_ratio);
+  deletematrix(mat, alphSize);
+}
+}


=====================================
src/mcf_score_matrix_probs.hh → src/LambdaCalculator.hh
=====================================
@@ -1,4 +1,5 @@
-// Copyright 2010 Martin C. Frith
+// Copyright 2008 Michiaki Hamada
+// Modified 2015 Yutaro Konta
 
 // This class calculates the scale factor (lambda), and the letter
 // probabilities, that are implicit in a scoring matrix.  The
@@ -9,23 +10,20 @@
 // probabilities should be identical.  With this code, they might
 // differ minutely from exact identity.
 
-#ifndef MCF_SCORE_MATRIX_PROBS_HH
-#define MCF_SCORE_MATRIX_PROBS_HH
+#ifndef LAMBDA_CALCULATOR_HH
+#define LAMBDA_CALCULATOR_HH
 
 #include <vector>
 
-namespace mcf {
+namespace cbrc{
 
 typedef const int *const_int_ptr;
 
-class ScoreMatrixProbs {
+class LambdaCalculator{
  public:
-  ScoreMatrixProbs() { setBad(); }
+  LambdaCalculator() { setBad(); }
 
-  ScoreMatrixProbs(const const_int_ptr *scoreMatrix, unsigned alphabetSize)
-  { init(scoreMatrix, alphabetSize); }
-
-  void init(const const_int_ptr *scoreMatrix, unsigned alphabetSize);
+  void calculate(const const_int_ptr *matrix, int alphSize);
 
   // Put us in the bad/undefined state.
   void setBad();
@@ -37,19 +35,24 @@ class ScoreMatrixProbs {
   double lambda() const { return lambda_; }
 
   // The probabilities of letters corresponding to matrix rows (1st index).
-  // In the bad/undefined state, it is an empty vector.
-  const std::vector<double>& letterProbs1() const { return letterProbs1_; }
+  // In the bad/undefined state, it is a null pointer.
+  const double *letterProbs1() const {return isBad() ? 0 : &letterProbs1_[0];}
 
   // The probabilities of letters corresponding to matrix columns (2nd index).
-  // In the bad/undefined state, it is an empty vector.
-  const std::vector<double>& letterProbs2() const { return letterProbs2_; }
+  // In the bad/undefined state, it is a null pointer.
+  const double *letterProbs2() const {return isBad() ? 0 : &letterProbs2_[0];}
 
  private:
   double lambda_;
   std::vector<double> letterProbs1_;
   std::vector<double> letterProbs2_;
+
+  bool find_ub(double **matrix, int alpha_size, double *ub);
+  bool binary_search(double** matrix, int alpha_size, double lb, double ub, std::vector<double>& letprob1, std::vector<double>& letprob2, double* lambda, int maxiter);
+  double calculate_lambda(double** matrix, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2, int maxiter, int max_boundary_search_iter, double lb_ratio);
+  bool check_lambda(double** matrix, double lambda, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2);
 };
 
-}
+}  // end namespace
 
 #endif


=====================================
src/Makefile
=====================================
@@ -2,20 +2,13 @@ CXXFLAGS = -O3 -Wall -W -Wcast-qual -Wswitch-enum -Wundef	\
 -Wcast-align -Wold-style-cast
 # -Wconversion
 
-CFLAGS = -Wall
-
-COBJ = CA_code/lambda_calculator.o
-
 all: tantan
 
-tantan: *.cc *.hh version.hh Makefile $(COBJ)
-	$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ *.cc $(COBJ)
-
-$(COBJ): CA_code/*.c CA_code/*.h Makefile
-	$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ CA_code/lambda_calculator.c
+tantan: *.cc *.hh version.hh Makefile
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ *.cc
 
 clean:
-	rm -f tantan $(COBJ)
+	rm -f tantan
 
 VERSION = \"`hg id -n`\"
 


=====================================
src/mcf_score_matrix_probs.cc deleted
=====================================
@@ -1,45 +0,0 @@
-// Copyright 2010 Martin C. Frith
-
-#include "mcf_score_matrix_probs.hh"
-
-namespace lambda {
-extern "C" {
-#include "CA_code/lambda_calculator.h"
-}
-}
-
-namespace mcf {
-
-void ScoreMatrixProbs::setBad() {
-  lambda_ = -1;
-  letterProbs1_.clear();
-  letterProbs2_.clear();
-}
-
-void ScoreMatrixProbs::init(const const_int_ptr *scoreMatrix,
-                            unsigned alphabetSize) {
-  // We need to pass the parameters as 1-based pointers, hence the +1s
-  // and -1s.
-
-  std::vector<double> cells(alphabetSize * alphabetSize + 1);
-  std::vector<const double*> pointers(alphabetSize + 1);
-
-  for (unsigned i = 0; i < alphabetSize; ++i) {
-    pointers[i+1] = &cells[i * alphabetSize];
-
-    for (unsigned j = 0; j < alphabetSize; ++j) {
-      cells[i * alphabetSize + j + 1] = scoreMatrix[i][j];
-    }
-  }
-
-  letterProbs1_.resize(alphabetSize);
-  letterProbs2_.resize(alphabetSize);
-
-  lambda_ = lambda::calculate_lambda(&pointers[0], alphabetSize,
-                                     &letterProbs1_[0] - 1,
-                                     &letterProbs2_[0] - 1);
-
-  if (lambda_ < 0) setBad();
-}
-
-}


=====================================
src/mcf_tantan_options.cc
=====================================
@@ -52,6 +52,8 @@ TantanOptions::TantanOptions() :
     repeatEndProb(0.05),
     maxCycleLength(-1),  // depends on isProtein
     repeatOffsetProbDecay(0.9),
+    matchScore(0),
+    mismatchCost(0),
     gapExistenceCost(0),
     gapExtensionCost(-1),  // means: no gaps
     minMaskProb(0.5),
@@ -75,6 +77,8 @@ Options (default settings):\n\
  -w  maximum tandem repeat period to consider (100, but -p selects 50)\n\
  -d  probability decay per period ("
       + stringify(repeatOffsetProbDecay) + ")\n\
+ -i  match score (1, but -p selects BLOSUM62)\n\
+ -j  mismatch cost (1, but -p selects BLOSUM62)\n\
  -a  gap existence cost ("
       + stringify(gapExistenceCost) + ")\n\
  -b  gap extension cost (infinite: no gaps)\n\
@@ -94,7 +98,7 @@ Home page: http://www.cbrc.jp/tantan/\n\
 #include "version.hh"
       "\n";
 
-  const char *optstring = "px:cm:r:e:w:d:a:b:s:f:h";
+  const char *optstring = "px:cm:r:e:w:d:i:j:a:b:s:f:h";
 
   int i;
   while ((i = myGetopt(argc, argv, optstring, help, version)) != -1) {
@@ -132,6 +136,16 @@ Home page: http://www.cbrc.jp/tantan/\n\
         if (repeatOffsetProbDecay <= 0 || repeatOffsetProbDecay > 1)
           badopt(c, optarg);
         break;
+      case 'i':
+	unstringify(matchScore, optarg);
+	if (matchScore <= 0)
+	  badopt(c, optarg);
+	break;
+      case 'j':
+	unstringify(mismatchCost, optarg);
+	if (mismatchCost <= 0)
+	  badopt(c, optarg);
+	break;
       case 'a':
         unstringify(gapExistenceCost, optarg);
         break;


=====================================
src/mcf_tantan_options.hh
=====================================
@@ -18,6 +18,8 @@ struct TantanOptions {
   double repeatEndProb;
   int maxCycleLength;
   double repeatOffsetProbDecay;
+  int matchScore;
+  int mismatchCost;
   int gapExistenceCost;
   int gapExtensionCost;
   double minMaskProb;


=====================================
src/tantan.cc
=====================================
@@ -99,7 +99,7 @@ struct Tantan {
     //f2g = firstGapProb;
     //g2f = 1 - otherGapProb;
     oneGapProb = firstGapProb * (1 - otherGapProb);
-    endGapProb = firstGapProb * 1;
+    endGapProb = firstGapProb * (maxRepeatOffset > 1);
     f2f0 = 1 - repeatEndProb;
     f2f1 = 1 - repeatEndProb - firstGapProb;
     f2f2 = 1 - repeatEndProb - firstGapProb * 2;
@@ -125,8 +125,7 @@ struct Tantan {
   double forwardTotal() {
     double fromForeground = std::accumulate(foregroundProbs.begin(),
                                             foregroundProbs.end(), 0.0);
-    fromForeground *= f2b;
-    double total = backgroundProb * b2b + fromForeground;
+    double total = backgroundProb * b2b + fromForeground * f2b;
     assert(total > 0);
     return total;
   }
@@ -148,36 +147,31 @@ struct Tantan {
     double f = *foregroundPtr;
     double fromForeground = f;
 
-    if (insertionProbs.empty()) {
-      *foregroundPtr = fromBackground + f * f2f0;
-    } else {
-      double *insertionPtr = &insertionProbs.back();
-      double i = *insertionPtr;
-      *foregroundPtr = fromBackground + f * f2f1 + i * endGapProb;
-      double d = f;
-      --foregroundPtr;
-      fromBackground *= b2fGrowth;
-
-      while (foregroundPtr > &foregroundProbs.front()) {
-        f = *foregroundPtr;
-        fromForeground += f;
-        i = *(insertionPtr - 1);
-        *foregroundPtr = fromBackground + f * f2f2 + (i + d) * oneGapProb;
-        *insertionPtr = f + i * g2g;
-        d = f + d * g2g;
-        --foregroundPtr;
-        --insertionPtr;
-        fromBackground *= b2fGrowth;
-      }
+    double *insertionPtr = &insertionProbs.back();
+    double i = *insertionPtr;
+    *foregroundPtr = fromBackground + f * f2f1 + i * endGapProb;
+    double d = f;
+    --foregroundPtr;
+    fromBackground *= b2fGrowth;
 
+    while (foregroundPtr > &foregroundProbs.front()) {
       f = *foregroundPtr;
       fromForeground += f;
-      *foregroundPtr = fromBackground + f * f2f1 + d * endGapProb;
-      *insertionPtr = f;
+      i = *(insertionPtr - 1);
+      *foregroundPtr = fromBackground + f * f2f2 + (i + d) * oneGapProb;
+      *insertionPtr = f + i * g2g;
+      d = f + d * g2g;
+      --foregroundPtr;
+      --insertionPtr;
+      fromBackground *= b2fGrowth;
     }
 
-    fromForeground *= f2b;
-    backgroundProb = backgroundProb * b2b + fromForeground;
+    f = *foregroundPtr;
+    fromForeground += f;
+    *foregroundPtr = fromBackground + f * f2f1 + d * endGapProb;
+    *insertionPtr = f;
+
+    backgroundProb = backgroundProb * b2b + fromForeground * f2b;
   }
 
   void calcBackwardTransitionProbsWithGaps() {
@@ -186,37 +180,32 @@ struct Tantan {
     double f = *foregroundPtr;
     double toForeground = f;
 
-    if (insertionProbs.empty()) {
-      *foregroundPtr = toBackground + f2f0 * f;
-    } else {
-      double *insertionPtr = &insertionProbs.front();
-      double i = *insertionPtr;
-      *foregroundPtr = toBackground + f2f1 * f + i;
-      double d = endGapProb * f;
-      ++foregroundPtr;
-      toForeground *= b2fGrowth;
-
-      while (foregroundPtr < &foregroundProbs.back()) {
-        f = *foregroundPtr;
-        toForeground += f;
-        i = *(insertionPtr + 1);
-        *foregroundPtr = toBackground + f2f2 * f + (i + d);
-        double oneGapProb_f = oneGapProb * f;
-        *insertionPtr = oneGapProb_f + g2g * i;
-        d = oneGapProb_f + g2g * d;
-        ++foregroundPtr;
-        ++insertionPtr;
-        toForeground *= b2fGrowth;
-      }
+    double *insertionPtr = &insertionProbs.front();
+    double i = *insertionPtr;
+    *foregroundPtr = toBackground + f2f1 * f + i;
+    double d = endGapProb * f;
+    ++foregroundPtr;
+    toForeground *= b2fGrowth;
 
+    while (foregroundPtr < &foregroundProbs.back()) {
       f = *foregroundPtr;
       toForeground += f;
-      *foregroundPtr = toBackground + f2f1 * f + d;
-      *insertionPtr = endGapProb * f;
+      i = *(insertionPtr + 1);
+      *foregroundPtr = toBackground + f2f2 * f + (i + d);
+      double oneGapProb_f = oneGapProb * f;
+      *insertionPtr = oneGapProb_f + g2g * i;
+      d = oneGapProb_f + g2g * d;
+      ++foregroundPtr;
+      ++insertionPtr;
+      toForeground *= b2fGrowth;
     }
 
-    toForeground *= b2fLast;
-    backgroundProb = b2b * backgroundProb + toForeground;
+    f = *foregroundPtr;
+    toForeground += f;
+    *foregroundPtr = toBackground + f2f1 * f + d;
+    *insertionPtr = endGapProb * f;
+
+    backgroundProb = b2b * backgroundProb + b2fLast * toForeground;
   }
 
   void calcForwardTransitionProbs() {
@@ -235,8 +224,7 @@ struct Tantan {
       fromBackground *= b2fGrowth;
     }
 
-    fromForeground *= f2b;
-    backgroundProb = backgroundProb * b2b + fromForeground;
+    backgroundProb = backgroundProb * b2b + fromForeground * f2b;
   }
 
   void calcBackwardTransitionProbs() {
@@ -255,8 +243,7 @@ struct Tantan {
       ++foregroundPtr;
     }
 
-    toForeground *= b2fLast;
-    backgroundProb = b2b * backgroundProb + toForeground;
+    backgroundProb = b2b * backgroundProb + b2fLast * toForeground;
   }
 
   void addEndCounts(double forwardProb,
@@ -281,12 +268,17 @@ struct Tantan {
     }
   }
 
-  void calcEmissionProbs() {
-    const double *lrRow = likelihoodRatioMatrix[*seqPtr];
+  bool isNearSeqBeg() {
+    return seqPtr - seqBeg < maxRepeatOffset;
+  }
 
-    bool isNearSeqBeg = (seqPtr - seqBeg < maxRepeatOffset);
-    const uchar *seqStop = isNearSeqBeg ? seqBeg : seqPtr - maxRepeatOffset;
+  const uchar *seqFurthestBack() {
+    return isNearSeqBeg() ? seqBeg : seqPtr - maxRepeatOffset;
+  }
 
+  void calcEmissionProbs() {
+    const double *lrRow = likelihoodRatioMatrix[*seqPtr];
+    const uchar *seqStop = seqFurthestBack();
     double *foregroundPtr = BEG(foregroundProbs);
     const uchar *offsetPtr = seqPtr;
 


=====================================
src/tantan_app.cc
=====================================
@@ -6,10 +6,10 @@
 #include "mcf_alphabet.hh"
 #include "mcf_fasta_sequence.hh"
 #include "mcf_score_matrix.hh"
-#include "mcf_score_matrix_probs.hh"
 #include "mcf_tantan_options.hh"
 #include "mcf_util.hh"
 #include "tantan.hh"
+#include "LambdaCalculator.hh"
 
 #include <algorithm>  // copy, fill_n
 #include <cassert>
@@ -73,29 +73,42 @@ void initScoresAndProbabilities() {
 
   ScoreMatrix scoreMatrix;
 
-  if (options.scoreMatrixFileName)
+  if (options.scoreMatrixFileName) {
     unfilify(scoreMatrix, options.scoreMatrixFileName);
-  else if (options.isProtein)
-    unstringify(scoreMatrix, ScoreMatrix::blosum62);
-  else
-    scoreMatrix.initMatchMismatch(1, 1, "ACGTU");  // allow for RNA
+  } else if (options.isProtein) {
+    if (options.matchScore || options.mismatchCost) {
+      scoreMatrix.initMatchMismatch(std::max(options.matchScore, 1),
+				    std::max(options.mismatchCost, 1),
+				    Alphabet::protein);
+    } else {
+      unstringify(scoreMatrix, ScoreMatrix::blosum62);
+    }
+  } else {
+    scoreMatrix.initMatchMismatch(std::max(options.matchScore, 1),
+				  std::max(options.mismatchCost, 1),
+				  "ACGTU");  // allow for RNA
+  }
 
   scoreMatrix.makeFastMatrix(fastMatrixPointers, scoreMatrixSize,
                              alphabet.lettersToNumbers,
                              scoreMatrix.minScore(), false);
 
-  ScoreMatrixProbs smp(fastMatrixPointers, alphabet.size);
-  if (smp.isBad())
+  cbrc::LambdaCalculator matCalc;
+  matCalc.calculate(fastMatrixPointers, alphabet.size);
+  if (matCalc.isBad())
     throw Error("can't calculate probabilities for this score matrix");
+  double matrixLambda = matCalc.lambda();
 
-  for (int i = 0; i < scoreMatrixSize; ++i)
-    for (int j = 0; j < scoreMatrixSize; ++j)
-      probMatrix[i][j] = std::exp(smp.lambda() * fastMatrix[i][j]);
+  for (int i = 0; i < scoreMatrixSize; ++i) {
+    for (int j = 0; j < scoreMatrixSize; ++j) {
+      probMatrix[i][j] = std::exp(matrixLambda * fastMatrix[i][j]);
+    }
+  }
 
   if (options.gapExtensionCost > 0) {
     int firstGapCost = options.gapExistenceCost + options.gapExtensionCost;
-    firstGapProb = std::exp(-smp.lambda() * firstGapCost);
-    otherGapProb = std::exp(-smp.lambda() * options.gapExtensionCost);
+    firstGapProb = std::exp(-matrixLambda * firstGapCost);
+    otherGapProb = std::exp(-matrixLambda * options.gapExtensionCost);
 
     // the gap existence cost includes the gap ending cost:
     firstGapProb /= (1 - otherGapProb);
@@ -103,7 +116,7 @@ void initScoresAndProbabilities() {
     // XXX check if firstGapProb is too high
   }
 
-  //std::cerr << "lambda: " << smp.lambda() << "\n";
+  //std::cerr << "lambda: " << matrixLambda << "\n";
   //std::cerr << "firstGapProb: " << firstGapProb << "\n";
   //std::cerr << "otherGapProb: " << otherGapProb << "\n";
 }


=====================================
src/version.hh
=====================================
@@ -1 +1 @@
-"13"
+"18"


=====================================
test/tantan_test.out
=====================================
@@ -1487,3 +1487,5 @@ IIIIIIIIIIIIIIIEI6IIII7I8I&IBI(3<@1.3,&:9/'(-
 GTGGTCCACATATACAATGGAATATTACTCAGCTATCAGAAAGAA
 +SRR019778.100 I331_2_FC3076HAAXX:1:1:1682:1290 length=45
 IIIIIIIIIIIIIIIIDIII2BDHIIHCF=/-6A1921*/1-*-7
+
+205


=====================================
test/tantan_test.sh
=====================================
@@ -8,7 +8,7 @@ cd $(dirname $0)
 PATH=../src:$PATH
 
 countLowercaseLetters () {
-    grep -v '^>' "$@" | tr -cd a-z | wc -c
+    grep -v '^>' "$@" | tr -cd a-z | wc -c | tr -d ' '
 }
 
 {
@@ -27,5 +27,7 @@ countLowercaseLetters () {
     tantan -p -a11 -b2 titin_human.fa | countLowercaseLetters
     echo
     tantan panda.fastq
-} |
-diff tantan_test.out -
+    echo
+    tantan -i2 -j3 hg19_chrM.fa | countLowercaseLetters
+} 2>&1 |
+diff -u tantan_test.out -



View it on GitLab: https://salsa.debian.org/med-team/tantan/commit/e07c7bf4fd74924aa5cb856a2966f8850e974ac7

-- 
View it on GitLab: https://salsa.debian.org/med-team/tantan/commit/e07c7bf4fd74924aa5cb856a2966f8850e974ac7
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181217/1a0d3e13/attachment-0001.html>