[med-svn] [Git][med-team/tantan][master] 6 commits: New upstream version 18
Andreas Tille
gitlab at salsa.debian.org
Mon Dec 17 13:01:43 GMT 2018
Andreas Tille pushed to branch master at Debian Med / tantan
Commits:
e07c7bf4 by Andreas Tille at 2018-12-17T12:52:39Z
New upstream version 18
- - - - -
df53f7d8 by Andreas Tille at 2018-12-17T12:52:40Z
Update upstream source from tag 'upstream/18'
Update to upstream version '18'
with Debian dir 346523c3721d5081fc1c8cf6e9162b08053677bd
- - - - -
b0e15b98 by Andreas Tille at 2018-12-17T12:52:40Z
New upstream version
- - - - -
facd45e7 by Andreas Tille at 2018-12-17T12:52:42Z
Standards-Version: 4.2.1
- - - - -
dd34bb96 by Andreas Tille at 2018-12-17T12:58:47Z
Adapt patch
- - - - -
417b5568 by Andreas Tille at 2018-12-17T13:00:22Z
Upload to unstable
- - - - -
23 changed files:
- ChangeLog.txt
- README.html
- README.txt
- debian/changelog
- debian/control
- debian/patches/buildflags.patch
- − src/CA_code/lambda_calculator.c
- − src/CA_code/lambda_calculator.h
- − src/CA_code/lubksb.c
- − src/CA_code/ludcmp.c
- − src/CA_code/nrutil.c
- − src/CA_code/nrutil.h
- + src/LambdaCalculator.cc
- src/mcf_score_matrix_probs.hh → src/LambdaCalculator.hh
- src/Makefile
- − src/mcf_score_matrix_probs.cc
- src/mcf_tantan_options.cc
- src/mcf_tantan_options.hh
- src/tantan.cc
- src/tantan_app.cc
- src/version.hh
- test/tantan_test.out
- test/tantan_test.sh
Changes:
=====================================
ChangeLog.txt
=====================================
@@ -1,8 +1,36 @@
+2018-12-10 Martin C. Frith <Martin C. Frith>
+
+ * README.txt, src/mcf_tantan_options.cc, src/mcf_tantan_options.hh,
+ src/tantan_app.cc, test/tantan_test.out, test/tantan_test.sh:
+ Add match score, mismatch cost options
+ [2383404c795a] [tip]
+
+ * src/tantan.cc:
+ Refactor
+ [d61af119db30]
+
+ * src/tantan.cc:
+ Refactor
+ [81bdfe23217d]
+
+ * src/CA_code/lambda_calculator.c, src/CA_code/lambda_calculator.h,
+ src/CA_code/lubksb.c, src/CA_code/ludcmp.c, src/CA_code/nrutil.c,
+ src/CA_code/nrutil.h, src/LambdaCalculator.cc,
+ src/LambdaCalculator.hh, src/Makefile,
+ src/mcf_score_matrix_probs.cc, src/mcf_score_matrix_probs.hh,
+ src/tantan_app.cc:
+ Use Konta-san's code for matrix lambda
+ [fccc8e5e9c1b]
+
+ * src/tantan.cc, src/tantan_app.cc, test/tantan_test.sh:
+ Refactor
+ [f9a9da99553d]
+
2012-10-16 Martin C. Frith <Martin C. Frith>
* README.txt:
Expanded the documentation: installation & FAQ
- [946a951b1a06] [tip]
+ [946a951b1a06]
* src/CA_code/nrutil.c:
Just fixed a compiler warning
=====================================
README.html
=====================================
@@ -465,6 +465,12 @@ repeats, so it's easy to lift the masking after determining homology.</p>
<kbd><span class="option">-d</span></kbd></td>
<td>probability decay per period (period-(i+1) / period-i)</td></tr>
<tr><td class="option-group">
+<kbd><span class="option">-i</span></kbd></td>
+<td>match score</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">-j</span></kbd></td>
+<td>mismatch cost</td></tr>
+<tr><td class="option-group">
<kbd><span class="option">-a</span></kbd></td>
<td>gap existence cost</td></tr>
<tr><td class="option-group">
=====================================
README.txt
=====================================
@@ -141,6 +141,8 @@ Options
-e probability of a repeat ending per position
-w maximum tandem repeat period to consider
-d probability decay per period (period-(i+1) / period-i)
+-i match score
+-j mismatch cost
-a gap existence cost
-b gap extension cost
-s minimum repeat probability for masking
=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+tantan (18-1) unstable; urgency=medium
+
+ * Team upload.
+ * New upstream version
+ * Standards-Version: 4.2.1
+
+ -- Andreas Tille <tille at debian.org> Mon, 17 Dec 2018 13:58:58 +0100
+
tantan (13-5) unstable; urgency=medium
* Team upload.
=====================================
debian/control
=====================================
@@ -4,7 +4,7 @@ Uploaders: Sascha Steinbiss <sascha at steinbiss.name>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~)
-Standards-Version: 4.1.4
+Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/tantan
Vcs-Git: https://salsa.debian.org/med-team/tantan.git
Homepage: http://www.cbrc.jp/tantan/
=====================================
debian/patches/buildflags.patch
=====================================
@@ -3,33 +3,25 @@ Description: add buildflags
Author: Sascha Steinbiss <sascha at steinbiss.name>
--- a/src/Makefile
+++ b/src/Makefile
-@@ -1,17 +1,21 @@
+@@ -1,11 +1,14 @@
-CXXFLAGS = -O3 -Wall -W -Wcast-qual -Wswitch-enum -Wundef \
-+#CXXFLAGS += -O3 -g -Wall -W -Wcast-qual -Wswitch-enum -Wundef \
- -Wcast-align -Wold-style-cast
+--Wcast-align -Wold-style-cast
++#CXXFLAGS = -O3 -Wall -W -Wcast-qual -Wswitch-enum -Wundef \
++#-Wcast-align -Wold-style-cast
# -Wconversion
--CFLAGS = -Wall
-+#CFLAGS += -Wall
+ all: tantan
- COBJ = CA_code/lambda_calculator.o
+-tantan: *.cc *.hh version.hh Makefile
+- $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ *.cc
+CCSRCS = $(sort $(wildcard *.cc))
+CCHDRS = $(sort $(wildcard *.hh))
-+CACODESRCS = $(sort $(wildcard CA_code/*.c))
-+CACODEHDRS = $(sort $(wildcard CA_code/*.h))
-
- all: tantan
-
--tantan: *.cc *.hh version.hh Makefile $(COBJ)
-- $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ *.cc $(COBJ)
-+tantan: $(CCSRCS) $(CCHDRS) version.hh Makefile $(COBJ)
-+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $(CCSRCS) $(COBJ)
-
--$(COBJ): CA_code/*.c CA_code/*.h Makefile
-+$(COBJ): $(CACODESRCS) $(CACODEHDRS) Makefile
- $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ CA_code/lambda_calculator.c
++
++tantan: $(CCSRCS) $(CCHDRS) version.hh Makefile
++ $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $(CCSRCS)
clean:
+ rm -f tantan
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,5 @@
=====================================
src/CA_code/lambda_calculator.c deleted
=====================================
@@ -1,465 +0,0 @@
-// Copyright 2008 Michiaki Hamada
-// Adapted from public domain code by Yi-Kuo Yu, NCBI
-
-/**
- * See lambda_calculator.h
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "nrutil.h"
-int Alphsize;
-#include "nrutil.c"
-#include "ludcmp.c"
-#include "lubksb.c"
-#include "lambda_calculator.h"
-#define Epsilon 1.0e-36
-#define E_bound 1.0e-12
-#define Infty 1000000.0
-#define min(A,B) ((A) > (B) ? (B) : (A) )
-#define max(A,B) ((A) > (B) ? (A) : (B) )
-#define bool int
-#define true 1
-#define false 0
-double Lambda_UB; //lambda upper bound
-double r_max_m, c_max_m; //min of each row's (column's) max
-
-void makematrix( const double** mat_b, double **a, double lambda);
-typedef struct Lambda {
- double min;
- double max;
- int flag; // 1 means there is a range, -1 means no solution possible.
-} Lambda;
-typedef struct Sum {
- double value;
- int flag; // 1 means no negative bg_freq, -1 means there is negative bg_freq
-} Sum;
-Lambda Find_JP(const double** mat_b, double la_min, double la_max, double **JP, double *p_in, double *q_in);
-Sum Check_root(const double** mat_b, double **a, double lambda, double *p, double *q);
-double Check_det(const double** mat_b, double **a, double lambda);
-Sum Nail_lambda(const double** mat_b, int flag_sign, double lambda_min, double lambda_max, double *p, double *q, double *la_add);
-double Nail_det(const double** mat_b, int flag_sign, double lambda_min, double lambda_max);
-bool Check_range( const double** mat_b );
-
-double *Locate_det_zero( const double** mat_b, int * ); //pointer to root list are returned with how many of them by int
-
-
-double calculate_lambda ( const double** mat_b, int alpha_size,
- double* p, double* q )
-{
- double **JP/*, *q, *p*/;
- int k;
- double *root_location;
- int N_root;
- Lambda Lambda_local;
-
- Alphsize = alpha_size;
-
- if( ! Check_range( mat_b ) ) return -1.0;
-
- root_location = Locate_det_zero(mat_b, &N_root);
- if( root_location == NULL && N_root > 0 ) return -1.0;
-
- //q=dvector(1,Alphsize);
- //p=dvector(1,Alphsize);
- JP = dmatrix(1,Alphsize,1,Alphsize);
-
- if (N_root == 0){
- Lambda_local = Find_JP(mat_b, 0, Lambda_UB, JP, p, q);
- if (1 == Lambda_local.flag) { // sensible solution found
- // Remember to find the right place to free the vectors
- //free_dvector(p, 1,Alphsize);
- //free_dvector(q, 1,Alphsize);
- free( root_location );
- free_dmatrix( JP, 1, Alphsize, 1, Alphsize );
- return (Lambda_local.min + Lambda_local.max) / 2.0;
- }
- else if (-1 == Lambda_local.flag) {
- //printf("matrix pass first screening but no sensible solution found. :-( \n");
- }
- }
- else if (N_root > 0) {
- //printf("N_root = %d for this matirx \n", N_root);
- //for (i=0;i<N_root;i++) printf("N_root[%d] = %lf\n",i,root_location[i]);
- for (k=0; k<= N_root; k++){
- if (k==0) {Lambda_local.min =0; Lambda_local.max = root_location[0];}
- else if (k== N_root) {Lambda_local.min = root_location[N_root-1]; Lambda_local.max = Lambda_UB+Epsilon;}
- else {Lambda_local.min = root_location[k-1]; Lambda_local.max = root_location[k]; }
- Lambda_local = Find_JP(mat_b, Lambda_local.min, Lambda_local.max, JP, p, q);
- if (1 == Lambda_local.flag) { // sensible solution found
- //free_dvector(p, 1,Alphsize);
- //free_dvector(q, 1,Alphsize);
- free( root_location );
- free_dmatrix( JP, 1, Alphsize, 1, Alphsize );
- return (Lambda_local.min + Lambda_local.max) / 2.0;
- }
- else if (-1 == Lambda_local.flag) {
- //printf("matrix pass first screening but still no sensible solution found. :-( \n");
- }
- }
- }
- // Remember to find the right place to free the vectors
- //free_dvector(p, 1,Alphsize);
- //free_dvector(q, 1,Alphsize);
- free( root_location );
- free_dmatrix( JP, 1, Alphsize, 1, Alphsize );
- return -1.0;
-}
-
-bool Check_range( const double** mat_b )
-{
- int i,j;
- int pos_flag_r, neg_flag_r;
- int pos_flag_c, neg_flag_c;
- double r_max, c_max; //max of each row (or column)
-
- int L_r=0, L_c=0; // number of zero-score rows (columns)
-
- // First make sure each row and column have both pos and neg entries
- r_max_m = c_max_m = 100000000000.0;
- for (i=1;i<=Alphsize; i++) {
- r_max = 0; c_max = 0;
- pos_flag_r = -1; neg_flag_r = -1; pos_flag_c = -1; neg_flag_c = -1;
- for (j = 1; j<= Alphsize; j++){
- if (mat_b[i][j] > 0) {
- if (mat_b[i][j]> r_max) r_max = mat_b[i][j];
- pos_flag_r = 1;
- }
- else if (mat_b[i][j] < 0) neg_flag_r = 1;
- if (mat_b[j][i] > 0) {
- if (mat_b[j][i] > c_max) c_max = mat_b[j][i];
- pos_flag_c = 1;
- }
- else if (mat_b[j][i] < 0) neg_flag_c = 1;
- }
- if ((pos_flag_r == -1)||(neg_flag_r == -1)||(pos_flag_c == -1)||(neg_flag_c == -1)) {
- if ((pos_flag_r == -1)&&(neg_flag_r == -1)){
- printf ("only zero score at row %d\n", i);
- L_r++;
- }
- else if ((pos_flag_c == -1)&& (neg_flag_c == -1)){
- printf ("only zero score at column %d\n", i);
- L_c++;
- }
- else{
- //printf("all positive or all negative at row or column %d\n", i);
- //printf("therefore invalid matrix. exit now. \n");
- return false;
- //exit(1);
- }
- }
- if ((r_max < r_max_m)&&(r_max > 0)) r_max_m = r_max;
- if ((c_max < c_max_m)&&(c_max > 0)) c_max_m = c_max;
- }
-
-
- // Find the upper bound for lambda
- if (r_max_m > c_max_m) {
- Lambda_UB = 1.1*log(1.0*Alphsize-L_r)/r_max_m ;
- }
- else {
- Lambda_UB = 1.1*log(1.0*Alphsize-L_c)/c_max_m;
- }
- //printf("the upper bound for lambda is %lf\n", Lambda_UB);
- return true;
-}
-
-
-
-double Check_det(const double** mat_b, double **a, double lambda)
-{
- double d;
- int i, /*j,*/ *indx;
-
- indx = ivector(1,Alphsize);
- makematrix(mat_b, a,lambda);
- ludcmp(a,Alphsize,indx,&d);
- for(i=1;i<=Alphsize;i++) d *= a[i][i];
- free_ivector(indx,1,Alphsize);
- return d; //returning the determinant
-}
-
-
-Sum Check_root(const double** mat_b, double **a, double lambda, double *p, double *q)
-{
- double **y, /* *col,*/ d;
- //double sum = 0.0;
- int i,j;//, *indx;
- Sum Sum_here;
-
- y=dmatrix(1,Alphsize,1,Alphsize);
- //indx = ivector(1,Alphsize);
- int indx[Alphsize+1];
- //col = dvector(1,Alphsize);
- double col[Alphsize+1];
-
- makematrix(mat_b, a,lambda);
- ludcmp(a,Alphsize,indx,&d);
- Sum_here.value = 0.0;
- for(i=1;i<=Alphsize;i++) q[i]=0.0;
- for(j=1;j<=Alphsize;j++){
- for (i=1;i<=Alphsize;i++) col[i]=0.0;
- col[j]=1.0;
- lubksb(a,Alphsize,indx,col);
- p[j]=0.0;
- for (i=1;i<=Alphsize;i++) {
- y[i][j]=col[i]; Sum_here.value += y[i][j];
- p[j]+=y[i][j]; q[i]+=y[i][j];
- }
- }
-
- Sum_here.flag = 1;
- for (i=1;i<Alphsize; i++) {
- if ((p[i] < 0) || (q[i] < 0)) {
- Sum_here.flag = -1;
- //printf("problematic freq. p[%d] = %.4f q[%d]=%.4f\n",i,p[i],i,q[i]);
- }
- }
- free_dmatrix(y,1,Alphsize,1,Alphsize);
- return Sum_here;
-}
-
-
-
-double * Locate_det_zero(const double** mat_b, int *N_root_add)
-{
- double **a/*, *q, *p */; // a is the exponentiated matrix of socres, p and q are bg_freqs
- int i/*,j,k*/;
- int N; // number of points for first round
- int flag_sign;
- double lambda/*, l_tmp, sum, sum_min, sum_max */;
- double lambda_root, dlambda /*, dsum=0.5 */;
- //double *l_here, *s_here;
- double root[5000];
- double *root_temp;
- //double error=0.000000000001;
- int zero_monitor = 0; // record number of zeros found in the range
- //int flag;
-
- a=dmatrix(1,Alphsize,1,Alphsize);
- //Sum_local = (Sum *)malloc(sizeof(Sum));
- //Lambda_local = (Lambda *)malloc(sizeof(Lambda));
-
- N = 2 + max(400, ((int) (Lambda_UB-0)/0.005));
- //printf("N = %d in Locate_det_zero\n", N);
- dlambda = (Lambda_UB)/(N*1.0);
- //l_here = (double *)malloc((N+1)*sizeof(double));
- //s_here = (double *)malloc((N+1)*sizeof(double));
- double l_here[N+1];
- double s_here[N+1];
-
- for (i=0; i< N ; i++){
- lambda = (i+1)*dlambda;
- s_here[i] = Check_det(mat_b, a,lambda);
- l_here[i] = lambda;
- }
-
- if (s_here[0] < 0.0) flag_sign = -1;
- if (s_here[0] > 0.0 ) flag_sign = 1;
- if (fabs(s_here[0])/exp(l_here[0]*(r_max_m+c_max_m)/2.0) <= Epsilon) {
- root[zero_monitor++] = l_here[0];
- flag_sign = 0;
- }
-
- for (i=1;i<N; i++){
- if ((flag_sign != 0)&& (fabs(s_here[i])>Epsilon)) {
- if (s_here[i-1]*s_here[i] < 0){
- //printf("occurring at regular places\n");
- lambda_root = Nail_det(mat_b, flag_sign, l_here[i-1], l_here[i]);
- root[zero_monitor++] = lambda_root;
- flag_sign = -flag_sign; // the flag switch sign after one sol found
- //printf("a (regular) root of det found at %12.10f, i= %d\n", lambda_root,i);
- }
- }
- else {
- if (s_here[i] < 0.0) flag_sign = -1;
- if (s_here[i] > 0.0 ) flag_sign = 1;
- if (fabs(s_here[i])/exp(l_here[i]*(r_max_m+c_max_m)/2.0) <= Epsilon) {
- root[zero_monitor++] = l_here[i];
- }
- }
- }
- //printf("total number of solution found in range is %d\n", i_monitor);
- root_temp = (double *)malloc(zero_monitor*sizeof(double));
- *N_root_add = zero_monitor;
- if (zero_monitor > 0) {
- if (zero_monitor >= N/4) {
- //printf("It is likely that uniform zero determinant is occurring.\n");
- //printf("number of small det points = %d out of %d, exit now....\n",zero_monitor, N);
- free( root_temp );
- return NULL;
- //exit(1);
- }
- for (i = 0; i < zero_monitor; i++){
- root_temp[i] = root[i];
- //printf("root_location[%d] = %lf\n",i,root_temp[i]);
- }
- }
- free_dmatrix( a, 1, Alphsize, 1, Alphsize );
- return root_temp;
-
-}
-
-
-Lambda Find_JP(const double** mat_b, double la_min, double la_max, double **JP, double *p_in, double *q_in)
-{
- double **a, *q, *p; // a is the exponentiated matrix of socres, p and q are bg_freqs
- int i,j/*,k*/;
- int N; // number of points for first round
- double lambda/*, l_tmp, sum, sum_min, sum_max*/;
- double lambda_max, lambda_min, lambda_final, dlambda/*, dsum=0.5*/;
- //double *l_here, *s_here;
- //double error=0.000000000000001;
- //int validity_flag; // 1 means valid, -1 means not valid.
- int flag_sign; // 1 means small lambda sum > 1, -1 means otherwise
- int flag_done = -1; // 1 means find sensible solution, -1 means sensible not found
- int i_monitor = 0; // record number of solution found in the range, including nonsense ones
- int j_monitor;
-
- Lambda Lambda_local;
- //Sum *Sum_local;
- Sum Sum_local;
-
- lambda_min = la_min;
- lambda_max = la_max;
- q = q_in;
- p = p_in;
- a=dmatrix(1,Alphsize,1,Alphsize);
- //Sum_local = (Sum *)malloc(sizeof(Sum));
- //Lambda_local = (Lambda *)malloc(sizeof(Lambda));
-
- N = 2 + max(400, ((int) (lambda_max-lambda_min)/0.005));
- //printf("N = %d in Find_JP\n", N);
- dlambda = (lambda_max-lambda_min)/(N*1.0);
- //l_here = (double *)malloc((N+1)*sizeof(double));
- //s_here = (double *)malloc((N+1)*sizeof(double));
- double l_here[N+1];
- double s_here[N+1];
- //printf("lambda_min enter = %12.10e, lambda_max = %12.10f\n", lambda_min, lambda_max);
- for (i=0; i< N-1 ; i++){
- lambda = lambda_min + (i+1)*dlambda;
- makematrix(mat_b, a,lambda);
- Sum_local = Check_root(mat_b, a,lambda,p,q);
- l_here[i] = lambda;
- s_here[i] = Sum_local.value - 1.0;
- //printf("scan %d th time in Find_JP\n",i );
- }
- //printf("finish first time scanining in Find_JP\n");
- if (s_here[0] < 0.0) flag_sign = -1;
- else if (s_here[0] > 0.0 ) flag_sign = 1;
- else if (s_here[0] == 0.0) { //needs refined definition on flag_sign
- printf("enter the exact hit, rarely occurs other than when lambda = 0 \n");
- j_monitor = 1;
- flag_sign = 0;
- while ((flag_sign == 0) &&(j_monitor < N)){
- Sum_local = Check_root(mat_b, a,l_here[0]+ j_monitor*dlambda/N,p,q);
- if (Sum_local.value > 1.0) {
- flag_sign = 1;
- }
- else if (Sum_local.value <1.0){
- flag_sign = -1;
- }
- j_monitor++;
- }
- }
-
- for (i=1;i<N; i++){ // should be N-1 ???
- if (flag_sign == 0) {printf("flag_sign = 0 \n"); exit(1);}
- if (s_here[i-1]*s_here[i] < 0){
- lambda_min = l_here[i-1];
- lambda_max = l_here[i];
- Sum_local = Nail_lambda(mat_b, flag_sign, lambda_min, lambda_max, p,q, &lambda_final);
- if (Sum_local.flag == 1) {
- i =N; flag_done = 1; Lambda_local.flag = 1;
- Lambda_local.min = lambda_final, Lambda_local.max = lambda_final;
- }
- flag_sign = -flag_sign; // the flag switch sign after one sol found
- i_monitor++ ;
- }
- }
-
- if (flag_done == 1){
- // Write correct JP to the matrix
- makematrix(mat_b, a,lambda_final);
- for (i=1;i<=Alphsize;i++){
- for (j=1;j<=Alphsize;j++){
- JP[i][j] = a[i][j]*p[i]*q[j];
- }
- }
- free_dmatrix(a,1,Alphsize,1,Alphsize);
- return Lambda_local;
- }
- else if (flag_done == -1){
- //printf("no sensible solution in the plausible x range: (%lf,%lf)\n", la_min, la_max);
- Lambda_local.flag = -1; Lambda_local.min = 0; Lambda_local.max = Infty;
- return Lambda_local;
- }
- // never come here
- return Lambda_local;
-}
-
-
-Sum Nail_lambda(const double** mat_b, int flag_sign, double lambda_min, double lambda_max, double *p, double *q, double *lam_add)
-{
- double **a;
- double lambda;
-
- //Sum *Sum_local;
- Sum Sum_local;
- a = dmatrix(1,Alphsize,1,Alphsize);
- //Sum_local = (Sum *)malloc(sizeof(Sum));
-
- lambda = (lambda_min+lambda_max)/2.0;
- Sum_local = Check_root(mat_b, a, lambda, p, q);
- while (fabs(Sum_local.value-1.0)>E_bound){
- if (flag_sign*(Sum_local.value-1.0)<0) lambda_max = lambda;
- else if (flag_sign*(Sum_local.value-1.0)> 0) lambda_min = lambda;
-
- // Added by MCF to avoid infinite loop:
- if (lambda == (lambda_min+lambda_max)/2.0){
- Sum_local.flag = -1;
- break;
- }
-
- lambda = (lambda_min+lambda_max)/2.0;
- Sum_local = Check_root(mat_b, a, lambda, p, q);
- }
- free_dmatrix(a,1,Alphsize,1,Alphsize);
- *lam_add = lambda;
- return Sum_local;
-}
-
-
-double Nail_det(const double** mat_b, int flag_sign, double lambda_min, double lambda_max)
-{
- double **a;
- double lambda;
- double value;
-
- a = dmatrix(1,Alphsize,1,Alphsize);
-
- lambda = (lambda_min+lambda_max)/2.0;
- value = Check_det(mat_b, a, lambda);
- while ((fabs(value)>E_bound)&&(lambda > 0)){
- if (flag_sign*(value)<0) lambda_max = lambda;
- else if (flag_sign*(value)> 0) lambda_min = lambda;
- lambda = (lambda_min+lambda_max)/2.0;
- value = Check_det(mat_b, a, lambda);
- }
- free_dmatrix(a,1,Alphsize,1,Alphsize);
- return lambda;
-}
-
-void makematrix(const double** mat_b, double **a, double lambda)
-{
-
- int i,j;
-
- for (i=1;i<=Alphsize;i++)
- for(j=1;j<=Alphsize;j++){
- *(*(a+i)+j)=exp(lambda*mat_b[i][j]);
- }
-}
-
-
-
=====================================
src/CA_code/lambda_calculator.h deleted
=====================================
@@ -1,10 +0,0 @@
-// Copyright 2008 Michiaki Hamada
-
-#ifndef __H_INCLUDE_LAMBDA_CALCULATOR_HH
-#define __H_INCLUDE_LAMBDA_CALCULATOR_HH
-
-// These pointers are 1-based!
-double calculate_lambda( const double** mat_b, int alpha_size,
- double* p, double* q );
-
-#endif // __H_INCLUDE_LAMBDA_CALCULATOR_HH
=====================================
src/CA_code/lubksb.c deleted
=====================================
@@ -1,24 +0,0 @@
-// Public domain code from Yi-Kuo Yu & Stephen Altschul, NCBI
-
-void lubksb(a,n,indx,b)
-double **a,b[];
-int n,*indx;
-{
- int i,ii=0,ip,j;
- double sum;
-
- for (i=1;i<=n;i++) {
- ip=indx[i];
- sum=b[ip];
- b[ip]=b[i];
- if (ii)
- for (j=ii;j<=i-1;j++) sum -= a[i][j]*b[j];
- else if (sum) ii=i;
- b[i]=sum;
- }
- for (i=n;i>=1;i--) {
- sum=b[i];
- for (j=i+1;j<=n;j++) sum -= a[i][j]*b[j];
- b[i]=sum/a[i][i];
- }
-}
=====================================
src/CA_code/ludcmp.c deleted
=====================================
@@ -1,61 +0,0 @@
-// Public domain code from Yi-Kuo Yu & Stephen Altschul, NCBI
-
-#include <math.h>
-
-#define TINY 1.0e-20;
-
-void ludcmp(a,n,indx,d)
-int n,*indx;
-double **a,*d;
-{
- int i,imax,j,k;
- double big,dum,sum,temp;
- double *vv,*dvector();
- void nrerror(),free_dvector();
-
- vv=dvector(1,n);
- *d=1.0;
- for (i=1;i<=n;i++) {
- big=0.0;
- for (j=1;j<=n;j++)
- if ((temp=fabs(a[i][j])) > big) big=temp;
- if (big == 0.0) nrerror("Singular matrix in routine LUDCMP");
- vv[i]=1.0/big;
- }
- for (j=1;j<=n;j++) {
- for (i=1;i<j;i++) {
- sum=a[i][j];
- for (k=1;k<i;k++) sum -= a[i][k]*a[k][j];
- a[i][j]=sum;
- }
- big=0.0;
- for (i=j;i<=n;i++) {
- sum=a[i][j];
- for (k=1;k<j;k++)
- sum -= a[i][k]*a[k][j];
- a[i][j]=sum;
- if ( (dum=vv[i]*fabs(sum)) >= big) {
- big=dum;
- imax=i;
- }
- }
- if (j != imax) {
- for (k=1;k<=n;k++) {
- dum=a[imax][k];
- a[imax][k]=a[j][k];
- a[j][k]=dum;
- }
- *d = -(*d);
- vv[imax]=vv[j];
- }
- indx[j]=imax;
- if (a[j][j] == 0.0) a[j][j]=TINY;
- if (j != n) {
- dum=1.0/(a[j][j]);
- for (i=j+1;i<=n;i++) a[i][j] *= dum;
- }
- }
- free_dvector(vv,1,n);
-}
-
-#undef TINY
=====================================
src/CA_code/nrutil.c deleted
=====================================
@@ -1,239 +0,0 @@
-// Public domain code from Yi-Kuo Yu & Stephen Altschul, NCBI
-
-//#include <malloc.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-void nrerror(error_text)
-char error_text[];
-{
- void exit();
-
- fprintf(stderr,"Numerical Recipes run-time error...\n");
- fprintf(stderr,"%s\n",error_text);
- fprintf(stderr,"...now exiting to system...\n");
- exit(1);
-}
-
-float *vector(nl,nh)
-int nl,nh;
-{
- float *v;
-
- v=(float *)malloc((unsigned) (nh-nl+1)*sizeof(float));
- if (!v) nrerror("allocation failure in vector()");
- return v-nl;
-}
-
-int *ivector(nl,nh)
-int nl,nh;
-{
- int *v;
-
- v=(int *)malloc((unsigned) (nh-nl+1)*sizeof(int));
- if (!v) nrerror("allocation failure in ivector()");
- return v-nl;
-}
-
-double *dvector(nl,nh)
-int nl,nh;
-{
- double *v;
-
- v=(double *)malloc((unsigned) (nh-nl+1)*sizeof(double));
- if (!v) nrerror("allocation failure in dvector()");
- return v-nl;
-}
-
-float **matrix(nrl,nrh,ncl,nch)
-int nrl,nrh,ncl,nch;
-{
- int i;
- float **m;
-
- m=(float **) malloc((unsigned) (nrh-nrl+1)*sizeof(float *));
- if (!m) nrerror("allocation failure 1 in matrix()");
- m -= nrl;
-
- for(i=nrl;i<=nrh;i++) {
- m[i]=(float *) malloc((unsigned) (nch-ncl+1)*sizeof(float));
- if (!m[i]) nrerror("allocation failure 2 in matrix()");
- m[i] -= ncl;
- }
- return m;
-}
-
-double **dmatrix(nrl,nrh,ncl,nch)
-int nrl,nrh,ncl,nch;
-{
- int i;
- double **m;
-
- m=(double **) malloc((unsigned) (nrh-nrl+1)*sizeof(double *));
- if (!m) nrerror("allocation failure 1 in dmatrix()");
- m -= nrl;
-
- for(i=nrl;i<=nrh;i++) {
- m[i]=(double *) malloc((unsigned) (nch-ncl+1)*sizeof(double));
- if (!m[i]) nrerror("allocation failure 2 in dmatrix()");
- m[i] -= ncl;
- }
- return m;
-}
-
-double ***darray3(n1l,n1h,n2l,n2h,n3l,n3h)
-int n1l,n1h,n2l,n2h,n3l,n3h;
-{
- int i;
- double ***m;
-
- m=(double ***) malloc((unsigned) (n1h-n1l+1)*sizeof(double **));
- if (!m) nrerror("allocation failure 1 in darray3()");
- m -= n1l;
-
- for(i=n1l;i<=n1h;i++)
- {
- m[i]=dmatrix(n2l,n2h,n3l,n3h);
- }
- return m;
-}
-
-double ****darray4(n1l,n1h,n2l,n2h,n3l,n3h,n4l,n4h)
-int n1l,n1h,n2l,n2h,n3l,n3h,n4l,n4h;
-{
- int i;
- double ****m;
-
- m=(double ****) malloc((unsigned) (n1h-n1l+1)*sizeof(double ***));
- if (!m) nrerror("allocation failure 1 in darray4()");
- m -= n1l;
-
- for(i=n1l;i<=n1h;i++)
- {
- m[i]=darray3(n2l,n2h,n3l,n3h,n4l,n4h);
- }
- return m;
-}
-
-int **imatrix(nrl,nrh,ncl,nch)
-int nrl,nrh,ncl,nch;
-{
- int i,**m;
-
- m=(int **)malloc((unsigned) (nrh-nrl+1)*sizeof(int *));
- if (!m) nrerror("allocation failure 1 in imatrix()");
- m -= nrl;
-
- for(i=nrl;i<=nrh;i++) {
- m[i]=(int *)malloc((unsigned) (nch-ncl+1)*sizeof(int));
- if (!m[i]) nrerror("allocation failure 2 in imatrix()");
- m[i] -= ncl;
- }
- return m;
-}
-
-float **submatrix(a,oldrl,oldrh,oldcl,oldch,newrl,newcl)
-float **a;
-int oldrl,oldrh,oldcl,oldch,newrl,newcl;
-{
- int i,j;
- float **m;
-
- m=(float **) malloc((unsigned) (oldrh-oldrl+1)*sizeof(float *));
- if (!m) nrerror("allocation failure in submatrix()");
- m -= newrl;
-
- for(i=oldrl,j=newrl;i<=oldrh;i++,j++) m[j]=a[i]+oldcl-newcl;
-
- return m;
-}
-
-void free_vector(v,nl,nh)
-float *v;
-int nl,nh;
-{
- free((char*) (v+nl));
-}
-
-void free_ivector(v,nl,nh)
-int *v,nl,nh;
-{
- free((char*) (v+nl));
-}
-
-void free_dvector(v,nl,nh)
-double *v;
-int nl,nh;
-{
- free((char*) (v+nl));
-}
-
-void free_matrix(m,nrl,nrh,ncl,nch)
-float **m;
-int nrl,nrh,ncl,nch;
-{
- int i;
-
- for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl));
- free((char*) (m+nrl));
-}
-
-void free_dmatrix(m,nrl,nrh,ncl,nch)
-double **m;
-int nrl,nrh,ncl,nch;
-{
- int i;
-
- for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl));
- free((char*) (m+nrl));
-}
-
-void free_imatrix(m,nrl,nrh,ncl,nch)
-int **m;
-int nrl,nrh,ncl,nch;
-{
- int i;
-
- for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl));
- free((char*) (m+nrl));
-}
-
-void free_darray3(m,n1l,n1h,n2l,n2h,n3l,n3h)
-double ***m;
-int n1l,n1h,n2l,n2h,n3l,n3h;
-{
- int i;
-
- for(i=n1h;i>=n1l;i--) free_dmatrix(m[i],n2l,n2h,n3l,n3h);
- free((char*) (m+n1l));
-}
-
-void free_submatrix(b,nrl,nrh,ncl,nch)
-float **b;
-int nrl,nrh,ncl,nch;
-{
- free((char*) (b+nrl));
-}
-
-float **convert_matrix(a,nrl,nrh,ncl,nch)
-float *a;
-int nrl,nrh,ncl,nch;
-{
- int i,j,nrow,ncol;
- float **m;
-
- nrow=nrh-nrl+1;
- ncol=nch-ncl+1;
- m = (float **) malloc((unsigned) (nrow)*sizeof(float*));
- if (!m) nrerror("allocation failure in convert_matrix()");
- m -= nrl;
- for(i=0,j=nrl;i<=nrow-1;i++,j++) m[j]=a+ncol*i-ncl;
- return m;
-}
-
-void free_convert_matrix(b,nrl,nrh,ncl,nch)
-float **b;
-int nrl,nrh,ncl,nch;
-{
- free((char*) (b+nrl));
-}
=====================================
src/CA_code/nrutil.h deleted
=====================================
@@ -1,19 +0,0 @@
-// Public domain code from Yi-Kuo Yu & Stephen Altschul, NCBI
-
-float *vector();
-float **matrix();
-float **convert_matrix();
-double *dvector();
-double **dmatrix();
-int *ivector();
-int **imatrix();
-float **submatrix();
-void free_vector();
-void free_dvector();
-void free_ivector();
-void free_matrix();
-void free_dmatrix();
-void free_imatrix();
-void free_submatrix();
-void free_convert_matrix();
-void nrerror();
=====================================
src/LambdaCalculator.cc
=====================================
@@ -0,0 +1,408 @@
+// Copyright 2015 Yutaro Konta
+
+#include "LambdaCalculator.hh"
+#include <vector>
+#include <cassert>
+#include <cstdio> // sprintf
+#include <cstdlib>
+#include <cfloat>
+#include <cmath>
+//#include <iostream>
+using namespace std;
+
+static double roundToFewDigits(double x)
+{
+ // This rounding fixes some inaccuracies, e.g. for DNA with a simple
+ // match/mismatch matrix it's likely to make all the probabilities
+ // exactly 0.25, as they should be.
+ char buffer[32];
+ sprintf(buffer, "%g", x);
+ return atof(buffer);
+}
+
+static double** makematrix(int m, int n, double val)
+{
+ double** mat = new double* [m];
+ for (int i=0; i<m; i++)
+ {
+ mat[i] = new double [n];
+ for (int j=0; j<n; j++)
+ mat[i][j] = val;
+ }
+ return mat;
+}
+
+static void deletematrix(double** a, int m)
+{
+ for (int i=0; i<m; i++)
+ delete[] a[i];
+ delete[] a;
+}
+
+static double summatrix(double** a, int m, int n)
+{
+ double s = 0;
+ for (int i=0; i<m; i++)
+ for (int j=0; j<n; j++)
+ s += a[i][j];
+ return s;
+}
+
+static int max_index(double** a, int n, int i)
+{
+ double max = -DBL_MAX;
+ int maxindex = -1;
+
+ for (int j=i; j<n; j++)
+ {
+ if (fabs(a[j][i]) > max)
+ {
+ max = fabs(a[j][i]);
+ maxindex = j;
+ }
+ }
+ return maxindex;
+}
+
+static void swap_matrix(double** a, int i, int j)
+{
+ double* v = a[i];
+ a[i] = a[j];
+ a[j] = v;
+}
+
+static void swap_vector(int* a, int i, int j)
+{
+ int v = a[i];
+ a[i] = a[j];
+ a[j] = v;
+}
+
+static bool lu_pivoting(double** a, int* idx, int n)
+{
+ int v;
+
+ for (int i=0; i<n; i++)
+ idx[i] = i;
+
+ for (int i=0; i<n; i++)
+ {
+ v = max_index(a, n, i);
+ assert(v >= 0);
+ if (fabs(a[v][i]) < 1e-10)
+ {
+ return false; // singular matrix!
+ }
+
+ swap_matrix(a, i, v);
+ swap_vector(idx, i, v);
+
+ a[i][i] = 1.0/a[i][i];
+ for (int j=i+1; j<n; j++)
+ {
+ a[j][i] = a[j][i] * a[i][i];
+ for (int k=i+1; k<n; k++)
+ a[j][k] = a[j][k] - a[j][i] * a[i][k];
+ }
+ }
+ return true;
+}
+
+static void solvep(double **a, double *x, double *b, int n)
+{
+ double *y = new double [n];
+
+ for (int i=0; i<n; i++)
+ {
+ y[i] = b[i];
+ for (int j=0; j<i; j++)
+ y[i] -= a[i][j] * y[j];
+ }
+
+ for (int i=n-1; i>=0; i--)
+ {
+ x[i] = y[i];
+ for (int j=i+1; j<n; j++)
+ x[i] -= a[i][j] * x[j];
+ x[i] *= a[i][i]; // needed because a[i][i] is inverted
+ }
+ delete[] y;
+}
+
+static void transpose(double **a, int n)
+{
+ double v;
+ for (int i=0; i<n; i++)
+ {
+ for (int j=0; j<i; j++)
+ {
+ v = a[i][j];
+ a[i][j] = a[j][i];
+ a[j][i] = v;
+ }
+ }
+}
+
+static bool invert(double **a, double **inv, int n)
+{
+ int* idx = new int [n];
+
+ double **e = makematrix(n,n,0);
+
+ if(!lu_pivoting(a, idx, n))
+ return false;
+
+ for (int i=0; i<n; i++)
+ e[idx[i]][i] = 1; // transposed
+
+ delete[] idx;
+
+ for (int i=0; i<n; i++)
+ solvep(a, inv[i], e[i], n);
+
+ deletematrix(e, n);
+ transpose(inv, n); // transpose inv to make the inverted matrix of a
+ return true;
+}
+
+static bool calculate_inv_sum(double **matrix, int alpha_size, double tau, double* inv_sum)
+{
+ double **m = makematrix(alpha_size, alpha_size, 0);
+ double **y = makematrix(alpha_size, alpha_size, 0);
+
+ for (int i=0; i<alpha_size; i++)
+ for (int j=0; j<alpha_size; j++)
+ m[i][j] = exp(tau * matrix[i][j]);
+
+ if(!invert(m, y, alpha_size))
+ return false;
+
+ *inv_sum = summatrix(y, alpha_size, alpha_size);
+
+ deletematrix(m, alpha_size);
+ deletematrix(y, alpha_size);
+ return true;
+}
+
+namespace cbrc{
+
+void LambdaCalculator::setBad(){
+ lambda_ = -1;
+ letterProbs1_.clear();
+ letterProbs2_.clear();
+}
+
+bool LambdaCalculator::find_ub(double **matrix, int alpha_size, double *ub)
+{
+ double r_max_min = DBL_MAX;
+ double c_max_min = DBL_MAX;
+ double r_max;
+ double c_max;
+
+ double r_min;
+ double c_min;
+
+ int l_r = 0;
+ int l_c = 0;
+
+ for (int i=0; i<alpha_size; i++)
+ {
+ r_max = -DBL_MAX;
+ r_min = DBL_MAX;
+ for (int j=0; j<alpha_size; j++)
+ {
+ if (matrix[i][j] > r_max)
+ r_max = matrix[i][j];
+
+ if (matrix[i][j] < r_min)
+ r_min = matrix[i][j];
+ }
+ if (r_max == 0 && r_min == 0)
+ l_r++;
+ else if (r_max <= 0 || r_min >= 0)
+ return false;
+ else if (r_max < r_max_min)
+ r_max_min = r_max;
+ }
+
+ for (int j=0; j<alpha_size; j++)
+ {
+ c_max = -DBL_MAX;
+ c_min = DBL_MAX;
+ for (int i=0; i<alpha_size; i++)
+ {
+ if (matrix[i][j] > c_max)
+ c_max = matrix[i][j];
+
+ if (matrix[i][j] < c_min)
+ c_min = matrix[i][j];
+ }
+ if (c_max == 0 && c_min == 0)
+ l_c++;
+ else if (c_max <= 0 || c_min >= 0)
+ return false;
+ else if (c_max < c_max_min)
+ c_max_min = c_max;
+ }
+
+ if (l_r == alpha_size) return false;
+
+ // the multiplication by 1.1 is sometimes necessary, presumably to
+ // prevent the upper bound from being too tight:
+ if (r_max_min > c_max_min)
+ *ub = 1.1 * log(1.0 * (alpha_size - l_r))/r_max_min;
+ else
+ *ub = 1.1 * log(1.0 * (alpha_size - l_c))/c_max_min;
+
+ return true;
+}
+
+bool LambdaCalculator::binary_search(double** matrix, int alpha_size, double lb, double ub, vector<double>& letprob1, vector<double>& letprob2, double* lambda, int maxiter)
+{
+ double l=0;
+ double r=0;
+ double l_sum=0;
+ double r_sum=0;
+ int iter=0;
+
+ while (iter < maxiter && (l>=r || (l_sum < 1.0 && r_sum < 1.0) || (l_sum > 1.0 && r_sum > 1.0)))
+ {
+ l = lb + (ub - lb)*rand()/RAND_MAX;
+ r = lb + (ub - lb)*rand()/RAND_MAX;
+
+ if (!calculate_inv_sum(matrix, alpha_size, l, &l_sum) || !calculate_inv_sum(matrix, alpha_size, r, &r_sum))
+ {
+ l = 0;
+ r = 0;
+ }
+ iter++;
+ }
+
+ if (iter >= maxiter)
+ return false;
+
+ while (l_sum != 1.0 && r_sum != 1.0 && (l+r)/2.0 != l && (l+r)/2.0 != r)
+ {
+ double mid = (l + r)/2.0;
+ double mid_sum;
+ if (!calculate_inv_sum(matrix, alpha_size, mid, &mid_sum))
+ return false;
+
+ if (fabs(mid_sum) >= DBL_MAX)
+ return false;
+
+ if ((l_sum < 1.0 && mid_sum >= 1.0) || (l_sum > 1.0 && mid_sum <= 1.0))
+ {
+ r = mid;
+ r_sum = mid_sum;
+ }
+
+ else if ((r_sum < 1.0 && mid_sum >= 1.0) || (r_sum > 1.0 && mid_sum <= 1.0))
+ {
+ l = mid;
+ l_sum = mid_sum;
+ }
+
+ else
+ return false;
+ }
+
+ if (fabs(l_sum - 1.0) < fabs(r_sum - 1.0))
+ {
+ if (check_lambda(matrix, l, alpha_size, letprob1, letprob2))
+ {
+ *lambda = l;
+ return true;
+ }
+ return false;
+ }
+
+ if (check_lambda(matrix, r, alpha_size, letprob1, letprob2))
+ {
+ *lambda = r;
+ return true;
+ }
+ return false;
+}
+
+double LambdaCalculator::calculate_lambda(double** matrix, int alpha_size, vector<double>& letprob1, vector<double>& letprob2, int maxiter, int max_boundary_search_iter, double lb_ratio)
+{
+ double ub;
+
+ if (!find_ub(matrix, alpha_size, &ub))
+ return -1;
+
+ double lb = ub*lb_ratio;
+ double lambda = -1;
+ int iter = 0;
+ bool flag = false;
+
+ while (!flag && iter < maxiter)
+ {
+ flag = binary_search(matrix, alpha_size, lb, ub, letprob1, letprob2, &lambda, max_boundary_search_iter);
+ iter++;
+ }
+
+ return lambda;
+}
+
+bool LambdaCalculator::check_lambda(double** matrix, double lambda, int alpha_size, vector<double>& letprob1, vector<double>& letprob2)
+{
+ double **m = makematrix(alpha_size, alpha_size, 0);
+ double **y = makematrix(alpha_size, alpha_size, 0);
+
+ for (int i=0; i<alpha_size; i++)
+ for (int j=0; j<alpha_size; j++)
+ m[i][j] = exp(lambda * matrix[i][j]);
+
+ invert(m, y, alpha_size);
+
+ for (int i=0; i<alpha_size; i++)
+ {
+ double p = 0;
+ for (int j=0;j<alpha_size; j++)
+ p += y[i][j];
+ if (p < 0 || p > 1)
+ {
+ letprob2.clear();
+ return false;
+ }
+ letprob2.push_back(roundToFewDigits(p));
+ }
+
+ for (int j=0; j<alpha_size; j++)
+ {
+ double q = 0;
+ for (int i=0; i<alpha_size; i++)
+ q += y[i][j];
+ if (q < 0 || q > 1)
+ {
+ letprob2.clear();
+ letprob1.clear();
+ return false;
+ }
+ letprob1.push_back(roundToFewDigits(q));
+ }
+
+ deletematrix(m, alpha_size);
+ deletematrix(y, alpha_size);
+
+ return true;
+}
+
+void LambdaCalculator::calculate(const const_int_ptr *matrix, int alphSize) {
+ assert(alphSize >= 0);
+ setBad();
+
+ int maxiter = 1000;
+ int max_boundary_search_iter = 100;
+ double lb_ratio = 1e-6;
+
+ double** mat = makematrix(alphSize, alphSize, 0);
+ for (int i=0; i<alphSize; i++)
+ for (int j=0; j<alphSize; j++)
+ mat[i][j] = matrix[i][j];
+ lambda_ = calculate_lambda(mat, alphSize, letterProbs1_, letterProbs2_, maxiter, max_boundary_search_iter, lb_ratio);
+ deletematrix(mat, alphSize);
+}
+}
=====================================
src/mcf_score_matrix_probs.hh → src/LambdaCalculator.hh
=====================================
@@ -1,4 +1,5 @@
-// Copyright 2010 Martin C. Frith
+// Copyright 2008 Michiaki Hamada
+// Modified 2015 Yutaro Konta
// This class calculates the scale factor (lambda), and the letter
// probabilities, that are implicit in a scoring matrix. The
@@ -9,23 +10,20 @@
// probabilities should be identical. With this code, they might
// differ minutely from exact identity.
-#ifndef MCF_SCORE_MATRIX_PROBS_HH
-#define MCF_SCORE_MATRIX_PROBS_HH
+#ifndef LAMBDA_CALCULATOR_HH
+#define LAMBDA_CALCULATOR_HH
#include <vector>
-namespace mcf {
+namespace cbrc{
typedef const int *const_int_ptr;
-class ScoreMatrixProbs {
+class LambdaCalculator{
public:
- ScoreMatrixProbs() { setBad(); }
+ LambdaCalculator() { setBad(); }
- ScoreMatrixProbs(const const_int_ptr *scoreMatrix, unsigned alphabetSize)
- { init(scoreMatrix, alphabetSize); }
-
- void init(const const_int_ptr *scoreMatrix, unsigned alphabetSize);
+ void calculate(const const_int_ptr *matrix, int alphSize);
// Put us in the bad/undefined state.
void setBad();
@@ -37,19 +35,24 @@ class ScoreMatrixProbs {
double lambda() const { return lambda_; }
// The probabilities of letters corresponding to matrix rows (1st index).
- // In the bad/undefined state, it is an empty vector.
- const std::vector<double>& letterProbs1() const { return letterProbs1_; }
+ // In the bad/undefined state, it is a null pointer.
+ const double *letterProbs1() const {return isBad() ? 0 : &letterProbs1_[0];}
// The probabilities of letters corresponding to matrix columns (2nd index).
- // In the bad/undefined state, it is an empty vector.
- const std::vector<double>& letterProbs2() const { return letterProbs2_; }
+ // In the bad/undefined state, it is a null pointer.
+ const double *letterProbs2() const {return isBad() ? 0 : &letterProbs2_[0];}
private:
double lambda_;
std::vector<double> letterProbs1_;
std::vector<double> letterProbs2_;
+
+ bool find_ub(double **matrix, int alpha_size, double *ub);
+ bool binary_search(double** matrix, int alpha_size, double lb, double ub, std::vector<double>& letprob1, std::vector<double>& letprob2, double* lambda, int maxiter);
+ double calculate_lambda(double** matrix, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2, int maxiter, int max_boundary_search_iter, double lb_ratio);
+ bool check_lambda(double** matrix, double lambda, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2);
};
-}
+} // end namespace
#endif
=====================================
src/Makefile
=====================================
@@ -2,20 +2,13 @@ CXXFLAGS = -O3 -Wall -W -Wcast-qual -Wswitch-enum -Wundef \
-Wcast-align -Wold-style-cast
# -Wconversion
-CFLAGS = -Wall
-
-COBJ = CA_code/lambda_calculator.o
-
all: tantan
-tantan: *.cc *.hh version.hh Makefile $(COBJ)
- $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ *.cc $(COBJ)
-
-$(COBJ): CA_code/*.c CA_code/*.h Makefile
- $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ CA_code/lambda_calculator.c
+tantan: *.cc *.hh version.hh Makefile
+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ *.cc
clean:
- rm -f tantan $(COBJ)
+ rm -f tantan
VERSION = \"`hg id -n`\"
=====================================
src/mcf_score_matrix_probs.cc deleted
=====================================
@@ -1,45 +0,0 @@
-// Copyright 2010 Martin C. Frith
-
-#include "mcf_score_matrix_probs.hh"
-
-namespace lambda {
-extern "C" {
-#include "CA_code/lambda_calculator.h"
-}
-}
-
-namespace mcf {
-
-void ScoreMatrixProbs::setBad() {
- lambda_ = -1;
- letterProbs1_.clear();
- letterProbs2_.clear();
-}
-
-void ScoreMatrixProbs::init(const const_int_ptr *scoreMatrix,
- unsigned alphabetSize) {
- // We need to pass the parameters as 1-based pointers, hence the +1s
- // and -1s.
-
- std::vector<double> cells(alphabetSize * alphabetSize + 1);
- std::vector<const double*> pointers(alphabetSize + 1);
-
- for (unsigned i = 0; i < alphabetSize; ++i) {
- pointers[i+1] = &cells[i * alphabetSize];
-
- for (unsigned j = 0; j < alphabetSize; ++j) {
- cells[i * alphabetSize + j + 1] = scoreMatrix[i][j];
- }
- }
-
- letterProbs1_.resize(alphabetSize);
- letterProbs2_.resize(alphabetSize);
-
- lambda_ = lambda::calculate_lambda(&pointers[0], alphabetSize,
- &letterProbs1_[0] - 1,
- &letterProbs2_[0] - 1);
-
- if (lambda_ < 0) setBad();
-}
-
-}
=====================================
src/mcf_tantan_options.cc
=====================================
@@ -52,6 +52,8 @@ TantanOptions::TantanOptions() :
repeatEndProb(0.05),
maxCycleLength(-1), // depends on isProtein
repeatOffsetProbDecay(0.9),
+ matchScore(0),
+ mismatchCost(0),
gapExistenceCost(0),
gapExtensionCost(-1), // means: no gaps
minMaskProb(0.5),
@@ -75,6 +77,8 @@ Options (default settings):\n\
-w maximum tandem repeat period to consider (100, but -p selects 50)\n\
-d probability decay per period ("
+ stringify(repeatOffsetProbDecay) + ")\n\
+ -i match score (1, but -p selects BLOSUM62)\n\
+ -j mismatch cost (1, but -p selects BLOSUM62)\n\
-a gap existence cost ("
+ stringify(gapExistenceCost) + ")\n\
-b gap extension cost (infinite: no gaps)\n\
@@ -94,7 +98,7 @@ Home page: http://www.cbrc.jp/tantan/\n\
#include "version.hh"
"\n";
- const char *optstring = "px:cm:r:e:w:d:a:b:s:f:h";
+ const char *optstring = "px:cm:r:e:w:d:i:j:a:b:s:f:h";
int i;
while ((i = myGetopt(argc, argv, optstring, help, version)) != -1) {
@@ -132,6 +136,16 @@ Home page: http://www.cbrc.jp/tantan/\n\
if (repeatOffsetProbDecay <= 0 || repeatOffsetProbDecay > 1)
badopt(c, optarg);
break;
+ case 'i':
+ unstringify(matchScore, optarg);
+ if (matchScore <= 0)
+ badopt(c, optarg);
+ break;
+ case 'j':
+ unstringify(mismatchCost, optarg);
+ if (mismatchCost <= 0)
+ badopt(c, optarg);
+ break;
case 'a':
unstringify(gapExistenceCost, optarg);
break;
=====================================
src/mcf_tantan_options.hh
=====================================
@@ -18,6 +18,8 @@ struct TantanOptions {
double repeatEndProb;
int maxCycleLength;
double repeatOffsetProbDecay;
+ int matchScore;
+ int mismatchCost;
int gapExistenceCost;
int gapExtensionCost;
double minMaskProb;
=====================================
src/tantan.cc
=====================================
@@ -99,7 +99,7 @@ struct Tantan {
//f2g = firstGapProb;
//g2f = 1 - otherGapProb;
oneGapProb = firstGapProb * (1 - otherGapProb);
- endGapProb = firstGapProb * 1;
+ endGapProb = firstGapProb * (maxRepeatOffset > 1);
f2f0 = 1 - repeatEndProb;
f2f1 = 1 - repeatEndProb - firstGapProb;
f2f2 = 1 - repeatEndProb - firstGapProb * 2;
@@ -125,8 +125,7 @@ struct Tantan {
double forwardTotal() {
double fromForeground = std::accumulate(foregroundProbs.begin(),
foregroundProbs.end(), 0.0);
- fromForeground *= f2b;
- double total = backgroundProb * b2b + fromForeground;
+ double total = backgroundProb * b2b + fromForeground * f2b;
assert(total > 0);
return total;
}
@@ -148,36 +147,31 @@ struct Tantan {
double f = *foregroundPtr;
double fromForeground = f;
- if (insertionProbs.empty()) {
- *foregroundPtr = fromBackground + f * f2f0;
- } else {
- double *insertionPtr = &insertionProbs.back();
- double i = *insertionPtr;
- *foregroundPtr = fromBackground + f * f2f1 + i * endGapProb;
- double d = f;
- --foregroundPtr;
- fromBackground *= b2fGrowth;
-
- while (foregroundPtr > &foregroundProbs.front()) {
- f = *foregroundPtr;
- fromForeground += f;
- i = *(insertionPtr - 1);
- *foregroundPtr = fromBackground + f * f2f2 + (i + d) * oneGapProb;
- *insertionPtr = f + i * g2g;
- d = f + d * g2g;
- --foregroundPtr;
- --insertionPtr;
- fromBackground *= b2fGrowth;
- }
+ double *insertionPtr = &insertionProbs.back();
+ double i = *insertionPtr;
+ *foregroundPtr = fromBackground + f * f2f1 + i * endGapProb;
+ double d = f;
+ --foregroundPtr;
+ fromBackground *= b2fGrowth;
+ while (foregroundPtr > &foregroundProbs.front()) {
f = *foregroundPtr;
fromForeground += f;
- *foregroundPtr = fromBackground + f * f2f1 + d * endGapProb;
- *insertionPtr = f;
+ i = *(insertionPtr - 1);
+ *foregroundPtr = fromBackground + f * f2f2 + (i + d) * oneGapProb;
+ *insertionPtr = f + i * g2g;
+ d = f + d * g2g;
+ --foregroundPtr;
+ --insertionPtr;
+ fromBackground *= b2fGrowth;
}
- fromForeground *= f2b;
- backgroundProb = backgroundProb * b2b + fromForeground;
+ f = *foregroundPtr;
+ fromForeground += f;
+ *foregroundPtr = fromBackground + f * f2f1 + d * endGapProb;
+ *insertionPtr = f;
+
+ backgroundProb = backgroundProb * b2b + fromForeground * f2b;
}
void calcBackwardTransitionProbsWithGaps() {
@@ -186,37 +180,32 @@ struct Tantan {
double f = *foregroundPtr;
double toForeground = f;
- if (insertionProbs.empty()) {
- *foregroundPtr = toBackground + f2f0 * f;
- } else {
- double *insertionPtr = &insertionProbs.front();
- double i = *insertionPtr;
- *foregroundPtr = toBackground + f2f1 * f + i;
- double d = endGapProb * f;
- ++foregroundPtr;
- toForeground *= b2fGrowth;
-
- while (foregroundPtr < &foregroundProbs.back()) {
- f = *foregroundPtr;
- toForeground += f;
- i = *(insertionPtr + 1);
- *foregroundPtr = toBackground + f2f2 * f + (i + d);
- double oneGapProb_f = oneGapProb * f;
- *insertionPtr = oneGapProb_f + g2g * i;
- d = oneGapProb_f + g2g * d;
- ++foregroundPtr;
- ++insertionPtr;
- toForeground *= b2fGrowth;
- }
+ double *insertionPtr = &insertionProbs.front();
+ double i = *insertionPtr;
+ *foregroundPtr = toBackground + f2f1 * f + i;
+ double d = endGapProb * f;
+ ++foregroundPtr;
+ toForeground *= b2fGrowth;
+ while (foregroundPtr < &foregroundProbs.back()) {
f = *foregroundPtr;
toForeground += f;
- *foregroundPtr = toBackground + f2f1 * f + d;
- *insertionPtr = endGapProb * f;
+ i = *(insertionPtr + 1);
+ *foregroundPtr = toBackground + f2f2 * f + (i + d);
+ double oneGapProb_f = oneGapProb * f;
+ *insertionPtr = oneGapProb_f + g2g * i;
+ d = oneGapProb_f + g2g * d;
+ ++foregroundPtr;
+ ++insertionPtr;
+ toForeground *= b2fGrowth;
}
- toForeground *= b2fLast;
- backgroundProb = b2b * backgroundProb + toForeground;
+ f = *foregroundPtr;
+ toForeground += f;
+ *foregroundPtr = toBackground + f2f1 * f + d;
+ *insertionPtr = endGapProb * f;
+
+ backgroundProb = b2b * backgroundProb + b2fLast * toForeground;
}
void calcForwardTransitionProbs() {
@@ -235,8 +224,7 @@ struct Tantan {
fromBackground *= b2fGrowth;
}
- fromForeground *= f2b;
- backgroundProb = backgroundProb * b2b + fromForeground;
+ backgroundProb = backgroundProb * b2b + fromForeground * f2b;
}
void calcBackwardTransitionProbs() {
@@ -255,8 +243,7 @@ struct Tantan {
++foregroundPtr;
}
- toForeground *= b2fLast;
- backgroundProb = b2b * backgroundProb + toForeground;
+ backgroundProb = b2b * backgroundProb + b2fLast * toForeground;
}
void addEndCounts(double forwardProb,
@@ -281,12 +268,17 @@ struct Tantan {
}
}
- void calcEmissionProbs() {
- const double *lrRow = likelihoodRatioMatrix[*seqPtr];
+ bool isNearSeqBeg() {
+ return seqPtr - seqBeg < maxRepeatOffset;
+ }
- bool isNearSeqBeg = (seqPtr - seqBeg < maxRepeatOffset);
- const uchar *seqStop = isNearSeqBeg ? seqBeg : seqPtr - maxRepeatOffset;
+ const uchar *seqFurthestBack() {
+ return isNearSeqBeg() ? seqBeg : seqPtr - maxRepeatOffset;
+ }
+ void calcEmissionProbs() {
+ const double *lrRow = likelihoodRatioMatrix[*seqPtr];
+ const uchar *seqStop = seqFurthestBack();
double *foregroundPtr = BEG(foregroundProbs);
const uchar *offsetPtr = seqPtr;
=====================================
src/tantan_app.cc
=====================================
@@ -6,10 +6,10 @@
#include "mcf_alphabet.hh"
#include "mcf_fasta_sequence.hh"
#include "mcf_score_matrix.hh"
-#include "mcf_score_matrix_probs.hh"
#include "mcf_tantan_options.hh"
#include "mcf_util.hh"
#include "tantan.hh"
+#include "LambdaCalculator.hh"
#include <algorithm> // copy, fill_n
#include <cassert>
@@ -73,29 +73,42 @@ void initScoresAndProbabilities() {
ScoreMatrix scoreMatrix;
- if (options.scoreMatrixFileName)
+ if (options.scoreMatrixFileName) {
unfilify(scoreMatrix, options.scoreMatrixFileName);
- else if (options.isProtein)
- unstringify(scoreMatrix, ScoreMatrix::blosum62);
- else
- scoreMatrix.initMatchMismatch(1, 1, "ACGTU"); // allow for RNA
+ } else if (options.isProtein) {
+ if (options.matchScore || options.mismatchCost) {
+ scoreMatrix.initMatchMismatch(std::max(options.matchScore, 1),
+ std::max(options.mismatchCost, 1),
+ Alphabet::protein);
+ } else {
+ unstringify(scoreMatrix, ScoreMatrix::blosum62);
+ }
+ } else {
+ scoreMatrix.initMatchMismatch(std::max(options.matchScore, 1),
+ std::max(options.mismatchCost, 1),
+ "ACGTU"); // allow for RNA
+ }
scoreMatrix.makeFastMatrix(fastMatrixPointers, scoreMatrixSize,
alphabet.lettersToNumbers,
scoreMatrix.minScore(), false);
- ScoreMatrixProbs smp(fastMatrixPointers, alphabet.size);
- if (smp.isBad())
+ cbrc::LambdaCalculator matCalc;
+ matCalc.calculate(fastMatrixPointers, alphabet.size);
+ if (matCalc.isBad())
throw Error("can't calculate probabilities for this score matrix");
+ double matrixLambda = matCalc.lambda();
- for (int i = 0; i < scoreMatrixSize; ++i)
- for (int j = 0; j < scoreMatrixSize; ++j)
- probMatrix[i][j] = std::exp(smp.lambda() * fastMatrix[i][j]);
+ for (int i = 0; i < scoreMatrixSize; ++i) {
+ for (int j = 0; j < scoreMatrixSize; ++j) {
+ probMatrix[i][j] = std::exp(matrixLambda * fastMatrix[i][j]);
+ }
+ }
if (options.gapExtensionCost > 0) {
int firstGapCost = options.gapExistenceCost + options.gapExtensionCost;
- firstGapProb = std::exp(-smp.lambda() * firstGapCost);
- otherGapProb = std::exp(-smp.lambda() * options.gapExtensionCost);
+ firstGapProb = std::exp(-matrixLambda * firstGapCost);
+ otherGapProb = std::exp(-matrixLambda * options.gapExtensionCost);
// the gap existence cost includes the gap ending cost:
firstGapProb /= (1 - otherGapProb);
@@ -103,7 +116,7 @@ void initScoresAndProbabilities() {
// XXX check if firstGapProb is too high
}
- //std::cerr << "lambda: " << smp.lambda() << "\n";
+ //std::cerr << "lambda: " << matrixLambda << "\n";
//std::cerr << "firstGapProb: " << firstGapProb << "\n";
//std::cerr << "otherGapProb: " << otherGapProb << "\n";
}
=====================================
src/version.hh
=====================================
@@ -1 +1 @@
-"13"
+"18"
=====================================
test/tantan_test.out
=====================================
@@ -1487,3 +1487,5 @@ IIIIIIIIIIIIIIIEI6IIII7I8I&IBI(3<@1.3,&:9/'(-
GTGGTCCACATATACAATGGAATATTACTCAGCTATCAGAAAGAA
+SRR019778.100 I331_2_FC3076HAAXX:1:1:1682:1290 length=45
IIIIIIIIIIIIIIIIDIII2BDHIIHCF=/-6A1921*/1-*-7
+
+205
=====================================
test/tantan_test.sh
=====================================
@@ -8,7 +8,7 @@ cd $(dirname $0)
PATH=../src:$PATH
countLowercaseLetters () {
- grep -v '^>' "$@" | tr -cd a-z | wc -c
+ grep -v '^>' "$@" | tr -cd a-z | wc -c | tr -d ' '
}
{
@@ -27,5 +27,7 @@ countLowercaseLetters () {
tantan -p -a11 -b2 titin_human.fa | countLowercaseLetters
echo
tantan panda.fastq
-} |
-diff tantan_test.out -
+ echo
+ tantan -i2 -j3 hg19_chrM.fa | countLowercaseLetters
+} 2>&1 |
+diff -u tantan_test.out -
View it on GitLab: https://salsa.debian.org/med-team/tantan/compare/c90ce36c32c55871c44df10e957d3a8c35f32700...417b5568e6aa0e599bce53806701c49811a177e8
--
View it on GitLab: https://salsa.debian.org/med-team/tantan/compare/c90ce36c32c55871c44df10e957d3a8c35f32700...417b5568e6aa0e599bce53806701c49811a177e8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181217/0c115785/attachment-0001.html>
More information about the debian-med-commit
mailing list