[med-svn] [Git][med-team/mafft][upstream] New upstream version 7.505
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Sat Jun 25 20:22:20 BST 2022
Nilesh Patra pushed to branch upstream at Debian Med / mafft
Commits:
244b1a0e by Nilesh Patra at 2022-06-26T00:43:15+05:30
New upstream version 7.505
- - - - -
17 changed files:
- core/Falign.c
- core/Galign11.c
- core/addfunctions.c
- core/addsingle.c
- core/defs.c
- core/disttbfast.c
- core/filter.c
- core/functions.h
- core/io.c
- core/mafft.tmpl
- core/makemergetable.rb
- core/mltaln.h
- core/mltaln9.c
- core/pairlocalalign.c
- core/restoreu.c
- core/tbfast.c
- readme
Changes:
=====================================
core/Falign.c
=====================================
@@ -812,6 +812,9 @@ double Falign( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx,
double totalscore;
double dumdb = 0.0;
int headgp, tailgp;
+ static TLS double *gstart = NULL;
+ static TLS double *gend = NULL;
+ static TLS double **codonscoremtx;
if( seq1 == NULL )
@@ -829,6 +832,7 @@ double Falign( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx,
A__align_variousdist( NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
D__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
G__align11( NULL, NULL, NULL, 0, 0, 0 );
+ G__align11psg( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL );
blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL );
if( crossscore ) FreeDoubleMtx( crossscore );
crossscore = NULL;
@@ -841,6 +845,9 @@ double Falign( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx,
free( segment2 );
free( sortedseg1 );
free( sortedseg2 );
+ if( gstart ) free( gstart );
+ if( gend ) free( gend );
+ if( codonscoremtx ) FreeDoubleMtx( codonscoremtx );
if( !kobetsubunkatsu )
{
FreeFukusosuuMtx ( seqVector1 );
@@ -858,6 +865,7 @@ double Falign( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx,
return( 0.0 );
}
+
len1 = strlen( seq1[0] );
len2 = strlen( seq2[0] );
nlentmp = MAX( len1, len2 );
@@ -875,6 +883,7 @@ double Falign( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx,
fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen );
#endif
+
result1 = AllocateCharMtx( clus1, alloclen );
result2 = AllocateCharMtx( clus2, alloclen );
tmpres1 = AllocateCharMtx( clus1, alloclen );
@@ -914,7 +923,148 @@ double Falign( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx,
if ( scoremtx == -1 ) n20or4or2 = 1;
else if( fftscore ) n20or4or2 = 1;
else n20or4or2 = 20;
+
+
+ gstart = NULL;
+ gend = NULL;
+ if( codonpos ) // gstart to gend ha sudeni allocate sareteiru kamo
+ {
+ FILE *cfp;
+ int p;
+ char *buf = calloc( sizeof( char ), 1000 );
+
+ if( dorp != 'd' )
+ {
+ reporterr( "\n\nThe --codonpos and --codonscore options are only for DNA data.\n\n" );
+ exit( 1 );
+ }
+
+// reporterr( "\nIn Falign, loading position-specific gap costs\n" );
+ gstart = calloc( sizeof(double), len1+1 );
+ for( i=0; i<len1+1; i++ ) gstart[i] = 1.0 * 0.5; // init
+ gend = calloc( sizeof(double), len1+1 );
+ for( i=0; i<len1+1; i++ ) gend[i] = 1.0 * 0.5; // init
+#define STRONG 3.0
+
+ i = 0;
+ cfp = fopen( "_codonpos", "r" );
+ if( cfp == NULL )
+ {
+ reporterr( "Cannot open _codonpos file\n" );
+ exit( 1 );
+ }
+ while( fgets( buf, 1000, cfp ) )
+ {
+ if( i == len1 )
+ {
+ reporterr( "\n\nNumber of lines in the codonposition file must be the same as the length of the first sequences (%d).\n\n", len1 );
+ exit( 1 );
+ }
+
+ if( buf[0] == '#' )
+ {
+ continue;
+ }
+ else if( buf[0] == '0' )
+ {
+ gstart[i] = 1.0 * 0.5; // noncoding
+ gend[i] = 1.0 * 0.5; // noncoding
+ }
+ else if( buf[0] == '1' )
+ {
+ gstart[i] = STRONG * 0.5;
+ gend[i] = 1.0 * 0.5;
+ }
+ else if( buf[0] == '2' )
+ {
+ gstart[i] = STRONG * 0.5;
+ gend[i] = STRONG * 0.5;
+ }
+ else if( buf[0] == '3' )
+ {
+ gstart[i] = 1.0 * 0.5;
+ gend[i] = STRONG * 0.5;
+ }
+ else
+ {
+ reporterr( "In the codonposition file, 1st letter in a line must be either of:\n" );
+ reporterr( " 0 (noncoding)\n" );
+ reporterr( " 1 (1st position in codon)\n" );
+ reporterr( " 2 (2nd position in codon)\n" );
+ reporterr( " 3 (3rd position in codon)\n" );
+ reporterr( "When mutliple difference frames are used, set 2.\n" );
+ exit( 1 );
+ }
+ i++;
+ }
+ fclose( cfp );
+ free( buf );
+ if( i < len1 )
+ {
+ reporterr( "\n\nNumber of lines in the codonposition file (%d) is less than the length of the first sequences (%d).\n\n", i, len1 );
+ exit( 1 );
+ }
+
+ }
+
+ if( codonscore )
+ {
+ int i, j;
+ FILE *cfp;
+ double *codonfreq = calloc( sizeof( double ), 64 );
+ double totalcount, codonscore0av, codonscore1av;
+ if( !codonpos )
+ {
+ reporterr( "\n\n --codonpos is necessary for --codonscore\n\n" );
+ exit( 1 );
+ }
+ codonscoremtx = AllocateDoubleMtx( 64, 64 );
+ cfp = fopen( "_codonscore", "r" );
+ loadcodonscore( cfp, codonscoremtx );
+ fclose( cfp );
+ for( i=0; i<64; i++ ) codonfreq[i] = 0.0;
+ totalcount = 0.0;
+ for( i=3; i<len1; i++ )
+ {
+// reporterr( "i=%d\n", i );
+ if( gstart[i-2] == STRONG * 0.5 && gend[i-2] == 1.0 * 0.5 &&
+ gstart[i-1] == STRONG * 0.5 && gend[i-1] == STRONG * 0.5 &&
+ gstart[i-0] == 1.0 * 0.5 && gend[i-0] == STRONG * 0.5 )
+ {
+// reporterr( "codon=%.3s, id=%d\n", seq1[0]+i-2, codon2id(seq1[0]+i-2) );
+ codonfreq[codon2id(seq1[0]+i-2)] += 1.0;
+ totalcount += 1.0;
+ }
+ }
+ for( i=0; i<64; i++ ) codonfreq[i] /= totalcount;
+// for( i=0; i<64; i++ ) reporterr( "%d, %f\n", i, codonfreq[i] );
+
+ codonscore0av = 0.0;
+ for( i=0; i<64; i++ ) for( j=0; j<64; j++ ) codonscore0av += codonfreq[i]*codonfreq[j] * codonscoremtx[i][j];
+// reporterr( "0av=%f\n", codonscore0av );
+ for( i=0; i<64; i++ ) for( j=0; j<64; j++ ) codonscoremtx[i][j] -= codonscore0av;
+
+ codonscore1av = 0.0;
+ for( i=0; i<64; i++ ) codonscore1av += codonfreq[i] * codonscoremtx[i][i];
+// reporterr( "1av=%f\n", codonscore1av );
+ for( i=0; i<64; i++ ) for( j=0; j<64; j++ ) codonscoremtx[i][j] /= codonscore1av;
+
+#if 0
+ codonscore1av = 0.0;
+ for( i=0; i<64; i++ ) codonscore1av += codonfreq[i] * codonscoremtx[i][i];
+ reporterr( "1av=%f\n", codonscore1av );
+
+ codonscore0av = 0.0;
+ for( i=0; i<64; i++ ) for( j=0; j<64; j++ ) codonscore0av += codonfreq[i]*codonfreq[j] * codonscoremtx[i][j];
+ reporterr( "0av=%f\n", codonscore0av );
+#endif
+
+ free( codonfreq );
+ }
+ else
+ codonscoremtx = NULL;
}
+
if( localalloclen < nlen )
{
if( localalloclen )
@@ -1531,10 +1681,21 @@ system( "less seqVec2 < /dev/tty > /dev/tty" );
case( 'A' ):
if( clus1 == 1 && clus2 == 1 )
{
- totalscore += G__align11( n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp );
+ if( codonpos || codonscore )
+ {
+// reporterr( "calling G__align11psg\n" );
+ totalscore += G__align11psg( codonscoremtx, n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp, gstart+cut1[i], gend+cut1[i] );
+ }
+ else
+ totalscore += G__align11( n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp );
}
else
{
+ if( codonpos )
+ {
+ reporterr( "\n\ncodonpos will be soon supported for a reference MSA. For now, use a single sequence as reference.\n\n\n" );
+ exit( 1 );
+ }
if( scoringmatrices ) // called by tditeration.c
{
totalscore += A__align_variousdist( whichmtx, scoringmatrices, NULL, penalty, penalty_ex, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, 0, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
=====================================
core/Galign11.c
=====================================
@@ -9,6 +9,70 @@
#define TERMGAPFAC_EX 0.0
#if 1
+
+#if 0
+static void match_calc_mtx_codon( double **codonmtx, double **mtx, double *match, char **s1, char **s2, int **codonseq1, int **codonseq2, int i1, int lgth2 )
+// gstart, gend wo tsukatte coding ka kakuninn suru beki
+{
+ char *seq2 = s2[0];
+ double *doubleptr = mtx[(unsigned char)s1[0][i1]];
+ int codonid1, codonid2;
+// int codonid1p, codonid2p;
+// int codonid1pp, codonid2pp;
+ double codonmatch;
+// int *pt0, *pt1, *pt2;
+ int *pt0;
+
+ codonid1 = codonseq1[0][i1];
+// codonid1p = codonseq1[1][i1];
+// codonid1pp = codonseq1[2][i1];
+ pt0 = codonseq2[0];
+// pt1 = codonseq2[1];
+// pt2 = codonseq2[2];
+// posin2 = 0;
+ while( lgth2-- )
+ {
+// reporterr( "%c-%c -> %f\n", s1[0][i1], *seq2, doubleptr[(unsigned char)*seq2] );
+
+// track ha at de kokoromiru
+ codonid2 = *pt0++;
+// codonid2p = *pt1++;
+// codonid2pp = *pt2++;
+// posin2++;
+//
+// reporterr( "%c%c%c(%d)-%c%c%c(%d)\n", s1[0][i1], s1[0][i1+1], s1[0][i1+2], codonid1, *seq2, *(seq2+1), *(seq2+2), codonid2 );
+ codonmatch = 0.0;
+ if( codonid1 > -1 && codonid2 > -1 ) codonmatch = codonmtx[codonid1][codonid2] * 600.0;
+// if( codonid1p > -1 && codonid2p > -1 ) codonmatch -= codonmtx[codonid1p][codonid2p] * 600;
+// if( codonid1pp > -1 && codonid2pp > -1 ) codonmatch -= codonmtx[codonid1pp][codonid2pp] * 600;
+ *match++ = doubleptr[(unsigned char)*seq2++] + codonmatch;
+ }
+}
+#else
+static void match_calc_mtx_codon( double **codonmtx, double **mtx, double *match, char **s1, char **s2, int **codonseq1, int **codonseq2, int i1, int lgth2 )
+// gstart, gend wo tsukatte coding ka kakuninn suru beki
+{
+ char *seq2 = s2[0];
+ double *doubleptr = mtx[(unsigned char)s1[0][i1]];
+ int codonid1, codonid2;
+ double codonmatch;
+ int *pt0;
+
+ codonid1 = codonseq1[0][i1];
+ pt0 = codonseq2[0];
+ while( lgth2-- )
+ {
+// reporterr( "%c-%c -> %f\n", s1[0][i1], *seq2, doubleptr[(unsigned char)*seq2] );
+
+ codonid2 = *pt0++;
+// reporterr( "%c%c%c(%d)-%c%c%c(%d)\n", s1[0][i1], s1[0][i1+1], s1[0][i1+2], codonid1, *seq2, *(seq2+1), *(seq2+2), codonid2 );
+ codonmatch = 0.0;
+ if( codonid1 > -1 && codonid2 > -1 ) codonmatch = codonmtx[codonid1][codonid2] * 600.0; // codonid1>-1 ha iranai.
+ *match++ = doubleptr[(unsigned char)*seq2++] + codonmatch;
+ }
+}
+#endif
+
static void match_calc_mtx( double **mtx, double *match, char **s1, char **s2, int i1, int lgth2 )
{
char *seq2 = s2[0];
@@ -198,6 +262,654 @@ static double Atracking( double *lasthorizontalw, double *lastverticalw,
}
+double G__align11psg( double **codonmtx, double **n_dynamicmtx, char **seq1, char **seq2, int alloclen, int headgp, int tailgp, double *gstart, double *gend )
+{
+
+
+
+// int k;
+ register int i, j;
+ int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
+ int lastj;
+ int lgth1, lgth2;
+ int resultlen;
+ double wm, wmo; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+ double fpenalty = (double)penalty;
+ double fpenalty_shift = (double)penalty_shift;
+ double fpenalty_tmp;
+#if USE_PENALTY_EX
+ double fpenalty_ex = (double)penalty_ex;
+ double fpenalty_ex_i;
+#endif
+#if 1
+ double *wtmp;
+ int *ijppt;
+ double *mjpt, *prept, *curpt;
+ int *mpjpt;
+#endif
+ static TLS double mi = 0.0;
+ static TLS double *m = NULL;
+ static TLS int **ijp = NULL;
+ static TLS int mpi = 0;
+ static TLS int *mp = NULL;
+ static TLS double *w1 = NULL;
+ static TLS double *w2 = NULL;
+ static TLS double *match = NULL;
+ static TLS double *initverticalw = NULL; /* kufuu sureba iranai */
+ static TLS double *lastverticalw = NULL; /* kufuu sureba iranai */
+ static TLS char **mseq1 = NULL;
+ static TLS char **mseq2 = NULL;
+ static TLS char **mseq = NULL;
+ static TLS int **intwork = NULL;
+ static TLS double **doublework = NULL;
+ static TLS int orlgth1 = 0, orlgth2 = 0;
+ static TLS double **amino_dynamicmtx = NULL; // ??
+ static TLS int **codonseq1 = NULL;
+ static TLS int **codonseq2 = NULL;
+
+ int *warpis = NULL;
+ int *warpjs = NULL;
+ int *warpi = NULL;
+ int *warpj = NULL;
+ int *prevwarpi = NULL;
+ int *prevwarpj = NULL;
+ double *wmrecords = NULL;
+ double *prevwmrecords = NULL;
+ int warpn = 0;
+ int warpbase;
+ double curm = 0.0;
+ double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+ int *warpipt, *warpjpt;
+
+ if( seq1 == NULL )
+ {
+ if( orlgth1 > 0 && orlgth2 > 0 )
+ {
+ orlgth1 = 0;
+ orlgth2 = 0;
+ if( mseq1 ) free( mseq1 ); mseq1 = NULL;
+ if( mseq2 ) free( mseq2 ); mseq2 = NULL;
+ if( w1 ) FreeFloatVec( w1 ); w1 = NULL;
+ if( w2 ) FreeFloatVec( w2 ); w2 = NULL;
+ if( match ) FreeFloatVec( match ); match = NULL;
+ if( initverticalw ) FreeFloatVec( initverticalw ); initverticalw = NULL;
+ if( lastverticalw ) FreeFloatVec( lastverticalw ); lastverticalw = NULL;
+
+ if( m ) FreeFloatVec( m ); m = NULL;
+ if( mp ) FreeIntVec( mp ); mp = NULL;
+
+ if( mseq ) FreeCharMtx( mseq ); mseq = NULL;
+
+
+
+ if( doublework ) FreeFloatMtx( doublework ); doublework = NULL;
+ if( intwork ) FreeIntMtx( intwork ); intwork = NULL;
+
+ if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL;
+
+ if( codonseq1 ) FreeIntMtx( codonseq1 ); codonseq1 = NULL;
+ if( codonseq2 ) FreeIntMtx( codonseq2 ); codonseq2 = NULL;
+ }
+ orlgth1 = 0;
+ orlgth2 = 0;
+ return( 0.0 );
+ }
+
+
+
+ lgth1 = strlen( seq1[0] );
+ lgth2 = strlen( seq2[0] );
+
+
+
+ warpbase = lgth1 + lgth2;
+ warpis = NULL;
+ warpjs = NULL;
+ warpn = 0;
+ if( trywarp )
+ {
+// fprintf( stderr, "IN G__align11\n" );
+ if( headgp == 0 || tailgp == 0 )
+ {
+ fprintf( stderr, "At present, headgp and tailgp must be 1.\n" );
+ exit( 1 );
+ }
+
+ wmrecords = AllocateFloatVec( lgth2+1 );
+ warpi = AllocateIntVec( lgth2+1 );
+ warpj = AllocateIntVec( lgth2+1 );
+ prevwmrecords = AllocateFloatVec( lgth2+1 );
+ prevwarpi = AllocateIntVec( lgth2+1 );
+ prevwarpj = AllocateIntVec( lgth2+1 );
+ for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
+ }
+
+#if 0
+ if( lgth1 <= 0 || lgth2 <= 0 )
+ {
+ fprintf( stderr, "WARNING (g11): lgth1=%d, lgth2=%d\n", lgth1, lgth2 );
+ }
+#endif
+
+#if 1
+ if( lgth1 == 0 && lgth2 == 0 )
+ return( 0.0 );
+
+ if( lgth1 == 0 )
+ {
+ seq1[0][lgth2] = 0;
+ while( lgth2 ) seq1[0][--lgth2] = *newgapstr;
+// fprintf( stderr, "seq1[0] = %s\n", seq1[0] );
+ return( 0.0 );
+ }
+
+ if( lgth2 == 0 )
+ {
+ seq2[0][lgth1] = 0;
+ while( lgth1 ) seq2[0][--lgth1] = *newgapstr;
+// fprintf( stderr, "seq2[0] = %s\n", seq2[0] );
+ return( 0.0 );
+ }
+#endif
+
+
+ wm = 0.0;
+
+ if( orlgth1 == 0 )
+ {
+ mseq1 = AllocateCharMtx( 2, 0 ); // 2020/Apr
+ mseq2 = AllocateCharMtx( 2, 0 ); // 2020/Apr
+ }
+
+
+
+ if( lgth1 > orlgth1 || lgth2 > orlgth2 )
+ {
+ int ll1, ll2;
+
+ if( orlgth1 > 0 && orlgth2 > 0 )
+ {
+ FreeFloatVec( w1 );
+ FreeFloatVec( w2 );
+ FreeFloatVec( match );
+ FreeFloatVec( initverticalw );
+ FreeFloatVec( lastverticalw );
+
+ FreeFloatVec( m );
+ FreeIntVec( mp );
+
+ FreeCharMtx( mseq );
+
+
+
+ FreeFloatMtx( doublework );
+ FreeIntMtx( intwork );
+ FreeDoubleMtx( amino_dynamicmtx );
+
+ if( codonseq1 ) FreeIntMtx( codonseq1 );
+ if( codonseq2 ) FreeIntMtx( codonseq2 );
+ }
+
+ ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
+ ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
+
+#if DEBUG
+ fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
+#endif
+
+ w1 = AllocateFloatVec( ll2+2 );
+ w2 = AllocateFloatVec( ll2+2 );
+ match = AllocateFloatVec( ll2+2 );
+
+ initverticalw = AllocateFloatVec( ll1+2 );
+ lastverticalw = AllocateFloatVec( ll1+2 );
+
+ m = AllocateFloatVec( ll2+2 );
+ mp = AllocateIntVec( ll2+2 );
+
+ mseq = AllocateCharMtx( 2, ll1+ll2 ); // 2020/Apr
+
+
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 );
+
+ if( codonscore )
+ {
+ codonseq1 = AllocateIntMtx( 3, ll1 ); // Only codonseq1[0] is used at this point
+ codonseq2 = AllocateIntMtx( 3, ll2 ); // Only codonseq2[0] is used at this point
+ }
+
+#if DEBUG
+ fprintf( stderr, "succeeded\n" );
+#endif
+
+ orlgth1 = ll1 - 100;
+ orlgth2 = ll2 - 100;
+ }
+ for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )
+ amino_dynamicmtx[(unsigned char)amino[i]][(unsigned char)amino[j]] = (double)n_dynamicmtx[i][j];
+
+
+ mseq1[0] = mseq[0];
+ mseq2[0] = mseq[1];
+
+
+ if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
+ {
+ int ll1, ll2;
+
+ if( commonAlloc1 && commonAlloc2 )
+ {
+ FreeIntMtx( commonIP );
+ }
+
+ ll1 = MAX( orlgth1, commonAlloc1 );
+ ll2 = MAX( orlgth2, commonAlloc2 );
+
+#if DEBUG
+ fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
+#endif
+
+ commonIP = AllocateIntMtx( ll1+10, ll2+10 );
+
+#if DEBUG
+ fprintf( stderr, "succeeded\n\n" );
+#endif
+
+ commonAlloc1 = ll1;
+ commonAlloc2 = ll2;
+ }
+ ijp = commonIP;
+
+ if( codonscore )
+ {
+// codonseq[1..2] are not used yet.
+ for( i=0; i<3&&i<lgth1; i++ )
+ {
+ codonseq1[0][i] = -1;
+ codonseq1[1][i] = -1;
+ codonseq1[2][i] = -1;
+ }
+ for( i=0; i<3&&i<lgth2; i++ )
+ {
+ codonseq2[0][i] = -1;
+ codonseq2[1][i] = -1;
+ codonseq2[2][i] = -1;
+ }
+ for( i=3; i<lgth1; i++ )
+ {
+ codonseq1[2][i] = codon2id( seq1[0]+i );
+ codonseq1[1][i] = codon2id( seq1[0]+i-1 );
+ codonseq1[0][i] = codon2id( seq1[0]+i-2 );
+ }
+ for( i=3; i<lgth2; i++ )
+ {
+ codonseq2[2][i] = codon2id( seq2[0]+i );
+ codonseq2[1][i] = codon2id( seq2[0]+i-1 );
+ codonseq2[0][i] = codon2id( seq2[0]+i-2 );
+ }
+ }
+ else
+ {
+ codonseq1 = NULL;
+ codonseq2 = NULL;
+ }
+
+#if 0
+ for( i=0; i<lgth1; i++ )
+ fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
+#endif
+
+ currentw = w1;
+ previousw = w2;
+
+
+ if( codonscore )
+ {
+
+// for( i=0; i<5; i++ ) for( j=0; j<5; j++ ) reporterr( "score[%c][%c]=%f\n", amino[i], amino[j], amino_dynamicmtx[amino[i]][amino[j]] );
+// exit( 1 );
+ match_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 );
+// if( !codonpos || ! (gstart[0] == 0.5 && gend[0] == 0.5) ) // consider codon when unknown or coding
+// match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, 0, lgth2 );
+#if 1
+ if( gstart[0] == 0.5 && gend[0] > 0.5 ) // only at 3rd position
+ match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, 0, lgth2 );
+ else
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 );
+#else
+ match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, 0, lgth2, gstart[0] == 0.5 && gend[0] > 0.5 );
+#endif
+ }
+ else
+ {
+ match_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 );
+ }
+
+ if( headgp == 1 )
+ {
+ for( i=1; i<lgth1+1; i++ )
+ {
+ initverticalw[i] += fpenalty;
+#if USE_PENALTY_EX
+// initverticalw[i] += fpenalty_ex * i; // ato de fukkatsu
+// reporterr( "added _ex\n" );
+#endif
+ }
+ for( j=1; j<lgth2+1; j++ )
+ {
+ currentw[j] += fpenalty;
+#if USE_PENALTY_EX
+// currentw[j] += fpenalty_ex * j; // ato de fukkatsu
+// reporterr( "added _ex\n" );
+#endif
+ }
+ }
+ else
+ {
+ for( i=1; i<lgth1+1; i++ )
+ {
+ initverticalw[i] += fpenalty * TERMGAPFAC;
+#if USE_PENALTY_EX // 2018/Apr/22
+ initverticalw[i] += fpenalty_ex * i * TERMGAPFAC_EX;
+// reporterr( "added _ex\n" );
+#endif
+ }
+ for( j=1; j<lgth2+1; j++ )
+ {
+ currentw[j] += fpenalty * TERMGAPFAC;
+#if USE_PENALTY_EX // 2018/Apr/22
+ currentw[j] += fpenalty_ex * j * TERMGAPFAC_EX;
+// reporterr( "added _ex\n" );
+#endif
+ }
+ }
+
+
+ for( j=1; j<lgth2+1; ++j )
+ {
+ m[j] = currentw[j-1]; mp[j] = 0;
+ }
+
+ if( lgth2 == 0 )
+ lastverticalw[0] = 0.0; // lgth2==0 no toki error
+ else
+ lastverticalw[0] = currentw[lgth2-1]; // lgth2==0 no toki error
+
+ if( tailgp ) lasti = lgth1+1; else lasti = lgth1;
+ lastj = lgth2+1;
+
+#if XXXXXXX
+fprintf( stderr, "currentw = \n" );
+for( i=0; i<lgth1+1; i++ )
+{
+ fprintf( stderr, "%5.2f ", currentw[i] );
+}
+fprintf( stderr, "\n" );
+fprintf( stderr, "initverticalw = \n" );
+for( i=0; i<lgth2+1; i++ )
+{
+ fprintf( stderr, "%5.2f ", initverticalw[i] );
+}
+fprintf( stderr, "\n" );
+#endif
+
+
+ for( i=1; i<lasti; i++ )
+ {
+// reporterr( "seq1[0][%d]=%c, gstart[i]=%f, gend[i]=%f\n", i, seq1[0][i], gstart[i], gend[i] );
+ wtmp = previousw;
+ previousw = currentw;
+ currentw = wtmp;
+
+ previousw[0] = initverticalw[i-1];
+
+
+// if( codonscore && ( !codonpos || !( gstart[i] == 0.5 && gend[i] == 0.5 ) ) ) // unknown or coding
+// match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, i, lgth2 );
+#if 1
+ if( codonscore && ( gstart[i] == 0.5 && gend[i] > 0.5 ) ) // only at 3rd position in seq1
+ match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, i, lgth2 );
+ else
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );
+#else
+ if( codonscore ) // only at 3rd position in seq1
+ match_calc_mtx_codon( codonmtx, amino_dynamicmtx, currentw, seq1, seq2, codonseq1, codonseq2, i, lgth2, gstart[i] == 0.5 && gend[i] > 0.5 );
+ else
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );
+#endif
+#if XXXXXXX
+fprintf( stderr, "\n" );
+fprintf( stderr, "i=%d\n", i );
+fprintf( stderr, "currentw = \n" );
+for( j=0; j<lgth2; j++ )
+{
+ fprintf( stderr, "%5.2f ", currentw[j] );
+}
+fprintf( stderr, "\n" );
+#endif
+#if XXXXXXX
+fprintf( stderr, "\n" );
+fprintf( stderr, "i=%d\n", i );
+fprintf( stderr, "currentw = \n" );
+for( j=0; j<lgth2; j++ )
+{
+ fprintf( stderr, "%5.2f ", currentw[j] );
+}
+fprintf( stderr, "\n" );
+#endif
+ currentw[0] = initverticalw[i];
+
+ mi = previousw[0]; mpi = 0;
+
+ ijppt = ijp[i] + 1;
+ mjpt = m + 1;
+ prept = previousw;
+ curpt = currentw + 1;
+ mpjpt = mp + 1;
+ if( trywarp )
+ {
+ prevwmrecordspt = prevwmrecords;
+ wmrecordspt = wmrecords+1;
+ wmrecords1pt = wmrecords;
+ warpipt = warpi + 1;
+ warpjpt = warpj + 1;
+ }
+
+ if( i < lgth1 ) fpenalty_ex_i = fpenalty_ex;
+ else fpenalty_ex_i = 0.0; // 2018/May/11
+
+ for( j=1; j<lastj; j++ )
+ {
+
+ wm = *prept;
+ *ijppt = 0;
+
+#if 0
+ fprintf( stderr, "%5.0f->", wm );
+#endif
+#if 0
+ fprintf( stderr, "%5.0f?", g );
+#endif
+ if( (g=mi+fpenalty*gend[i]) > wm )
+ {
+ wm = g;
+ *ijppt = -( j - mpi );
+// reporterr( "hit! jump from %d to %d: %c->%c\n", j, mpi, seq2[0][j], seq2[0][mpi] );
+ }
+ if( (g=*prept+fpenalty*gstart[i-1]) >= mi )
+// if( (g=*prept) > mi )
+ {
+ mi = g;
+ mpi = j-1;
+// reporterr( "hit! jump to %d: ->%c:%c\n", mpi, seq1[0][i-1], seq2[0][mpi] );
+ }
+#if USE_PENALTY_EX
+ mi += fpenalty_ex_i;
+// mi += fpenalty_ex;
+#endif
+
+#if 0
+ fprintf( stderr, "%5.0f?", g );
+#endif
+ if( (g=*mjpt + fpenalty*gend[i]) > wm )
+ {
+ wm = g;
+ *ijppt = +( i - *mpjpt );
+ }
+ if( (g=*prept+fpenalty*gstart[i-1]) >= *mjpt )
+// if( (g=*prept) > *mjpt )
+ {
+ *mjpt = g;
+ *mpjpt = i-1;
+ }
+#if USE_PENALTY_EX
+ if( j < lgth2 ) // 2018/May/11
+ m[j] += fpenalty_ex;
+#endif
+#if 1
+ if( trywarp )
+ {
+ fpenalty_tmp = fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] );
+// fprintf( stderr, "fpenalty_shift = %f\n", fpenalty_tmp );
+
+// fprintf( stderr, "\n\n\nwarp to %c-%c (%d-%d) from %c-%c (%d-%d) ? prevwmrecords[%d] = %f + %f <- wm = %f\n", seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], prevwarpi[j-1], prevwarpj[j-1], seq1[0][i], seq2[0][j], i, j, j, prevwmrecords[j-1], fpenalty_tmp, wm );
+// if( (g=prevwmrecords[j-1] + fpenalty_shift )> wm )
+ if( ( g=*prevwmrecordspt++ + fpenalty_tmp )> wm ) // naka ha osokute kamawanai
+ {
+// fprintf( stderr, "Yes! Warp!! from %d-%d (%c-%c) to %d-%d (%c-%c) fpenalty_tmp = %f! warpn = %d\n", i, j, seq1[0][i], seq2[0][j-1], prevwarpi[j-1], prevwarpj[j-1],seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], fpenalty_tmp, warpn );
+ if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+ {
+ *ijppt = warpbase + warpn - 1;
+ }
+ else
+ {
+ *ijppt = warpbase + warpn;
+ warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+ warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+ warpis[warpn] = prevwarpi[j-1];
+ warpjs[warpn] = prevwarpj[j-1];
+ warpn++;
+ }
+ wm = g;
+ }
+ else
+ {
+ }
+
+ curm = *curpt + wm;
+
+// fprintf( stderr, "###### curm = %f at %c-%c, i=%d, j=%d\n", curm, seq1[0][i], seq2[0][j], i, j );
+
+// fprintf( stderr, "copy from i, j-1? %f > %f?\n", wmrecords[j-1], curm );
+// if( wmrecords[j-1] > wmrecords[j] )
+ if( *wmrecords1pt > *wmrecordspt )
+ {
+// fprintf( stderr, "yes\n" );
+// wmrecords[j] = wmrecords[j-1];
+ *wmrecordspt = *wmrecords1pt;
+// warpi[j] = warpi[j-1];
+// warpj[j] = warpj[j-1];
+ *warpipt = *(warpipt-1);
+ *warpjpt = *(warpjpt-1);
+// fprintf( stderr, "warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] );
+ }
+// else
+// {
+// fprintf( stderr, "no\n" );
+// }
+
+// fprintf( stderr, " curm = %f at %c-%c\n", curm, seq1[0][i], seq2[0][j] );
+// fprintf( stderr, " wmrecords[%d] = %f\n", j, wmrecords[j] );
+// fprintf( stderr, "replace?\n" );
+
+// if( curm > wmrecords[j] )
+ if( curm > *wmrecordspt )
+ {
+// fprintf( stderr, "yes at %d-%d (%c-%c), replaced warp: warpi[j]=%d, warpj[j]=%d warpn=%d, wmrecords[j] = %f -> %f\n", i, j, seq1[0][i], seq2[0][j], i, j, warpn, wmrecords[j], curm );
+// wmrecords[j] = curm;
+ *wmrecordspt = curm;
+// warpi[j] = i;
+// warpj[j] = j;
+ *warpipt = i;
+ *warpjpt = j;
+ }
+// else
+// {
+// fprintf( stderr, "No! warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] );
+// }
+// fprintf( stderr, "%d-%d (%c-%c) curm = %5.0f, wmrecords[j]=%f\n", i, j, seq1[0][i], seq2[0][j], curm, wmrecords[j] );
+ wmrecordspt++;
+ wmrecords1pt++;
+ warpipt++;
+ warpjpt++;
+ }
+#endif
+
+ *curpt++ += wm;
+ ijppt++;
+ mjpt++;
+ prept++;
+ mpjpt++;
+ }
+ lastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error
+
+ if( trywarp )
+ {
+ fltncpy( prevwmrecords, wmrecords, lastj );
+ intncpy( prevwarpi, warpi, lastj );
+ intncpy( prevwarpj, warpj, lastj );
+ }
+ }
+
+
+ if( trywarp )
+ {
+// fprintf( stderr, "\nwm = %f\n", wm );
+// fprintf( stderr, "warpn = %d\n", warpn );
+ free( wmrecords );
+ free( prevwmrecords );
+ free( warpi );
+ free( warpj );
+ free( prevwarpi );
+ free( prevwarpj );
+ }
+
+ wmo = Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, tailgp, warpis, warpjs, warpbase );
+ if( !tailgp ) wm = wmo;
+
+// reporterr( "wm (after tracking) = %f\n", wm );
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
+
+
+ resultlen = strlen( mseq1[0] );
+ if( alloclen < resultlen || resultlen > N )
+ {
+ fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
+ ErrorExit( "LENGTH OVER!\n" );
+ }
+
+
+ strcpy( seq1[0], mseq1[0] );
+ strcpy( seq2[0], mseq2[0] );
+#if 0
+ fprintf( stderr, "\n" );
+ fprintf( stderr, ">\n%s\n", mseq1[0] );
+ fprintf( stderr, ">\n%s\n", mseq2[0] );
+ fprintf( stderr, "wm = %f\n", wm );
+#endif
+ return( wm );
+}
double G__align11( double **n_dynamicmtx, char **seq1, char **seq2, int alloclen, int headgp, int tailgp )
{
=====================================
core/addfunctions.c
=====================================
@@ -1520,9 +1520,148 @@ void restorecommongaps( int njob, int n0, char **seq, int *ex1, int *ex2, int *g
free( tmpgapmap );
}
-int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, int **deletelist )
+#if 0 // mada
+int deletenewinsertions_difflist( int on, int an, char **oseq, char **aseq, GapPos **difflist )
{
- int i, j, p, q, allgap, ndel;
+ int i, j, p, q, allgap, ndel, istart, pstart;
+ int len = strlen( oseq[0] );
+ char *eqseq, tmpc;
+
+ reporterr( "In deletenewinsertions_difflist\n" );
+ for( j=0; j<on; j++ ) reporterr( "\no=%s\n", oseq[j] );
+ for( j=0; j<an; j++ ) reporterr( "a=%s\n", aseq[j] );
+
+
+ eqseq = calloc( len+1, sizeof( char ) );
+ for( i=0; i<len; i++ )
+ {
+ allgap = 0;
+ for( j=0; j<on; j++ )
+ {
+ tmpc = oseq[j][i];
+ if( tmpc != '-' && tmpc != '=' ) break;
+ }
+ if( j == on )
+ allgap = 1;
+
+ if( allgap )
+ {
+ eqseq[i] = '=';
+ }
+ else
+ {
+ eqseq[i] = 'o';
+ }
+ }
+
+ for( j=0; j<1; j++ ) reporterr( "\no = %s\n", oseq[j] );
+ reporterr( "\ne = %s\n", eqseq );
+ for( j=0; j<1; j++ ) reporterr( "a = %s\n", aseq[j] );
+
+ if( difflist )
+ {
+ for( j=0; j<an; j++ )
+ {
+ ndel = 0;
+ for( i=0,q=0,p=0; i<len; i++ ) // 0 origin
+ {
+ tmpc = aseq[j][i];
+ if( eqseq[i] != '=' ) p++;
+ if( tmpc != '-' && tmpc != '=' ) q++;
+
+#if 0
+ if( eqseq[i] == '=' && ( tmpc != '-' && tmpc != '=' ) )
+ {
+ difflist[j] = realloc( difflist[j], sizeof( GapPos ) * (ndel+2) );
+ difflist[j][ndel].pos = -p;
+ difflist[j][ndel].len = 1;
+ ndel++;
+ }
+ if( eqseq[i] != '=' && ( tmpc == '-' || tmpc == '=' ) )
+ {
+// reporterr( "deleting %d-%d, %c\n", j, i, aseq[j][i] );
+ difflist[j] = realloc( difflist[j], sizeof( GapPos ) * (ndel+2) );
+ difflist[j][ndel].pos = p;
+ difflist[j][ndel].len = 1;
+ ndel++;
+ }
+#else
+ istart = i;
+ pstart = p;
+ while( eqseq[i] == '=' && ( tmpc != '-' && tmpc != '=' ) )
+ {
+ i++;
+ tmpc = aseq[j][i];
+ }
+ if( i != istart )
+ {
+ difflist[j] = realloc( difflist[j], sizeof( GapPos ) * (ndel+2) );
+ difflist[j][ndel].pos = pstart;
+ difflist[j][ndel].len = -(i-istart);
+ ndel++;
+ }
+ istart = i;
+ pstart = p;
+ while( eqseq[i] != '=' && ( tmpc == '-' || tmpc == '=' ) )
+ {
+ i++;
+ tmpc = aseq[j][i];
+ }
+ if( i != istart )
+ {
+ difflist[j] = realloc( difflist[j], sizeof( GapPos ) * (ndel+2) );
+ difflist[j][ndel].pos = pstart;
+ difflist[j][ndel].len = i-istart;
+ ndel++;
+ }
+#endif
+ }
+ difflist[j][ndel].pos = -1;
+ difflist[j][ndel].len = 0;
+ for( i=0; ; i++ )
+ {
+ if( difflist[j][i].pos == -1 ) break;
+ reporterr( "sequence %d,%d, pos=%d,len=%d\n", j, i, difflist[j][i].pos, difflist[j][i].len );
+ }
+ }
+
+ }
+exit( 1 );
+ for( i=0,p=0; i<len; i++ )
+ {
+
+// if( oseq[0][i] != '=' )
+// reporterr( "i=%d, p=%d, q=%d, originally, %c\n", i, p, q, originallygapped[p]);
+// if( eqseq[i] != '=' && originallygapped[p] != '-' ) // dame!!
+ if( eqseq[i] != '=' )
+ {
+// reporterr( "COPY! p=%d\n", p );
+ if( p != i )
+ {
+ for( j=0; j<on; j++ ) oseq[j][p] = oseq[j][i];
+ for( j=0; j<an; j++ ) aseq[j][p] = aseq[j][i];
+ }
+ p++;
+ }
+ }
+// reporterr( "deletemap = %s\n", deletemap );
+// reporterr( "eqseq = %s\n", eqseq );
+// reporterr( "originallygapped = %s\n", originallygapped );
+ for( j=0; j<on; j++ ) oseq[j][p] = 0;
+ for( j=0; j<an; j++ ) aseq[j][p] = 0;
+
+ free( eqseq );
+
+// for( j=0; j<on; j++ ) reporterr( "\no=%s\n", oseq[j] );
+// for( j=0; j<an; j++ ) reporterr( "a=%s\n", aseq[j] );
+
+ return( i-p );
+}
+#endif
+
+int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, GapPos **deletelist )
+{
+ int i, j, p, q, allgap, ndel, qstart;
int len = strlen( oseq[0] );
char *eqseq, tmpc;
@@ -1551,6 +1690,7 @@ int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, int
eqseq[i] = 'o';
}
}
+ eqseq[len] = 0; // hitsuyou
// for( j=0; j<1; j++ ) reporterr( "\no = %s\n", oseq[j] );
// reporterr( "\ne = %s\n", eqseq );
@@ -1564,19 +1704,40 @@ int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, int
for( i=0,q=0; i<len; i++ )
{
tmpc = aseq[j][i];
+#if 0
if( tmpc != '-' && tmpc != '=' )
{
if( eqseq[i] == '=' )
{
// reporterr( "deleting %d-%d, %c\n", j, i, aseq[j][i] );
- deletelist[j] = realloc( deletelist[j], sizeof( int ) * (ndel+2) );
- deletelist[j][ndel] = q;
+ deletelist[j] = realloc( deletelist[j], sizeof( GapPos ) * (ndel+2) );
+ deletelist[j][ndel].pos = q;
+ deletelist[j][ndel].len = 1;
ndel++;
}
q++;
}
+#else
+ qstart = q;
+ while( (tmpc != '-' && tmpc != '=') && eqseq[i] == '=' )
+ {
+ i++;
+ q++;
+ tmpc = aseq[j][i];
+ }
+ if( qstart != q )
+ {
+// reporterr( "deleting %d-%d, %c\n", j, i, aseq[j][i] );
+ deletelist[j] = realloc( deletelist[j], sizeof( GapPos ) * (ndel+2) );
+ deletelist[j][ndel].pos = qstart;
+ deletelist[j][ndel].len = q-qstart;
+ ndel++;
+ }
+ if( tmpc != '-' && tmpc != '=' ) q++;
+#endif
}
- deletelist[j][ndel] = -1;
+ deletelist[j][ndel].pos = -1;
+ deletelist[j][ndel].len = 0;
}
}
for( i=0,p=0; i<len; i++ )
@@ -1610,9 +1771,10 @@ int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, int
return( i-p );
}
-int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **deletelist )
+#if 1 // 2022/Jan. disttbfast to tbfast niha hitsuyou.
+int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, GapPos **deletelist )
{
- int i, j, p, q, allgap, ndel;
+ int i, j, p, q, allgap, ndel, qstart;
int len = strlen( oseq[0] );
char *eqseq, tmpc;
@@ -1641,6 +1803,7 @@ int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **d
eqseq[i] = 'o';
}
}
+ eqseq[len] = 0;
// for( j=0; j<1; j++ ) reporterr( "\no = %s\n", oseq[j] );
// reporterr( "\ne = %s\n", eqseq );
@@ -1654,6 +1817,7 @@ int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **d
for( i=0,q=0; i<len; i++ )
{
tmpc = aseq[j][i];
+#if 0
if( tmpc != '-' )
{
if( eqseq[i] == '=' )
@@ -1665,8 +1829,27 @@ int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **d
}
q++;
}
+#else
+ qstart = q;
+ while( tmpc != '-' && eqseq[i] == '=' )
+ {
+ i++;
+ q++;
+ tmpc = aseq[j][i];
+ }
+ if( qstart != q )
+ {
+// reporterr( "deleting %d-%d, %c\n", j, i, aseq[j][i] );
+ deletelist[j] = realloc( deletelist[j], sizeof( GapPos ) * (ndel+2) );
+ deletelist[j][ndel].pos = qstart;
+ deletelist[j][ndel].len = q-qstart;
+ ndel++;
+ }
+ if( tmpc != '-' ) q++;
+#endif
}
- deletelist[j][ndel] = -1;
+ deletelist[j][ndel].pos = -1;
+ deletelist[j][ndel].len = 0;
}
}
for( i=0,p=0; i<len; i++ )
@@ -1701,7 +1884,6 @@ int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **d
}
-#if 0
int maskoriginalgaps( char *repseq, char *originallygapped )
{
int i, p;
@@ -1800,9 +1982,9 @@ void restoreoriginalgaps( int n, char **seq, char *originalgaps )
free( tmpseq );
}
-void reconstructdeletemap( int nadd, char **addbk, int **deletelist, char **realn, FILE *fp, char **name )
+void reconstructdeletemap( int nadd, char **addbk, GapPos **deletelist, char **realn, FILE *fp, char **name )
{
- int i, j, p, len;
+ int i, j, p, len, gaplen;
char *gapped, *nameptr, *tmpptr;
for( i=0; i<nadd; i++ )
@@ -1821,12 +2003,22 @@ void reconstructdeletemap( int nadd, char **addbk, int **deletelist, char **real
fprintf( fp, ">%s\n", nameptr );
fprintf( fp, "# letter, position in the original sequence, position in the reference alignment\n" );
+#if 0
// reporterr( "addbk[%d] = %s\n", i, addbk[i] );
for( j=0; (p=deletelist[i][j])!=-1; j++ )
{
// reporterr( "deleting %d, %c\n", p, addbk[i][p] );
gapped[p] = '-';
}
+#else
+// reporterr( "addbk[%d] = %s\n", i, addbk[i] );
+ for( j=0; (p=deletelist[i][j].pos)!=-1; j++ )
+ {
+// reporterr( "deleting %d, %c\n", p, addbk[i][p] );
+ gaplen = deletelist[i][j].len;
+ while( gaplen-- ) gapped[p++] = '-';
+ }
+#endif
// reporterr( "addbk = %s\n", addbk[i] );
// reporterr( "gapped = %s\n", gapped );
@@ -1849,3 +2041,128 @@ void reconstructdeletemap( int nadd, char **addbk, int **deletelist, char **real
free( gapped );
}
}
+
+#define MODIFYNAME 1 // MODIFYNAME mo --anysymbol to ryouritsu, 7.502-
+
+void reconstructdeletemap_compact( int nadd, char **addbk, GapPos **deletelist, char **realn, FILE *fp, char **name )
+{
+ int i, j, p, len, nins, gaplen;
+ char *gapped, *nameptr, *tmpptr;
+ int status = 0;
+
+#if MODIFYNAME
+ char *newname, *insstr;
+ newname = calloc( B+100, sizeof( char ) );
+ insstr = calloc( 1000, sizeof( char ) );
+#endif
+ fprintf( fp, "# Insertion in added sequence > Position in reference\n" );
+ for( i=0; i<nadd; i++ )
+ {
+ len = strlen( addbk[i] );
+ gapped = calloc( len+1, sizeof( char ) );
+// reporterr( "len=%d", len );
+// for( j=0; j<len; j++ ) gapped[j] = 'o'; // iranai
+// gapped[len] = 0; // iranai
+
+ nameptr = name[i] + 1;
+ if( outnumber )
+ nameptr = strstr( nameptr, "_numo_e" ) + 8;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
+
+ status = 0;
+
+
+#if 0
+// reporterr( "addbk[%d] = %s\n", i, addbk[i] );
+ for( j=0; (p=deletelist[i][j])!=-1; j++ )
+ {
+// reporterr( "deleting %d, %c\n", p, addbk[i][p] );
+ gapped[p] = '-';
+ status = 1;
+ }
+#else
+// reporterr( "addbk[%d] = %s\n", i, addbk[i] );
+ for( j=0; (p=deletelist[i][j].pos)!=-1; j++ )
+ {
+// reporterr( "deleting %d-%d, %c\n", p, p+deletelist[i][j].len, addbk[i][p] );
+ gaplen = deletelist[i][j].len;
+ while( gaplen-- ) gapped[p++] = '-'; // origin??????????? 2022/Jan
+ status = 1;
+ }
+#endif
+
+// reporterr( "addbk = %s\n", addbk[i] );
+// reporterr( "gapped = %s\n", gapped );
+
+
+ if( status == 0 )
+ {
+ free( gapped );
+ continue;
+ }
+
+ fprintf( fp, ">%s\n", nameptr );
+
+ status = -1;
+#if MODIFYNAME
+ insstr[0] = 0;
+ nins = 0;
+#endif
+ for( j=0,p=0; j<len; j++ )
+ {
+ while( realn[i][p] == '-' )
+ p++;
+
+ if( gapped[j] == '-' )
+ {
+ if( status != 1 )
+ {
+ status = 1;
+ fprintf( fp, "%d%c - ", j+1, addbk[i][j] ); // 1origin
+#if MODIFYNAME
+ if( nins == 0 )
+ sprintf( insstr, "%d%c-", j+1, addbk[i][j] );
+ else if( nins == 1 )
+ sprintf( insstr+strlen(insstr), "etc," );
+ nins++;
+#endif
+ }
+// fprintf( fp, "%c, %d, -\n", addbk[i][j], j+1 ); // 1origin
+ }
+ else
+ {
+ if( status == 1 )
+ {
+ fprintf( fp, "%d%c > %dv%d\n", j, addbk[i][j-1], p, p+1 ); // 1origin
+#if MODIFYNAME
+ if( nins == 1 ) sprintf( insstr+strlen(insstr), "%d%c,", j, addbk[i][j-1] );
+#endif
+ }
+ status = 0;
+// fprintf( fp, "%c, %d, %d\n", addbk[i][j], j+1, p+1 ); // 1origin
+ p++;
+ }
+ }
+ if( status == 1 )
+ {
+ fprintf( fp, "%d%c > %dv%d\n", j, addbk[i][j-1], p, p+1 ); // 1origin
+#if MODIFYNAME
+ if( nins == 1 ) sprintf( insstr+strlen(insstr), "%d%c,", j, addbk[i][j-1] );
+#endif
+ }
+ free( gapped );
+
+#if MODIFYNAME
+ insstr[strlen(insstr)-1] = 0;
+ strcpy( newname, name[i] );
+ sprintf( newname+(nameptr-name[i]), "%dins:%s|%s", nins, insstr, nameptr );
+ newname[B] = 0;
+ strcpy( name[i], newname );
+#endif
+ }
+#if MODIFYNAME
+ free( newname );
+ free( insstr );
+#endif
+}
=====================================
core/addsingle.c
=====================================
@@ -14,7 +14,9 @@ static int noalign;
static int multidist;
static int maxdist = 2; // scale -> 2bai
static int allowlongadds;
+static int addtotop;
static int keeplength;
+static int diffout; // Incomplete. Not used
static int ndeleted;
static int mapout;
static int smoothing;
@@ -55,7 +57,8 @@ typedef struct _thread_arg
int ***topol;
double **len;
Addtree *addtree;
- int **deletelist;
+ GapPos **deletelist;
+ GapPos **difflist;
#ifdef enablemultithread
int *iaddshare;
int thread_no;
@@ -793,6 +796,8 @@ void arguments( int argc, char *argv[] )
int c;
nthread = 1;
+ codonpos = 0;
+ codonscore = 0;
outnumber = 0;
scoreout = 0;
treein = 0;
@@ -855,7 +860,9 @@ void arguments( int argc, char *argv[] )
tuplesize = -1;
legacygapcost = 0;
allowlongadds = 0;
+ addtotop = 0;
keeplength = 0;
+ diffout = 0;
mapout = 0;
smoothing = 0;
distout = 0;
@@ -949,6 +956,12 @@ void arguments( int argc, char *argv[] )
fprintf( stderr, "nthread = %d\n", nthread );
--argc;
goto nextoption;
+ case 'R':
+ codonpos = 1;
+ break;
+ case 'S':
+ codonscore = 1;
+ break;
#if 0
case 'R':
rnaprediction = 'r';
@@ -993,9 +1006,9 @@ void arguments( int argc, char *argv[] )
fftNoAnchStop = 1;
break;
#endif
- case 'S':
- scoreout = 1;
- break;
+// case 'S':
+// scoreout = 1;
+// break;
#if 0
case 'e':
fftscore = 0;
@@ -1057,6 +1070,9 @@ void arguments( int argc, char *argv[] )
case 'U':
treein = 1;
break;
+ case 'x':
+ addtotop = 1;
+ break;
case 'V':
allowlongadds = 1;
break;
@@ -1092,10 +1108,12 @@ void arguments( int argc, char *argv[] )
tbutree = 0;
break;
/* modification end. */
+#if 0
case 'z':
fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
+#endif
case 'w':
fftWinSize = myatoi( *++argv );
--argc;
@@ -1111,9 +1129,15 @@ void arguments( int argc, char *argv[] )
case 'Y':
keeplength = 1;
break;
+ case 'z':
+ mapout = 2;
+ break;
case 'Z':
mapout = 1;
break;
+ case '%':
+ diffout = 1;
+ break;
case ':':
nwildcard = 1;
break;
@@ -1398,6 +1422,11 @@ static double treebase( int nseq, int *nlen, char **aseq, int nadd, char *mergeo
if( constraint == 2 )
{
+ if( codonpos )
+ {
+ reporterr( "\n\nThe --codonpos option is supported only for --6merpair --addfragments at this point. Add the --6merpair flag, for now.\n\n\n" );
+ exit( 1 );
+ }
if( alg == 'M' )
{
fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" );
@@ -1598,7 +1627,8 @@ static void *addsinglethread( void *arg )
int ***topol = targ->topol;
double **len = targ->len;
Addtree *addtree = targ->addtree;
- int **deletelist = targ->deletelist;
+ GapPos **deletelist = targ->deletelist;
+ GapPos **difflist = targ->difflist;
double pscore;
int *alnleninnode = NULL;
char *targetseq;
@@ -1807,7 +1837,11 @@ static void *addsinglethread( void *arg )
// {
// }
// fixed_musclesupg_double_realloc_nobk_halfmtx( njobc, iscorec, topolc, lenc, depc, 0, 1 );
- neighbor = addonetip( njobc, topolc, lenc, iscorec, topol, len, dep, treeout, addtree, iadd, name, alnleninnode, nogaplenjusttodecideaddhereornot, noalign );
+
+ if( addtotop )
+ neighbor = addonetip2top( njobc, topolc, lenc, iscorec, topol, len, dep, treeout, addtree, iadd, name, alnleninnode, nogaplenjusttodecideaddhereornot, noalign );
+ else
+ neighbor = addonetip( njobc, topolc, lenc, iscorec, topol, len, dep, treeout, addtree, iadd, name, alnleninnode, nogaplenjusttodecideaddhereornot, noalign );
FreeFloatHalfMtx( iscorec, njobc );
@@ -1944,8 +1978,16 @@ static void *addsinglethread( void *arg )
// strcpy( seq[norg+iadd], bseq[norg] );
-
- if( keeplength )
+
+ if( diffout )
+ {
+ reporterr( "Not yet written\n" );
+ exit( 1 );
+// deletenewinsertions_difflist( norg, 1, bseq, bseq+norg, difflist+iadd );
+// strcpy( targetseq, bseq[norg] );
+// i = norg; // no new gap!!
+ }
+ else if( keeplength )
{
// reporterr( "deletelist = %p\n", deletelist );
// reporterr( "deletelist+iadd = %p\n", deletelist+iadd );
@@ -2942,7 +2984,8 @@ int main( int argc, char *argv[] )
double **len;
double **iscoreo, **nscore;
FILE *fp;
- int **deletelist = NULL;
+ GapPos **deletelist = NULL;
+ GapPos **difflist = NULL;
char **addbk = NULL;
char *originalgaps = NULL;
Addtree *addtree;
@@ -3269,32 +3312,51 @@ int main( int argc, char *argv[] )
singlerna = NULL;
- if( keeplength )
+ if( diffout )
+ {
+ lenfull = strlen( seq[0] );
+ originalgaps = (char *)calloc( lenfull+1, sizeof( char ) );
+ recordoriginalgaps( originalgaps, norg, seq );
+
+ difflist = (GapPos **)calloc( nadd+1, sizeof( GapPos * ) );
+ for( i=0; i<nadd; i++ )
+ {
+ difflist[i] = calloc( 1, sizeof( GapPos ) );
+ difflist[i][0].pos = -1;
+ difflist[i][0].len = 0;
+ }
+ difflist[nadd] = NULL;
+ deletelist = NULL;
+ }
+
+ else if( keeplength )
{
lenfull = strlen( seq[0] );
- originalgaps = (char *)calloc( lenfull+1, sizeof( char) );
+ originalgaps = (char *)calloc( lenfull+1, sizeof( char ) );
recordoriginalgaps( originalgaps, norg, seq );
- deletelist = (int **)calloc( nadd+1, sizeof( int * ) );
+ deletelist = (GapPos **)calloc( nadd+1, sizeof( GapPos * ) );
for( i=0; i<nadd; i++ )
{
- deletelist[i] = calloc( 1, sizeof( int ) );
- deletelist[i][0] = -1;
+ deletelist[i] = calloc( 1, sizeof( GapPos ) );
+ deletelist[i][0].pos = -1;
+ deletelist[i][0].len = 0;
}
deletelist[nadd] = NULL;
-
+ difflist = NULL;
}
else
{
originalgaps = NULL;
deletelist = NULL;
+ difflist = NULL;
}
commongappick( norg, seq );
lenfull = strlen( seq[0] );
- if( ratioambiguouscolumn_simple( seq, norg, lenfull ) > 0.03 )
+ if( 0 && ratioambiguouscolumn_simple( seq, norg, lenfull ) > 0.03 )
{
fprintf( stderr, "################################################################################\n" );
fprintf( stderr, "# \n" );
@@ -3418,6 +3480,7 @@ int main( int argc, char *argv[] )
targ[i].len = len;
targ[i].addtree = addtree;
targ[i].deletelist = deletelist;
+ targ[i].difflist = difflist;
targ[i].mutex_counter = &mutex_counter;
pthread_create( handle+i, NULL, addsinglethread, (void *)(targ+i) );
}
@@ -3454,6 +3517,7 @@ int main( int argc, char *argv[] )
targ[0].len = len;
targ[0].addtree = addtree;
targ[0].deletelist = deletelist;
+ targ[0].difflist = difflist;
addsinglethread( targ );
free( targ );
}
@@ -3721,15 +3785,18 @@ int main( int argc, char *argv[] )
for( i=0; i<nadd; i++ )
{
if( deletelist[i] )
- for( j=0; deletelist[i][j]!=-1; j++ )
- fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
+ for( j=0; deletelist[i][j].pos!=-1; j++ )
+ fprintf( dlf, "%d %d %d\n", njob-nadd+i, deletelist[i][j].pos, deletelist[i][j].len ); // 0origin
}
fclose( dlf );
if( mapout )
{
dlf = fopen( "_deletemap", "w" );
- reconstructdeletemap( nadd, addbk, deletelist, seq+njob-nadd, dlf, name+njob-nadd );
+ if( mapout == 1 )
+ reconstructdeletemap( nadd, addbk, deletelist, seq+njob-nadd, dlf, name+njob-nadd );
+ else
+ reconstructdeletemap_compact( nadd, addbk, deletelist, seq+njob-nadd, dlf, name+njob-nadd );
fclose( dlf );
}
}
@@ -3752,7 +3819,18 @@ int main( int argc, char *argv[] )
FreeCharMtx( tmpseq );
freeconstants();
if( addbk ) FreeCharMtx( addbk ); addbk = NULL;
- if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
+ if( deletelist )
+ {
+ for( i=0; deletelist[i] != NULL; i++ ) free( deletelist[i] );
+ free( deletelist );
+ deletelist = NULL;
+ }
+ if( difflist )
+ {
+ for( i=0; difflist[i] != NULL; i++ ) free( difflist[i] );
+ free( difflist );
+ difflist = NULL;
+ }
if( originalgaps ) free( originalgaps ); originalgaps = NULL;
writeData_pointer( prep_g, njob, name, nlen, seq );
=====================================
core/defs.c
=====================================
@@ -131,6 +131,8 @@ double sueff_global = SUEFF;
double lenfaca, lenfacb, lenfacc, lenfacd;
int maxl, tsize;
+char codonpos = 0;
+char codonscore = 0;
void initglobalvariables()
=====================================
core/disttbfast.c
=====================================
@@ -459,10 +459,12 @@ void arguments( int argc, char *argv[] )
case 'J':
tbutree = 0;
break;
+#if 0
case 'z':
fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
+#endif
case 'w':
fftWinSize = myatoi( *++argv );
--argc;
@@ -479,6 +481,9 @@ void arguments( int argc, char *argv[] )
case 'Y':
keeplength = 1;
break;
+ case 'z':
+ mapout = 2;
+ break;
case 'Z':
mapout = 1;
break;
@@ -3466,7 +3471,7 @@ int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, cha
char algbackup;
char *originalgaps = NULL;
char **addbk = NULL;
- int **deletelist = NULL;
+ GapPos **deletelist = NULL;
FILE *dlf = NULL;
int randomseed;
int **localmem = NULL;
@@ -5031,11 +5036,12 @@ int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, cha
{
dlf = fopen( "_deletelist", "w" );
- deletelist = (int **)calloc( nadd+1, sizeof( int * ) );
+ deletelist = (GapPos **)calloc( nadd+1, sizeof( GapPos * ) );
for( i=0; i<nadd; i++ )
{
- deletelist[i] = calloc( 1, sizeof( int ) );
- deletelist[i][0] = -1;
+ deletelist[i] = calloc( 1, sizeof( GapPos ) );
+ deletelist[i][0].pos = -1;
+ deletelist[i][0].len = 0;
}
deletelist[nadd] = NULL;
ndeleted = deletenewinsertions_whole( njob-nadd, nadd, bseq, bseq+njob-nadd, deletelist );
@@ -5043,8 +5049,9 @@ int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, cha
for( i=0; i<nadd; i++ )
{
if( deletelist[i] )
- for( j=0; deletelist[i][j]!=-1; j++ )
- fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
+ for( j=0; deletelist[i][j].pos!=-1; j++ )
+// fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
+ fprintf( dlf, "%d %d %d\n", njob-nadd+i, deletelist[i][j].pos, deletelist[i][j].len ); // 0origin
}
fclose( dlf );
@@ -5054,13 +5061,19 @@ int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, cha
if( mapout )
{
dlf = fopen( "_deletemap", "w" );
- reconstructdeletemap( nadd, addbk, deletelist, bseq+njob-nadd, dlf, name+njob-nadd );
+ if( mapout == 1 )
+ reconstructdeletemap( nadd, addbk, deletelist, bseq+njob-nadd, dlf, name+njob-nadd );
+ else
+ reconstructdeletemap_compact( nadd, addbk, deletelist, seq+njob-nadd, dlf, name+njob-nadd );
FreeCharMtx( addbk );
addbk = NULL;
fclose( dlf );
}
- FreeIntMtx( deletelist );
+// FreeIntMtx( deletelist );
+// deletelist = NULL;
+ for( i=0; deletelist[i] != NULL; i++ ) free( deletelist[i] );
+ free( deletelist );
deletelist = NULL;
}
@@ -5157,7 +5170,12 @@ int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, cha
closeFiles();
FreeCommonIP();
if( originalgaps ) free( originalgaps ); originalgaps = NULL;
- if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
+ if( deletelist )
+ {
+ for( i=0; deletelist[i] != NULL; i++ ) free( deletelist[i] );
+ free( deletelist );
+ deletelist = NULL;
+ }
// use_getrusage();
@@ -5205,7 +5223,12 @@ chudan:
if( selfscore ) free( selfscore ); selfscore = NULL;
if( skiptable ) FreeIntMtx( skiptable ); skiptable = NULL;
if( originalgaps ) free( originalgaps ); originalgaps = NULL;
- if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
+ if( deletelist )
+ {
+ for( i=0; deletelist[i] != NULL; i++ ) free( deletelist[i] );
+ free( deletelist );
+ deletelist = NULL;
+ }
if( subtable ) FreeIntMtx( subtable ); subtable = NULL;
=====================================
core/filter.c
=====================================
@@ -21,6 +21,32 @@ static double count_unusual( char *seq, char *usual )
return( (double)count / (pt-seq) );
}
+static void shortenN( char *seq, char unknown )
+{
+ int i;
+ int status;
+ char *out = seq;
+ int unknownU = toupper(unknown);
+ status = 0;
+ while( *seq )
+ {
+ if( unknownU != toupper(*seq) ) // hikouritsu?
+ {
+ *out++ = *seq++;
+ status = 0;
+ }
+ else if( status == 0 )
+ {
+ *out++ = unknown;
+ seq++;
+ status = 1;
+ }
+ else
+ seq++;
+ }
+ *out = 0;
+}
+
void arguments( int argc, char *argv[] )
{
@@ -79,6 +105,7 @@ int main( int argc, char *argv[] )
int *nlen;
int i;
char *usual;
+ char unknown;
int nout;
char *tmpseq;
@@ -132,17 +159,26 @@ int main( int argc, char *argv[] )
#endif
if( dorp == 'p' )
+ {
usual = "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv-";
+ unknown = 'X';
+ }
else
+ {
usual = "ATGCUatgcu-";
+ unknown = 'n';
+ }
nout = 0;
for( i=0; i<njob; i++ )
{
gappick0( tmpseq, seq[i] );
if( count_unusual( tmpseq, usual ) <= maxunusual )
{
+ shortenN( tmpseq, unknown ); // 2022/Apr
+
fprintf( stdout, ">%s\n", name[i]+1 );
- fprintf( stdout, "%s\n", seq[i] );
+// fprintf( stdout, "%s\n", seq[i] );
+ fprintf( stdout, "%s\n", tmpseq ); // 2022/Apr
nout++;
}
}
=====================================
core/functions.h
=====================================
@@ -162,6 +162,7 @@ extern void part_imp_match_init( double *imp, int clus1, int clus2, int lgth1, i
extern double partA__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *, char *, char *, char *, int *, int, int * );
extern double partA__align_variousdist( int **which, double ***scoringmatrices, double **dummtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *, char *, char *, char *, int *, int, int * );
extern double G__align11( double **scoringmtx, char **seq1, char **seq2, int alloclen, int headgp, int tailgp );
+extern double G__align11psg( double **codonmtx, double **scoringmtx, char **seq1, char **seq2, int alloclen, int headgp, int tailgp, double *gstart, double *gend );
extern double G__align11_noalign( double **scoringmtx, int penal, int penal_ex, char **seq1, char **seq2, int alloclen );
extern double L__align11( double **scoringmtx, double scoreoffset, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt );
extern double L__align11_noalign( double **scoringmtx, char **seq1, char **seq2 );
@@ -361,6 +362,7 @@ extern int overlapmember( int *mem1, int *mem2 );
extern void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg );
extern void sreverse( char *r, char *s );
extern int addonetip( int njobc, int ***topolc, double **lenc, double **iscorec, int ***topol, double **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign );
+extern int addonetip2top( int njobc, int ***topolc, double **lenc, double **iscorec, int ***topol, double **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign );
extern void intcpy( int *s1, int *s2 );
extern void intncpy( int *s1, int *s2, int n );
extern void fltncpy( double *s1, double *s2, int n );
@@ -387,11 +389,13 @@ extern double sumofpairsscore( int nseq, char **seq );
//extern void restoregaponlysites( char *originallygapped, int n1, int n2, char **s1, char **s2, int rep );
extern int isallgap( char * );
-extern int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **deletelist );
-extern int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, int **deletelist );
+extern int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, GapPos **deletelist );
+extern int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, GapPos **deletelist );
+extern int deletenewinsertions_difflist( int on, int an, char **oseq, char **aseq, GapPos **difflist );
extern int recordoriginalgaps( char *originallygapped, int n, char **s );
extern void restoreoriginalgaps( int n, char **seq, char *originalgaps );
-extern void reconstructdeletemap( int nadd, char ** addbk, int **deletelist, char **realn, FILE *fp, char **name );
+extern void reconstructdeletemap( int nadd, char ** addbk, GapPos **deletelist, char **realn, FILE *fp, char **name );
+extern void reconstructdeletemap_compact( int nadd, char ** addbk, GapPos **deletelist, char **realn, FILE *fp, char **name );
extern double D__align( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp );
extern double D__align_ls( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp );
extern double D__align_variousdist( int **whichmtx, double ***matrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, int constraint, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp );
@@ -425,3 +429,5 @@ extern void limitlh( int *uselh, Lennum *in, int size, int limit );
extern double distdp_noalign( char *s1, char *s2, double selfscore1, double selfscore2, int alloclen ); // tbfast.c kara yobareru
extern void getweightfromname( int n, double *w, char **name );
extern void readexternalanchors( ExtAnch **extanch, int nseq, int *nogaplen );
+extern double **loadcodonscore( FILE *, double **mtx );
+extern int codon2id( char * );
=====================================
core/io.c
=====================================
@@ -1742,6 +1742,7 @@ void readData_pointer( FILE *fp, char **name, int *nlen, char **seq )
name[i][j-1] = 0;
#else
myfgets( name[i]+1, B-2, fp );
+// reporterr( "B=%d, name[i]+1=%s\n", B, name[i]+1 );
#endif
#if 0
fprintf( stderr, "name[%d] = %s\n", i, name[i] );
@@ -2156,8 +2157,11 @@ void getnumlen_casepreserve( FILE *fp, int *nlenminpt )
tmp = strlen( tmpseq );
if( tmp > nlenmax ) nlenmax = tmp;
if( tmp < *nlenminpt ) *nlenminpt = tmp;
- atgcnum += countATGC( tmpseq, &nsite );
- total += nsite;
+ if( total < 1000000 )
+ {
+ atgcnum += countATGC( tmpseq, &nsite );
+ total += nsite;
+ }
free( tmpseq );
}
free( tmpname );
@@ -2201,10 +2205,14 @@ void getnumlen_nogap_countn( FILE *fp, int *nlenminpt, double *nfreq )
tmp = countnogaplen( tmpseq );
if( tmp > nlenmax ) nlenmax = tmp;
if( tmp < *nlenminpt ) *nlenminpt = tmp;
- atgcnum += countATGCandN( tmpseq, &nN, &nsite );
- total += nsite;
+ if( total < 100000 )
+ {
+ atgcnum += countATGCandN( tmpseq, &nN, &nsite );
+ total += nsite;
+ }
nnum += nN;
free( tmpseq );
+// if( i % 10000 == 0 ) reporterr( "atgcnum=%d, total=%d\n", atgcnum, total );
}
free( tmpname );
atgcfreq = (double)atgcnum / total;
@@ -2251,8 +2259,11 @@ void getnumlen_nogap( FILE *fp, int *nlenminpt )
tmp = countnogaplen( tmpseq );
if( tmp > nlenmax ) nlenmax = tmp;
if( tmp < *nlenminpt ) *nlenminpt = tmp;
- atgcnum += countATGC( tmpseq, &nsite );
- total += nsite;
+ if( total < 100000 )
+ {
+ atgcnum += countATGC( tmpseq, &nsite );
+ total += nsite;
+ }
free( tmpseq );
}
free( tmpname );
@@ -2302,8 +2313,11 @@ void getnumlen_nogap_outallreg( FILE *fp, int *nlenminpt )
fprintf( stdout, "%d\n", tmp );
if( tmp > nlenmax ) nlenmax = tmp;
if( tmp < *nlenminpt ) *nlenminpt = tmp;
- atgcnum += countATGC( tmpseq, &nsite );
- total += nsite;
+ if( total < 100000 )
+ {
+ atgcnum += countATGC( tmpseq, &nsite );
+ total += nsite;
+ }
free( tmpseq );
}
free( tmpname );
@@ -2529,8 +2543,11 @@ void getnumlen( FILE *fp )
tmpseq = load1SeqWithoutName_realloc( fp );
tmp = strlen( tmpseq );
if( tmp > nlenmax ) nlenmax = tmp;
- atgcnum += countATGC( tmpseq, &nsite );
- total += nsite;
+ if( total < 1000000 )
+ {
+ atgcnum += countATGC( tmpseq, &nsite );
+ total += nsite;
+ }
// fprintf( stderr, "##### total = %d\n", total );
free( tmpseq );
}
@@ -6370,3 +6387,93 @@ void readexternalanchors( ExtAnch **extanch, int nseq, int *nogaplen )
}
fclose( fp );
}
+
+static char id2nuc( int id ) // just for error message
+{
+ if( id == 0 ) return 't';
+ if( id == 1 ) return 'c';
+ if( id == 2 ) return 'a';
+ if( id == 3 ) return 'g';
+ return -1;
+}
+static void id2codon( int id, char *codon ) // just for error message
+{
+ codon[0] = id2nuc( id/16 );
+ id = id - 16 * id/16;
+ codon[1] = id2nuc( id/4 );
+ id = id - 4 * id/4;
+ codon[2] = id2nuc( id );
+ return;
+}
+
+static int nuc2id( char nuc )
+{
+ if( nuc == 't' ) return 0;
+ if( nuc == 'c' ) return 1;
+ if( nuc == 'a' ) return 2;
+ if( nuc == 'g' ) return 3;
+ return -1;
+}
+
+int codon2id( char *codon )
+{
+ int id0, id1, id2, id;
+ id0 = nuc2id( codon[0] );
+ id1 = nuc2id( codon[1] );
+ id2 = nuc2id( codon[2] );
+
+ if( id0 < 0 || id1 < 0 | id2 < 0 ) return( -1 );
+
+ return id0 * 16 + id1 * 4 + id2;
+}
+
+double **loadcodonscore( FILE *fp, double **scoremtx )
+{
+ int i, j;
+ char *buf = calloc( sizeof( char ), 1000 );
+ char codon1[3], codon2[3];
+ char aa1[1000], aa2[1000];
+ int id1, id2;
+ double score;
+ for( i=0; i<64; i++ ) for( j=0; j<64; j++ ) scoremtx[i][j] = -99999;
+ i = 0;
+ while( fgets( buf, 1000, fp ) )
+ {
+// reporterr( "%s", buf );
+ if( buf[0] == '#' ) continue;
+ if( buf[strlen(buf)-1] != '\n' )
+ {
+ reporterr( "%s: too long in codonscore file.\n", buf );
+ exit( 1 );
+ }
+ sscanf( buf, "%3s %s %3s %s %lf", codon1, aa1, codon2, aa2, &score );
+// reporterr( "codon1=%s, id=%d\n", codon1, codon2id( codon1 ) );
+// reporterr( "codon2=%s, id=%d\n", codon2, codon2id( codon2 ) );
+// reporterr( "score=%f\n", score );
+ id1 = codon2id( codon1 );
+ id2 = codon2id( codon2 );
+ if( id1 < 0 || id2 < 0 )
+ {
+ reporterr( "Cannot use codon pair %s - %s: Use small letter, a, c, g, t (instead of u)\n", codon1, codon2 );
+ exit( 1 );
+ }
+ scoremtx[id1][id2] = scoremtx[id2][id1] = score;
+ i++;
+ }
+ free( buf );
+
+ for( i=0; i<64; i++ ) for( j=0; j<64; j++ )
+ {
+ char codon[3];
+ if( scoremtx[i][j] == -99999 )
+ {
+ id2codon( i, codon );
+ reporterr( "\nCodon score for %s", codon );
+ id2codon( j, codon );
+ reporterr( "-%s (id%d-id%d) is not given.\n", codon, i, j );
+ exit( 1 );
+ }
+// reporterr( "id%d-id%d = %f\n", i, j, scoremtx[i][j] );
+ }
+ return( scoremtx );
+}
=====================================
core/mafft.tmpl
=====================================
@@ -1,7 +1,7 @@
#! /bin/bash
er=0;
myself=`dirname "$0"`/`basename "$0"`; export myself
-version="v7.490 (2021/Oct/30)"; export version
+version="v7.505 (2022/Apr/10)"; export version
LANG=C; export LANG
os=`uname`
progname=`basename "$0"`
@@ -252,12 +252,15 @@ strdir="$PWD"
scorematrix="/dev/null"
textmatrix="/dev/null"
treeinfile="/dev/null"
+codonposfile="/dev/null"
+codonscorefile="/dev/null"
rnascoremtx=" "
laraparams="/dev/null"
foldalignopt=" "
treealg=" -X 0.1 "
sueff="1.0"
maxambiguous="1.0"
+dofilter=0
scoreoutarg=" "
numthreads=0
numthreadsit=-1
@@ -410,6 +413,25 @@ if [ $# -gt 0 ]; then
elif [ "$1" = "--maxambiguous" ]; then
shift
maxambiguous="$1"
+ dofilter=1
+ elif [ "$1" = "--codonpos" ]; then
+ shift
+ codonposfile="$1"
+ if [ ! -e "$codonposfile" ]; then
+ echo "Cannot open $codonposfile" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ codonposopt=" -R "
+ elif [ "$1" = "--codonscore" ]; then
+ shift
+ codonscorefile="$1"
+ if [ ! -e "$codonscorefile" ]; then
+ echo "Cannot open $codonscorefile" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ codonscoreopt=" -S "
elif [ "$1" = "--noscore" ]; then
scorecalcopt=" -Z "
elif [ "$1" = "--6mermultipair" ]; then
@@ -645,10 +667,21 @@ if [ $# -gt 0 ]; then
addarg0="-K -I"
addfile="$1"
fragment=-2
+ elif [ "$1" = "--addtotop" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=-3
elif [ "$1" = "--smoothing" ]; then
add2ndhalfarg=$add2ndhalfarg" -p "
elif [ "$1" = "--keeplength" ]; then
add2ndhalfarg=$add2ndhalfarg" -Y "
+ elif [ "$1" = "--compactmapout" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -z -Y "
+ elif [ "$1" = "--compactmapoutfile" ]; then
+ shift
+ add2ndhalfarg=$add2ndhalfarg" -z -Y "
+ mapoutfile="$1"
elif [ "$1" = "--mapout" ]; then
add2ndhalfarg=$add2ndhalfarg" -Z -Y "
elif [ "$1" = "--mapoutfile" ]; then
@@ -1079,9 +1112,9 @@ function removetmpfile() { # for MPI
echo "" >> "$TMPFILE/infile"
cat "$addfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_addfile"
- if [ $maxambiguous != "1.0" ]; then
- mv "$TMPFILE/infile" "$TMPFILE/_tofilter"
- "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/infile" 2>>"$progressfile" || exit 1
+ if [ $dofilter -eq 1 ]; then
+# mv "$TMPFILE/infile" "$TMPFILE/_tofilter"
+# "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/infile" 2>>"$progressfile" || exit 1
mv "$TMPFILE/_addfile" "$TMPFILE/_tofilter"
"$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/_addfile" 2>>"$progressfile" || exit 1
fi
@@ -1090,6 +1123,8 @@ function removetmpfile() { # for MPI
cat "$scorematrix" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_aamtx"
cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_subalignmentstable"
cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_guidetree"
+ cat "$codonposfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_codonpos"
+ cat "$codonscorefile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_codonscore"
cat "$seedtablefile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_seedtablefile"
cat "$laraparams" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_lara.params"
cat "$pdblist" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/pdblist"
@@ -1104,6 +1139,8 @@ $addfile
$scorematrix
$mergetable
$treeinfile
+$codonposfile
+$codonscorefile
$seedtablefile
$laraparams
$pdblist
@@ -1335,10 +1372,11 @@ $ownlist"
exit 1;
fi
# nagasa check!
-#
- if [ $npair -gt 10000000 -o $nlen -gt 5000 ]; then # 2017/Oct
+# if [ $npair -gt 10000000 -o $nlen -gt 5000 ]; then # 2017/Oct
+ if [ $npair -gt 10000000 -o $nlen -gt 5000 -o $nadd -gt 500000 ]; then # 2021/Dec pairlocalalign to buntan
distance="ktuples"
echo "use ktuples, size=$tuplesize!" 1>>"$progressfile"
+# elif [ $npair -gt 3000000 -o $nlen -gt 5000 ]; then # 2017/Oct
elif [ $npair -gt 3000000 -o $nlen -gt 5000 ]; then # 2017/Oct
distance="multi"
weighti="0.0"
@@ -2000,6 +2038,11 @@ $ownlist"
laof=0.0 # 2015Jun01
lexp=0.0 # 2015Jun01
iterate=0
+ elif [ $fragment -eq "-3" ]; then
+ addarg="$addarg0 $nadd"
+ addsinglearg="-x" # add to top, 2021/12/31
+ cycle=1 # chuui 2014Aug25
+ iterate=0
else
addarg="$addarg0 $nadd"
addsinglearg=""
@@ -2032,6 +2075,21 @@ $ownlist"
fi
fi
+ if [ "$codonposfile" != "/dev/null" -o "$codonscorefile" != "/dev/null" ]; then
+ if [ $nadd -eq "0" -o $fragment -eq "0" ]; then
+ echo '' 1>>"$progressfile"
+ echo "'--codonpos' and '--codonscore' options are supported only with the '--6merpair --addfragments' option." 1>>"$progressfile"
+ echo '' 1>>"$progressfile"
+ exit 1
+ fi
+ if [ $distance != "ktuples" ]; then # ato de taiou
+ echo '' 1>>"$progressfile"
+ echo "'--codonpos' and '--codonscore' options are supported only with the '--6merpair --addfragments' option, at this point." 1>>"$progressfile"
+ echo '' 1>>"$progressfile"
+ exit 1
+ fi
+ fi
+
if [ -z "$localparam" -a $fragment -eq 0 -a $distance != "parttree" ]; then
# echo "use disttbfast"
@@ -2143,6 +2201,8 @@ $ownlist"
strategy=$strategy"full"
elif [ $fragment -eq -2 ]; then
strategy=$strategy"long"
+ elif [ $fragment -eq -3 ]; then
+ strategy=$strategy"top"
else
strategy=$strategy$cycle
fi
@@ -2502,7 +2562,7 @@ $ownlist"
"$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
cat hat3.seed hat3 > hatx
mv hatx hat3
- "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1
else
"$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
fi
@@ -2518,7 +2578,7 @@ $ownlist"
"$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
cat hat3.seed hat3 > hatx
mv hatx hat3
- "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1
else
"$prefix/pairlocalalign" -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
# addarg wo watasanai
@@ -2534,7 +2594,7 @@ $ownlist"
mv hat2 hat2n
mv hatx hat3
if [ $fragment -ne 0 ]; then
- "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1
else
echo "Impossible" 1>&2
exit 1
@@ -2547,7 +2607,7 @@ $ownlist"
mv hat2 hat2n
mv hatx hat3
if [ $fragment -ne 0 ]; then
- "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1
else
echo "Impossible" 1>&2
exit 1
@@ -2558,7 +2618,7 @@ $ownlist"
mv hatx hat3
"$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
if [ $fragment -ne 0 ]; then
- "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1
else
"$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
fi
@@ -2574,15 +2634,15 @@ $ownlist"
# "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
# mv hat2 hat2n
if [ $fragment -ne 0 ]; then
- "$prefix/addsingle" -Q 100 $legacygapopt -d -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
-# "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -d -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1
+# "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1
else
echo "Impossible" 1>&2
exit 1
fi
else
if [ $fragment -ne 0 ]; then
- "$prefix/addsingle" -Q 100 $legacygapopt -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/addsingle" $codonposopt $codonscoreopt -Q 100 $legacygapopt -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg < infile > /dev/null 2>>"$progressfile" || exit 1
else
"$prefix/disttbfast" -q $npickup -E $cycledisttbfast -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreads-$numthreadstb $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -g $gexp -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg $anchoropt -x $maxanchorseparation $oneiterationopt < infile > pre 2>>"$progressfile" || exit 1
mv hat3.seed hat3
=====================================
core/makemergetable.rb
=====================================
@@ -1,21 +1,26 @@
#!/bin/env ruby
-require 'getopts'
seedoffset = 0
-if getopts( "s:" ) == nil || ARGV.length == 0 || $OPT_h then
- puts "Usage: #{$0} [-s number_of_seeds] input_files"
- exit
-end
-
-if $OPT_s
- seedoffset = $OPT_s.to_i
-end
+#require 'getopts'
+#if getopts( "s:" ) == nil || ARGV.length == 0 || $OPT_h then
+# puts "Usage: #{$0} [-s number_of_seeds] input_files"
+# exit
+#end
+#
+#if $OPT_s
+# seedoffset = $OPT_s.to_i
+#end
+require 'optparse'
+opt = OptionParser.new
+OPTS = {}
+opt.on('-s VAL') {|v| OPTS[:s] = v}
+opt.parse!(ARGV)
+seedoffset = OPTS[:s].to_i
files = ARGV
-
num = seedoffset + 1
for file in files
output = ""
=====================================
core/mltaln.h
=====================================
@@ -36,7 +36,7 @@
-#define VERSION "7.490"
+#define VERSION "7.505"
#define SHOWVERSION reporterr( "%s (%s) Version " VERSION "\nalg=%c, model=%s, amax=%3.1f\n%d thread(s)\n\n", progName( argv[0] ), (dorp=='d')?"nuc":((nblosum==-2)?"text":"aa"), alg, modelname, specificityconsideration, nthread )
#define FFT_THRESHOLD 80
@@ -331,6 +331,13 @@ typedef struct _extanch
int score;
} ExtAnch;
+typedef struct _gappos
+{
+ int pos;
+ int len;
+} GapPos;
+
+
#include "fft.h"
#include "dp.h"
#include "functions.h"
@@ -376,6 +383,9 @@ extern double sueff_global;
extern double lenfaca, lenfacb, lenfacc, lenfacd;
extern int maxl, tsize;
+extern char codonpos;
+extern char codonscore;
+
/* for --large */
extern int compacttree;
=====================================
core/mltaln9.c
=====================================
@@ -13930,6 +13930,513 @@ double plainscore( int nseq, char **s )
return( v );
}
+int addonetip2top( int njobc, int ***topolc, double **lenc, double **iscorec, int ***topol, double **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign )
+{
+ int i, j, mem0, mem1, posinnew, m;
+ int nstep;
+ int norg;
+ double minscore, minscoreo, eff0, eff1, addedlen, tmpmin;
+ int nearest, nearesto;
+ int repnorg;
+ int *leaf2node;
+ int *additionaltopol;
+// double (*clusterfuncpt[1])(double,double);
+ Bchain *ac, *acpt, *acori, *acnext, *acprev;
+ int neighbor;
+ char *neighborlist;
+ char *npt;
+ int reflen, nearestnode, nogaplentoadd;
+ int *topoldum0 = NULL;
+ int *topoldum1 = NULL;
+ int *topolo0;
+ int *topolo1;
+ int seqlengthcondition;
+ double sueff1_double_local = 1.0 - sueff_global;
+ double sueff05_double_local = sueff_global * 0.5;
+// char **tree; //static?
+// char *treetmp; //static?
+
+// for( i=0; i<njobc; i++ ) reporterr( "nogaplen of %d = %d\n", i+1, nogaplen[i] );
+//exit( 1 );
+
+
+// treetmp = AllocateCharVec( njob*150 );
+// tree = AllocateCharMtx( njob, njob*150 );
+
+// sueff1_double = 1.0 - sueff_global;
+// sueff05_double = sueff_global * 0.5;
+// if ( treemethod == 'X' )
+// clusterfuncpt[0] = cluster_mix_double;
+// else if ( treemethod == 'E' )
+// clusterfuncpt[0] = cluster_average_double;
+// else if ( treemethod == 'q' )
+// clusterfuncpt[0] = cluster_minimum_double;
+// else
+// {
+// reporterr( "Unknown treemethod, %c\n", treemethod );
+// exit( 1 );
+// }
+
+ norg = njobc-1;
+ nstep = njobc-2;
+
+ additionaltopol = (int *)calloc( 2, sizeof( int ) );
+ leaf2node= (int *)calloc( norg, sizeof( int ) );
+ if( treeout )
+ {
+ neighborlist = calloc( norg * 30, sizeof( char ) );
+ }
+// for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 );
+ if( !leaf2node )
+ {
+ reporterr( "Cannot allocate leaf2node.\n" );
+ exit( 1 );
+ }
+ additionaltopol[0] = norg;
+ additionaltopol[1] = -1;
+
+ ac = (Bchain *)malloc( norg * sizeof( Bchain ) );
+ for( i=0; i<norg; i++ )
+ {
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
+ }
+ ac[norg-1].next = NULL;
+
+
+ acori = (Bchain *)malloc( 1 * sizeof( Bchain ) );
+ acori->next = ac;
+ acori->pos = -1;
+ ac[0].prev = acori;
+
+
+// for( i=0; i<nstep; i++ )
+// {
+// reporterr( "distfromtip = %f\n", dep[i].distfromtip );
+// }
+//
+// for( i=0; i<norg; i++ )
+// {
+// reporterr( "disttofrag(%d,%d) = %f\n", i, njobc-1, iscorec[i][norg-i] );
+// }
+
+
+#if 0
+ minscore = 9999.9;
+ nearest = -1;
+ for( i=0; i<norg; i++ )
+ {
+ tmpmin = iscorec[i][norg-i];
+ if( minscore > tmpmin )
+ {
+ minscore = tmpmin;
+ nearest = i;
+ }
+ }
+#else
+ nearest = 0;
+ minscore = 0.0;
+#endif
+
+ nearesto = nearest;
+ minscoreo = minscore;
+
+
+
+// for( i=0; i<njobc-1; i++ ) for( j=i+1; j<njobc; j++ )
+// reporterr( "iscorec[%d][%d] = %f\n", i, j, iscorec[i][j-i] );
+// reporterr( "nearest = %d\n", nearest+1 );
+// reporterr( "nearesto = %d\n", nearesto+1 );
+
+ posinnew = 0;
+ repnorg = -1;
+ nogaplentoadd = nogaplen[norg];
+
+
+
+ for( i=0; i<norg; i++ ) leaf2node[i] = -1;
+ for( i=0; i<nstep; i++ )
+ {
+ mem0 = topol[i][0][0];
+ mem1 = topol[i][1][0];
+#if 0
+ reporterr( "\n\nstep %d (old) \n", i );
+
+ reporterr( "group0 = \n" );
+ for( j=0; topol[i][0][j]>-1; j++ )
+ {
+ reporterr( "%d ", topol[i][0][j]+1 );
+ }
+ reporterr( "\n" );
+ reporterr( "len=%f\n", len[i][0] );
+ reporterr( "group1 = \n" );
+ for( j=0; topol[i][1][j]>-1; j++ )
+ {
+ reporterr( "%d ", topol[i][1][j]+1 );
+ }
+ reporterr( "\n" );
+ reporterr( "len=%f\n", len[i][1] );
+
+ reporterr( "\n\n\nminscore = %f ? %f\n", minscore, dep[i].distfromtip*2 );
+ reporterr( "i = %d\n", i );
+ if( leaf2node[nearest] == -1 )
+ {
+ reporterr( "nogaplen[nearest] = %d\n", nogaplen[nearest] );
+ }
+ else
+ {
+ reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] );
+ reporterr( "leaf2node[nearest] = %d\n", leaf2node[nearest] );
+ }
+#endif
+ nearestnode = leaf2node[nearest];
+ if( nearestnode == -1 )
+ reflen = nogaplen[nearest];
+ else
+ reflen = alnleninnode[nearestnode];
+// reflen = alnleninnode[i]; // BUG!!
+
+ if( noalign ) seqlengthcondition = 1;
+ else seqlengthcondition = ( nogaplentoadd <= reflen );
+
+//seqlengthcondition = 1; // CHUUI
+//seqlengthcondition = ( nogaplentoadd <= reflen ); // CHUUI
+
+// if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku.
+ if( repnorg == -1 && dep[i].distfromtip * 2 >= minscore ) // Keitouteki ichi dake ga hitsuyouna baaiha kore wo tsukau.
+ {
+// reporterr( "INSERT HERE, %d-%d\n", nearest, norg );
+// reporterr( "nearest = %d\n", nearest );
+// reporterr( "\n\n\nminscore = %f\n", minscore );
+// reporterr( "distfromtip *2 = %f\n", dep[i].distfromtip * 2 );
+// reporterr( "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] );
+
+ if( nearestnode == -1 )
+ {
+// reporterr( "INSERTING to 0!!!\n" );
+// reporterr( "lastlength = %d\n", nogaplen[norg] );
+// reporterr( "reflength = %d\n", nogaplen[nearest] );
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) );
+ topolc[posinnew][0][0] = nearest;
+ topolc[posinnew][0][1] = -1;
+
+ addedlen = lenc[posinnew][0] = minscore / 2;
+
+ }
+ else
+ {
+// reporterr( "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) );
+// reporterr( "alnleninnode[i] = %d\n", alnleninnode[i] );
+// reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] );
+
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[nearestnode][0] ) + countmem( topol[nearestnode][1] ) + 1 ) * sizeof( int ) ) );
+// reporterr( "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] );
+ intcpy( topolc[posinnew][0], topol[nearestnode][0] );
+ intcat( topolc[posinnew][0], topol[nearestnode][1] );
+// addedlen = lenc[posinnew][0] = minscore / 2 - len[nearestnode][0]; // bug!!
+ addedlen = lenc[posinnew][0] = dep[i].distfromtip - minscore / 2; // 2014/06/10
+// fprintf( stderr, "addedlen = %f, dep[i].distfromtip = %f, len[nearestnode][0] = %f, minscore/2 = %f, lenc[posinnew][0] = %f\n", addedlen, dep[i].distfromtip, len[nearestnode][0], minscore/2, lenc[posinnew][0] );
+
+ }
+ neighbor = lastmem( topolc[posinnew][0] );
+
+ if( treeout )
+ {
+#if 0
+ fp = fopen( "infile.tree", "a" ); // kyougou!!
+ if( fp == 0 )
+ {
+ reporterr( "File error!\n" );
+ exit( 1 );
+ }
+ fprintf( fp, "\n" );
+ fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] );
+ fprintf( fp, " nearest sequence: %d\n", nearest + 1 );
+ fprintf( fp, " distance: %f\n", minscore );
+ fprintf( fp, " cousin: " );
+ for( j=0; topolc[posinnew][0][j]!=-1; j++ )
+ fprintf( fp, "%d ", topolc[posinnew][0][j]+1 );
+ fprintf( fp, "\n" );
+ fclose( fp );
+#else
+ addtree[iadd].nearest = nearesto;
+ addtree[iadd].dist1 = minscoreo;
+ addtree[iadd].dist2 = minscore;
+ neighborlist[0] = 0;
+ npt = neighborlist;
+ for( j=0; topolc[posinnew][0][j]!=-1; j++ )
+ {
+ sprintf( npt, "%d ", topolc[posinnew][0][j]+1 );
+ npt += strlen( npt );
+ }
+ addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) );
+ strcpy( addtree[iadd].neighbors, neighborlist );
+#endif
+ }
+
+// reporterr( "INSERTING to 1!!!\n" );
+ topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) );
+ topolc[posinnew][1][0] = norg;
+ topolc[posinnew][1][1] = -1;
+ lenc[posinnew][1] = minscore / 2;
+
+// reporterr( "STEP %d (newnew)\n", posinnew );
+// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 );
+// reporterr( "\n len=%f\n", lenc[posinnew][0] );
+// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 );
+// reporterr( "\n len=%f\n", lenc[posinnew][1] );
+
+ repnorg = nearest;
+
+// reporterr( "STEP %d\n", posinnew );
+// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] );
+// reporterr( "\n len=%f\n", lenc[i][0] );
+// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] );
+// reporterr( "\n len=%f\n", lenc[i][1] );
+
+// im = topolc[posinnew][0][0];
+// jm = topolc[posinnew][1][0];
+// sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] );
+// strcpy( tree[im], treetmp );
+
+ posinnew++;
+ }
+
+// reporterr( "minscore = %f\n", minscore );
+// reporterr( "distfromtip = %f\n", dep[i].distfromtip );
+// reporterr( "Modify matrix, %d-%d\n", nearest, norg );
+ eff0 = iscorec[mem0][norg-mem0];
+ eff1 = iscorec[mem1][norg-mem1];
+
+// iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 );
+ iscorec[mem0][norg-mem0] = MIN( eff0, eff1 ) * sueff1_double_local + ( eff0 + eff1 ) * sueff05_double_local;
+ iscorec[mem1][norg-mem1] = 9999.9; // sukoshi muda
+
+ acprev = ac[mem1].prev;
+ acnext = ac[mem1].next;
+ acprev->next = acnext;
+ if( acnext != NULL ) acnext->prev = acprev;
+
+ if( ( nearest == mem1 || nearest == mem0 ) )
+ {
+ minscore = 9999.9;
+// for( j=0; j<norg; j++ ) // sukoshi muda
+// {
+// if( minscore > iscorec[j][norg-j] )
+// {
+// minscore = iscorec[j][norg-j];
+// nearest = j;
+// }
+// }
+// reporterr( "searching on modified ac " );
+ for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda
+ {
+// reporterr( "." );
+ j = acpt->pos;
+ tmpmin = iscorec[j][norg-j];
+ if( minscore > tmpmin )
+ {
+ minscore = tmpmin;
+ nearest = j;
+ }
+ }
+// reporterr( "done\n" );
+ }
+
+// reporterr( "posinnew = %d\n", posinnew );
+
+
+ if( topol[i][0][0] == repnorg )
+ {
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][0], topol[i][0] );
+ intcat( topolc[posinnew][0], additionaltopol );
+ lenc[posinnew][0] = len[i][0] - addedlen; // 2014/6/10
+// fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip );
+// fprintf( stderr, "addedlen=%f, len[i][0]=%f, lenc[][0]=%f\n", addedlen, len[i][0], lenc[posinnew][0] );
+// fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][0] );
+ addedlen = 0.0;
+ }
+ else
+ {
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][0], topol[i][0] );
+ lenc[posinnew][0] = len[i][0];
+ }
+
+ if( topol[i][1][0] == repnorg )
+ {
+ topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][1], topol[i][1] );
+ intcat( topolc[posinnew][1], additionaltopol );
+ lenc[posinnew][1] = len[i][1] - addedlen; // 2014/6/10
+// fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip );
+// fprintf( stderr, "addedlen=%f, len[i][1]=%f, lenc[][1]=%f\n", addedlen, len[i][1], lenc[posinnew][1] );
+// fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][1] );
+ addedlen = 0.0;
+
+ repnorg = topolc[posinnew][0][0]; // juuyou
+ }
+ else
+ {
+ topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][1], topol[i][1] );
+ lenc[posinnew][1] = len[i][1];
+ }
+
+// reporterr( "\nSTEP %d (new)\n", posinnew );
+// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 );
+// reporterr( "\n len=%f\n", lenc[posinnew][0] );
+// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 );
+// reporterr( "\n len=%f\n", lenc[posinnew][1] );
+
+// reporterr("\ni=%d\n####### leaf2node[nearest]= %d\n", i, leaf2node[nearest] );
+
+ for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i;
+ for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i;
+
+// reporterr("####### leaf2node[nearest]= %d\n", leaf2node[nearest] );
+
+// im = topolc[posinnew][0][0];
+// jm = topolc[posinnew][1][0];
+// sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] );
+// strcpy( tree[im], treetmp );
+//
+// reporterr( "%s\n", treetmp );
+
+ posinnew++;
+ }
+
+ if( nstep )
+ {
+ i--;
+ topolo0 = topol[i][0];
+ topolo1 = topol[i][1];
+ }
+ else
+ {
+// i = 0;
+// free( topol[i][0] );//?
+// free( topol[i][1] );//?
+// topol[i][0] = calloc( 2, sizeof( int ) );
+// topol[i][1] = calloc( 1, sizeof( int ) );
+// topol[i][0][0] = 0;
+// topol[i][0][1] = -1;
+// topol[i][1][0] = -1;
+
+ topoldum0 = calloc( 2, sizeof( int ) );
+ topoldum1 = calloc( 1, sizeof( int ) );
+ topoldum0[0] = 0;
+ topoldum0[1] = -1;
+ topoldum1[0] = -1;
+
+ topolo0 = topoldum0;
+ topolo1 = topoldum1;
+ }
+ if( repnorg == -1 )
+ {
+// topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) );
+// intcpy( topolc[posinnew][0], topol[i][0] );
+// intcat( topolc[posinnew][0], topol[i][1] );
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topolo0 ) + countmem( topolo1 ) + 1 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][0], topolo0 );
+ intcat( topolc[posinnew][0], topolo1 );
+// lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; // BUG!! 2014/06/07 ni hakken
+ if( nstep )
+ lenc[posinnew][0] = minscore / 2 - dep[nstep-1].distfromtip; // only when nstep>0, 2014/11/21
+ else
+ lenc[posinnew][0] = minscore / 2;
+
+// reporterr( "\ndep[nstep-1].distfromtip = %f\n", dep[nstep-1].distfromtip );
+// reporterr( "lenc[][0] = %f\n", lenc[posinnew][0] );
+
+ topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) );
+ intcpy( topolc[posinnew][1], additionaltopol );
+ lenc[posinnew][1] = minscore / 2;
+
+// neighbor = lastmem( topolc[posinnew][0] );
+ neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji
+
+ if( treeout )
+ {
+#if 0
+ fp = fopen( "infile.tree", "a" ); // kyougou!!
+ if( fp == 0 )
+ {
+ reporterr( "File error!\n" );
+ exit( 1 );
+ }
+ fprintf( fp, "\n" );
+ fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] );
+ fprintf( fp, " nearest sequence: %d\n", nearest + 1 );
+ fprintf( fp, " cousin: " );
+ for( j=0; topolc[posinnew][0][j]!=-1; j++ )
+ fprintf( fp, "%d ", topolc[posinnew][0][j]+1 );
+ fprintf( fp, "\n" );
+ fclose( fp );
+#else
+ addtree[iadd].nearest = nearesto;
+ addtree[iadd].dist1 = minscoreo;
+ addtree[iadd].dist2 = minscore;
+ neighborlist[0] = 0;
+ npt = neighborlist;
+ for( j=0; topolc[posinnew][0][j]!=-1; j++ )
+ {
+ sprintf( npt, "%d ", topolc[posinnew][0][j]+1 );
+ npt += strlen( npt );
+ }
+ addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) );
+ strcpy( addtree[iadd].neighbors, neighborlist );
+#endif
+ }
+
+// reporterr( "STEP %d\n", posinnew );
+// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] );
+// reporterr( "\n len=%f", lenc[posinnew][0] );
+// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] );
+// reporterr( "\n len=%f\n", lenc[posinnew][1] );
+ }
+
+ if( topoldum0 ) free( topoldum0 );
+ if( topoldum1 ) free( topoldum1 );
+ free( leaf2node );
+ free( additionaltopol );
+ free( ac );
+ free( acori );
+ if( treeout ) free( neighborlist );
+
+#if 0 // create a newick tree for CHECK
+ char **tree;
+ char *treetmp;
+ int im, jm;
+
+ treetmp = AllocateCharVec( njob*150 );
+ tree = AllocateCharMtx( njob, njob*150 );
+ for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 );
+
+ for( i=0; i<njobc-1; i++ )
+ {
+ reporterr( "\nSTEP %d\n", i );
+ for( j=0; topolc[i][0][j]!=-1; j++ ) reporterr( " %d", topolc[i][0][j]+1 );
+ reporterr( "\n len=%f\n", lenc[i][0] );
+ for( j=0; topolc[i][1][j]!=-1; j++ ) reporterr( " %d", topolc[i][1][j]+1 );
+ reporterr( "\n len=%f\n", lenc[i][1] );
+
+ im = topolc[i][0][0];
+ jm = topolc[i][1][0];
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[i][0], tree[jm], lenc[i][1] );
+ strcpy( tree[im], treetmp );
+
+ }
+
+ reporterr( "%s\n", treetmp );
+ FreeCharMtx( tree );
+ free( treetmp );
+#endif
+
+ return( neighbor );
+}
int addonetip( int njobc, int ***topolc, double **lenc, double **iscorec, int ***topol, double **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign )
{
@@ -14098,7 +14605,7 @@ int addonetip( int njobc, int ***topolc, double **lenc, double **iscorec, int **
//seqlengthcondition = 1; // CHUUI
//seqlengthcondition = ( nogaplentoadd <= reflen ); // CHUUI
- if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku.
+ if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku.
// if( repnorg == -1 && dep[i].distfromtip * 2 > minscore ) // Keitouteki ichi dake ga hitsuyouna baaiha kore wo tsukau.
{
// reporterr( "INSERT HERE, %d-%d\n", nearest, norg );
=====================================
core/pairlocalalign.c
=====================================
@@ -3133,7 +3133,7 @@ int pairlocalalign( int ngui, int lgui, char **namegui, char **seqgui, double **
if( njob > M )
{
fprintf( stderr, "The number of sequences must be < %d\n", M );
- fprintf( stderr, "Please try the splittbfast program for such large data.\n" );
+ fprintf( stderr, "Please try --6merpair --addfragments for such large data.\n" );
exit( 1 );
}
}
=====================================
core/restoreu.c
=====================================
@@ -4,6 +4,7 @@
char *alignmentfile;
int keeplength;
+int mapout;
static void fillorichar( int nseq, int *oripos, char **a, char **s )
{
@@ -38,6 +39,7 @@ void arguments( int argc, char *argv[] )
int c;
keeplength = 0;
+ mapout = 0;
while( --argc > 0 && (*++argv)[0] == '-' )
{
@@ -53,7 +55,11 @@ void arguments( int argc, char *argv[] )
alignmentfile = *++argv;
--argc;
goto nextoption;
+ case 'z': // add2ndharfarg wo tsukau tame.
+ mapout = 2;
+ break;
case 'Z': // add2ndharfarg wo tsukau tame.
+ mapout = 1;
break;
case 'p': // add2ndharfarg wo tsukau tame.
break;
@@ -91,12 +97,13 @@ int main( int argc, char *argv[] )
int *nlen;
int *oripos;
char *npt, *npt0, *npt2, *pt, *pt2;
- int i, o, prelen;
+ int i, o, l, prelen;
int nlenmin;
int njobs, njoba;
// int **dlist;
// int *ndel;
char *gett;
+ char *insname;
arguments( argc, argv );
@@ -156,6 +163,7 @@ int main( int argc, char *argv[] )
aseq = AllocateCharMtx( njob, nlenmax+1 );
aname = AllocateCharMtx( njob, B+1 );
oname = AllocateCharMtx( njob, B+1 );
+ insname = calloc( njob, sizeof( char ) );
readData_pointer( alfp, aname, nlen, aseq );
fclose( alfp );
@@ -176,16 +184,20 @@ int main( int argc, char *argv[] )
{
fgets( gett, 999, dlfp );
if( feof( dlfp ) ) break;
- sscanf( gett, "%d %d", &i, &o );
+// sscanf( gett, "%d %d", &i, &o );
+ sscanf( gett, "%d %d %d", &i, &o, &l );
// reporterr( "%d, %d\n", i, o );
// dlist[i] = realloc( dlist[i], sizeof( int ) * (ndel[i]+2) );
// dlist[i][ndel[i]] = o;
// ndel[i]++;
- seq[i][o] = '-';
+ while( l-- ) seq[i][o++] = '-';
+// seq[i][o] = '-';
+ insname[i] = 1;
}
fclose( dlfp );
+ free( gett );
}
for( i=0; i<njob; i++ )
@@ -245,6 +257,7 @@ int main( int argc, char *argv[] )
exit( 1 );
}
npt += 4;
+
pt2 = npt;
pt = npt2 - 4;
@@ -254,6 +267,11 @@ int main( int argc, char *argv[] )
strncpy( oname[i], aname[i], prelen ); oname[i][prelen] = 0;
strcat( oname[i], npt0 );
+ if( mapout == 2 && insname[i] )
+ {
+ npt0 = strstr( npt0, "|" );
+ npt0 += 1;
+ }
#if 0
pt = strstr( aname[i], "_numo_e" );
if( pt ) pt += 8;
@@ -269,11 +287,11 @@ int main( int argc, char *argv[] )
// reporterr( "oname[i] = %s\n", oname[i] );
// reporterr( "aname[i] = %s\n", aname[i] );
// reporterr( " name[i] = %s\n", name[i] );
-
-// fprintf( stderr, "aname[i] = :%s:\n", aname[i] );
-// fprintf( stderr, "pt = :%s:\n", pt );
-// fprintf( stderr, "oname[i] = :%s:\n", oname[i] );
-// fprintf( stderr, "name[o] = :%s:\n", name[o] );
+//
+// fprintf( stderr, "aname[%d] = :%s:\n", i, aname[i] );
+// fprintf( stderr, "npt0 = :%s:\n", npt0 );
+// fprintf( stderr, "oname[%d] = :%s:\n", i, oname[i] );
+// fprintf( stderr, "name[%d] = :%s:\n", o, name[o] );
if( strncmp( npt0, name[o]+1, 10 ) )
{
@@ -314,6 +332,7 @@ int main( int argc, char *argv[] )
FreeCharMtx( oname );
free( nlen );
free( oripos );
+ free( insname );
return( 0 );
}
=====================================
core/tbfast.c
=====================================
@@ -475,10 +475,12 @@ static void arguments( int argc, char *argv[], int *pac, char **pav, int *tac, c
tbutree = 0;
break;
/* modification end. */
+#if 0
case 'z':
fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
+#endif
case 'w':
fftWinSize = myatoi( *++argv );
--argc;
@@ -496,6 +498,9 @@ static void arguments( int argc, char *argv[], int *pac, char **pav, int *tac, c
case 'Y':
keeplength = 1;
break;
+ case 'z':
+ mapout = 2;
+ break;
case 'Z':
mapout = 1;
break;
@@ -1905,7 +1910,7 @@ int main( int argc, char *argv[] )
char ***subalnpt;
char *originalgaps = NULL;
char **addbk = NULL;
- int **deletelist = NULL;
+ GapPos **deletelist = NULL;
FILE *dlf = NULL;
int **localmem = NULL;
int posinmem;
@@ -3434,11 +3439,12 @@ int main( int argc, char *argv[] )
{
dlf = fopen( "_deletelist", "w" );
- deletelist = (int **)calloc( nadd+1, sizeof( int * ) );
+ deletelist = (GapPos **)calloc( nadd+1, sizeof( GapPos * ) );
for( i=0; i<nadd; i++ )
{
- deletelist[i] = calloc( 1, sizeof( int ) );
- deletelist[i][0] = -1;
+ deletelist[i] = calloc( 1, sizeof( GapPos ) );
+ deletelist[i][0].pos = -1;
+ deletelist[i][0].len = 0;
}
deletelist[nadd] = NULL;
ndeleted = deletenewinsertions_whole( njob-nadd, nadd, bseq, bseq+njob-nadd, deletelist );
@@ -3446,8 +3452,8 @@ int main( int argc, char *argv[] )
for( i=0; i<nadd; i++ )
{
if( deletelist[i] )
- for( j=0; deletelist[i][j]!=-1; j++ )
- fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
+ for( j=0; deletelist[i][j].pos!=-1; j++ )
+ fprintf( dlf, "%d %d %d\n", njob-nadd+i, deletelist[i][j].pos, deletelist[i][j].len ); // 0origin
}
fclose( dlf );
@@ -3457,13 +3463,19 @@ int main( int argc, char *argv[] )
if( mapout )
{
dlf = fopen( "_deletemap", "w" );
- reconstructdeletemap( nadd, addbk, deletelist, bseq+njob-nadd, dlf, name+njob-nadd );
+ if( mapout == 1 )
+ reconstructdeletemap( nadd, addbk, deletelist, bseq+njob-nadd, dlf, name+njob-nadd );
+ else
+ reconstructdeletemap_compact( nadd, addbk, deletelist, seq+njob-nadd, dlf, name+njob-nadd );
FreeCharMtx( addbk );
addbk = NULL;
fclose( dlf );
}
- FreeIntMtx( deletelist );
+// FreeIntMtx( deletelist );
+// deletelist = NULL;
+ for( i=0; deletelist[i] != NULL; i++ ) free( deletelist[i] );
+ free( deletelist );
deletelist = NULL;
}
=====================================
readme
=====================================
@@ -1,6 +1,6 @@
-----------------------------------------------------------------------
MAFFT: a multiple sequence alignment program
- version 7.490, 2021/Oct/30
+ version 7.505, 2022/Apr/10
http://mafft.cbrc.jp/alignment/software/
katoh at ifrec.osaka-u.ac.jp
View it on GitLab: https://salsa.debian.org/med-team/mafft/-/commit/244b1a0e8a1d40cbe5e06fa92b1fba8014a07574
--
View it on GitLab: https://salsa.debian.org/med-team/mafft/-/commit/244b1a0e8a1d40cbe5e06fa92b1fba8014a07574
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220625/19040fa6/attachment-0001.htm>
More information about the debian-med-commit
mailing list