[med-svn] [Git][med-team/mafft][upstream] New upstream version 7.475
Steffen Möller
gitlab at salsa.debian.org
Mon Dec 7 17:24:07 GMT 2020
Steffen Möller pushed to branch upstream at Debian Med / mafft
Commits:
e8272592 by Steffen Moeller at 2020-12-07T18:22:12+01:00
New upstream version 7.475
- - - - -
8 changed files:
- core/Makefile
- + core/filter.c
- core/io.c
- core/mafft.tmpl
- core/mltaln.h
- core/pairlocalalign.c
- core/replaceu.c
- readme
Changes:
=====================================
core/Makefile
=====================================
@@ -57,7 +57,7 @@ STRIP = strip
PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \
multi2hat3s pairash addsingle maffttext2hex hex2maffttext \
splittbfast disttbfast tbfast nodepair mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \
- seq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version \
+ seq2regtable regtable2seq score getlag dndpre setcore filter replaceu restoreu setdirection makedirectionlist version \
$(DASH_CLIENT)
SOS = libdisttbfast.so
DLLS = libdisttbfast.dll
@@ -66,6 +66,7 @@ DYLIBS = libdisttbfast.dylib
PERLPROGS = mafftash_premafft.pl seekquencer_premafft.pl
SCRIPTS = mafft mafft-homologs.rb mafft-sparsecore.rb
OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
+OBJFILTER = mtxutl.o io.o filter.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o Galign11.o Lalign11.o genalign11.o
@@ -161,8 +162,9 @@ dylibs : $(DYLIBS)
dlls : $(DLLS)
$(DASH_CLIENT): dash_client.go
- go build dash_client.go
-# go build --ldflags '-extldflags "-static"' dash_client.go
+# go build dash_client.go
+ env CGO_ENABLED=0 go build --ldflags '-extldflags "-static"' dash_client.go # for conda
+
univscript: univscript.tmpl Makefile
sed "s:_PROGS:$(PROGS):" univscript.tmpl > univscript
@@ -241,6 +243,9 @@ setdirection : $(OBJSETDIRECTION)
replaceu : $(OBJREPLACEU)
$(CC) -o $@ $(OBJREPLACEU) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+filter : $(OBJFILTER)
+ $(CC) -o $@ $(OBJFILTER) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
restoreu : $(OBJRESTOREU)
$(CC) -o $@ $(OBJRESTOREU) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
=====================================
core/filter.c
=====================================
@@ -0,0 +1,164 @@
+#include "mltaln.h"
+
+#define DEBUG 0
+
+double maxunusual;
+
+// Return the fraction of characters in seq (NUL-terminated) that do not occur
+// in usual.  An empty seq yields 0.0: the plain division would be 0/0, and the
+// NaN it gives under IEEE 754 fails the "<= maxunusual" test in main().
+static double count_unusual( char *seq, char *usual )
+{
+    char *pt = seq;
+    int count = 0;
+    while( *pt )
+    {
+        if( !strchr( usual, *pt ) )
+            count++;
+        pt++;
+    }
+    if( pt == seq ) return( 0.0 ); // no characters at all, so none is unusual
+    return( (double)count / (pt-seq) );
+}
+
+
+// Parse the command line into the globals maxunusual, inputfile and dorp:
+//   -m VALUE  threshold stored in maxunusual (default 0.05)
+//   -i FILE   input file name (main() reads stdin when none is given)
+//   -D / -P   set dorp to 'd' / 'p' (default NOTSPECIFIED)
+// Exits with status 1 on an illegal option, a leftover argument or a missing value.
+void arguments( int argc, char *argv[] )
+{
+    int c;
+    maxunusual = 0.05;
+    inputfile = NULL;
+    dorp = NOTSPECIFIED;
+    while( --argc > 0 && (*++argv)[0] == '-' )
+    {
+        while ( (c = *++argv[0]) )
+        {
+            // -m and -i take the next word.  argc still counts the current word, so
+            // argc < 2 means there is none and *++argv would be argv's NULL terminator.
+            if( ( c == 'm' || c == 'i' ) && argc < 2 )
+            {
+                fprintf( stderr, "option -%c requires a value\n", c );
+                exit( 1 );
+            }
+            switch( c )
+            {
+                case 'm':
+                    maxunusual = myatof( *++argv );
+                    fprintf( stderr, "maxunusual = %f\n", maxunusual );
+                    --argc;
+                    goto nextoption;
+                case 'i':
+                    inputfile = *++argv;
+                    --argc;
+                    goto nextoption;
+                case 'D': dorp = 'd'; break;
+                case 'P': dorp = 'p'; break;
+                default:
+                    fprintf( stderr, "illegal option %c\n", c );
+                    argc = 0; // the outer --argc then makes it negative, so the final check exits
+                    break;
+            }
+        }
+        nextoption: ;
+    }
+    if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); }
+}
+
+
+
+// Entry point of the "filter" helper.  Reads the sequences from the -i file (stdin
+// when no -i was given) and writes to stdout, as ">name" / sequence pairs, those
+// whose count_unusual() frequency is at most maxunusual.  When sequences were
+// dropped, their number is reported on stderr.
+// Returns 0; exits with status 1 when the input file cannot be opened.
+int main( int argc, char *argv[] )
+{
+    FILE *src;
+    int shortest; // passed to getnumlen_casepreserve(), not read afterwards
+    char **labels;
+    char **seqs;
+    int *lens;
+    char *picked; // scratch buffer receiving the gappick0() result
+    char *alphabet;
+    int k;
+    int kept;
+
+    // Fills in the globals maxunusual, inputfile and dorp.
+    arguments( argc, argv );
+
+    // Input: the -i file when one was named, stdin otherwise.
+    src = stdin;
+    if( inputfile )
+    {
+        src = fopen( inputfile, "r" );
+        if( !src )
+        {
+            fprintf( stderr, "Cannot open %s\n", inputfile );
+            exit( 1 );
+        }
+    }
+
+    // NOTE(review): presumably this call sets the globals njob and nlenmax read
+    // below; it is defined outside this file -- confirm there.
+    getnumlen_casepreserve( src, &shortest );
+
+    // Buffers sized from njob, nlenmax and B.
+    seqs = AllocateCharMtx( njob, nlenmax+1 );
+    labels = AllocateCharMtx( njob, B+1 );
+    lens = AllocateIntVec( njob );
+    picked = AllocateCharVec( nlenmax+1 );
+
+    // Load labels, lengths and sequences, then release the stream.
+    readData_pointer_casepreserve( src, labels, lens, seqs );
+    fclose( src );
+
+    // Letters counted as usual: the first set when dorp is 'p', the second
+    // set for every other value of dorp.
+    alphabet = ( dorp == 'p' )
+        ? "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv-"
+        : "ATGCUatgcu-";
+
+    // Emit the sequences that pass the threshold.
+    kept = 0;
+    for( k=0; k<njob; k++ )
+    {
+        // NOTE(review): gappick0() presumably copies seqs[k] without gap characters -- confirm.
+        gappick0( picked, seqs[k] );
+        // Phrased as !(x <= max) so that a NaN frequency also drops the sequence.
+        if( !( count_unusual( picked, alphabet ) <= maxunusual ) )
+        {
+            continue;
+        }
+        // The label is printed from its second character on; the sequence
+        // printed is seqs[k], not the picked copy.
+        fprintf( stdout, ">%s\n", labels[k]+1 );
+        fprintf( stdout, "%s\n", seqs[k] );
+        kept++;
+    }
+
+    // Tell the user how many sequences were filtered out, if any.
+    if( kept < njob )
+    {
+        int removed = njob - kept;
+        if( dorp == 'p' )
+        {
+            fprintf( stderr, "\n\nRemoved %d sequence(s) where the frequency of ambiguous amino acids > %5.3f\n\n\n", removed, maxunusual );
+        }
+        else
+        {
+            fprintf( stderr, "\n\nRemoved %d sequence(s) where the frequency of ambiguous bases > %5.3f\n\n\n", removed, maxunusual );
+        }
+    }
+
+    // Release everything allocated above.
+    free( lens );
+    free( picked );
+    FreeCharMtx( seqs );
+    FreeCharMtx( labels );
+
+    return( 0 );
+}
=====================================
core/io.c
=====================================
@@ -2664,7 +2664,9 @@ void readhat2_doublehalf_part_pointer( FILE *fp, int nseq, int nadd, char **name
char b[B];
fgets( b, B, fp );
- fgets( b, B, fp ); b[5] = 0; nseq0 = atoi( b ); if( nseq != nseq0 )
+// fgets( b, B, fp ); b[5] = 0; nseq0 = atoi( b ); if( nseq != nseq0 )
+ fgets( b, B, fp ); nseq0 = atoi( b ); // 2020/Oct/23
+ if( nseq != nseq0 )
{
fprintf( stderr, "%d != %d\n", nseq, nseq0 );
ErrorExit( "hat2 is wrong." );
=====================================
core/mafft.tmpl
=====================================
@@ -1,7 +1,7 @@
#! /bin/bash
er=0;
myself=`dirname "$0"`/`basename "$0"`; export myself
-version="v7.471 (2020/Jul/3)"; export version
+version="v7.475 (2020/Nov/23)"; export version
LANG=C; export LANG
os=`uname`
progname=`basename "$0"`
@@ -257,6 +257,7 @@ laraparams="/dev/null"
foldalignopt=" "
treealg=" -X 0.1 "
sueff="1.0"
+maxambiguous="1.0"
scoreoutarg=" "
numthreads=0
numthreadsit=-1
@@ -405,6 +406,9 @@ if [ $# -gt 0 ]; then
shift
sueff="$1"
treealg=" -X $1"
+ elif [ "$1" = "--maxambiguous" ]; then
+ shift
+ maxambiguous="$1"
elif [ "$1" = "--noscore" ]; then
scorecalcopt=" -Z "
elif [ "$1" = "--6mermultipair" ]; then
@@ -1046,7 +1050,7 @@ function removetmpfile() { # for MPI
if [ $debug -eq 1 ]; then
# trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 # does not work in msys
# trap "tar cfv - $TMPFILE | gzip -c > debuginfo.tgz; rm -rf $TMPFILE " 0 15
- trap "tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15
+ trap "popd > /dev/null 2>&1; tar cfv - \"$TMPFILE\" | gzip -c > debuginfo.tgz; removetmpfile" 0 15
else
# trap "rm -rf $TMPFILE" 0 15
trap "removetmpfile" 0 15
@@ -1065,8 +1069,16 @@ function removetmpfile() { # for MPI
cat "$1" | tr "\r" "\n" > "$TMPFILE/infile"
echo "" >> "$TMPFILE/infile"
- cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> "$TMPFILE/infile"
cat "$addfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_addfile"
+
+ if [ $maxambiguous != "1.0" ]; then
+ mv "$TMPFILE/infile" "$TMPFILE/_tofilter"
+ "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/infile" 2>>"$progressfile" || exit 1
+ mv "$TMPFILE/_addfile" "$TMPFILE/_tofilter"
+ "$prefix/filter" -m $maxambiguous $seqtype -i "$TMPFILE/_tofilter" > "$TMPFILE/_addfile" 2>>"$progressfile" || exit 1
+ fi
+ cat "$TMPFILE/_addfile" >> "$TMPFILE/infile"
+
cat "$scorematrix" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_aamtx"
cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_subalignmentstable"
cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > "$TMPFILE/_guidetree"
@@ -1190,7 +1202,9 @@ $ownlist"
exit 1;
fi
+
if [ "$addarg0" != " " ]; then
+
# iterate=0 # 2013/03/23 -> commented out, 2017/12
"$prefix/countlen" < "$TMPFILE/_addfile" > "$TMPFILE/addsize" 2>>"$progressfile"
nadd=`awk '{print $1}' "$TMPFILE/addsize"`
@@ -1305,6 +1319,13 @@ $ownlist"
echo "npair = " $npair 1>>"$progressfile"
echo "nseq = " $nseq 1>>"$progressfile"
echo "nlen = " $nlen 1>>"$progressfile"
+
+ if [ $norg -eq 0 ]; then
+ echo "" 1>>"$progressfile"
+ echo "The reference sequence was removed because of ambiguous letters?" 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ exit 1;
+ fi
# nagasa check!
#
if [ $npair -gt 10000000 -o $nlen -gt 5000 ]; then # 2017/Oct
@@ -1327,6 +1348,11 @@ $ownlist"
exit 1;
fi
+ if [ `awk "BEGIN {print( 0.0+\"$maxambiguous\" < 0.0 || 0.0+\"$maxambiguous\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The argument of --maxambiguous must be between 0.0 and 1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+
if [ $allowshift -eq 1 ]; then
if [ $unalignspecified -ne 1 ]; then
unalignlevel="0.8"
@@ -1945,6 +1971,7 @@ $ownlist"
fi
+
if [ $nadd -gt "0" ]; then
if [ $fragment -eq "1" ]; then
addarg="$addarg0 $nadd -g -0.01"
@@ -1972,12 +1999,13 @@ $ownlist"
bunkatsuopt=" -B " # fftnsi demo bunktasu shinai
if [ "$add2ndhalfarg" != " " ]; then
if [ $auto -eq 1 -o $iterate -gt 0 ]; then
- echo '' 1>>"$progressfile"
- echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile"
- echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile"
- echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile"
- echo '' 1>>"$progressfile"
- exit 1
+# echo '' 1>>"$progressfile"
+# echo 'The --keeplength and --mapout options are not supported' 1>>"$progressfile"
+# echo 'with the --auto or --maxiterate >0 options.' 1>>"$progressfile"
+# echo 'Use the --maxiterate 0 option (= progressive method).' 1>>"$progressfile"
+# echo '' 1>>"$progressfile"
+# exit 1
+ iterate=0
fi
fi
fi
@@ -2179,6 +2207,7 @@ $ownlist"
# echo "iterate = " $iterate 1>>"$progressfile"
# echo "cycle = " $cycle 1>>"$progressfile"
+
if [ $anysymbol -eq 1 ]; then
mv infile orig
"$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1
=====================================
core/mltaln.h
=====================================
@@ -36,7 +36,7 @@
-#define VERSION "7.471"
+#define VERSION "7.475"
#define SHOWVERSION reporterr( "%s (%s) Version " VERSION "\nalg=%c, model=%s, amax=%3.1f\n%d thread(s)\n\n", progName( argv[0] ), (dorp=='d')?"nuc":((nblosum==-2)?"text":"aa"), alg, modelname, specificityconsideration, nthread )
#define FFT_THRESHOLD 80
=====================================
core/pairlocalalign.c
=====================================
@@ -2475,7 +2475,8 @@ static void pairalign( char **name, int *nlen, char **seq, char **aseq, char **d
if( ngui == 0 )
{
if( alg == 'Y' || alg == 'r' )
- distancemtx = AllocateDoubleMtx( njob, nadd );
+// distancemtx = AllocateDoubleMtx( njob, nadd );
+ distancemtx = AllocateDoubleMtx( njob-nadd, nadd ); // 2020/Oct/23
else
distancemtx = AllocateDoubleHalfMtx( njob );
// distancemtx = AllocateDoubleMtx( njob, njob );
@@ -3051,7 +3052,13 @@ static void pairalign( char **name, int *nlen, char **seq, char **aseq, char **d
}
free( distseq1 );
free( distseq2 );
- if( store_dist && ngui == 0 ) FreeDoubleHalfMtx( distancemtx, njob );
+ if( store_dist && ngui == 0 )
+ {
+ if( alg == 'Y' || alg == 'r' )
+ FreeDoubleMtx( distancemtx ); // 2020/Oct/23
+ else
+ FreeDoubleHalfMtx( distancemtx, njob );
+ }
free( targetmap );
free( targetmapr );
=====================================
core/replaceu.c
=====================================
@@ -104,6 +104,7 @@ int main( int argc, char *argv[] )
nlen = AllocateIntVec( njob );
readData_pointer_casepreserve( infp, name, nlen, seq );
+ fclose( infp );
// for( i=0; i<njob; i++ ) gappick_samestring( seq[i] );
=====================================
readme
=====================================
@@ -1,6 +1,6 @@
-----------------------------------------------------------------------
MAFFT: a multiple sequence alignment program
- version 7.471, 2020/Jul/3
+ version 7.475, 2020/Nov/23
http://mafft.cbrc.jp/alignment/software/
katoh at ifrec.osaka-u.ac.jp
View it on GitLab: https://salsa.debian.org/med-team/mafft/-/commit/e8272592c5f6005900af6cac62dd099793d19be2
--
View it on GitLab: https://salsa.debian.org/med-team/mafft/-/commit/e8272592c5f6005900af6cac62dd099793d19be2
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201207/a8d8d82e/attachment-0001.html>
More information about the debian-med-commit
mailing list