[med-svn] [Git][med-team/fasttree][master] 7 commits: New upstream version 2.1.11
Andreas Tille
gitlab at salsa.debian.org
Wed Jul 31 14:46:28 BST 2019
Andreas Tille pushed to branch master at Debian Med / fasttree
Commits:
adfb2ab5 by Andreas Tille at 2019-07-31T13:42:49Z
New upstream version 2.1.11
- - - - -
e33b16cb by Andreas Tille at 2019-07-31T13:42:49Z
Update upstream source from tag 'upstream/2.1.11'
Update to upstream version '2.1.11'
with Debian dir 492c70d439d176259be713f2c788f05d18d3523f
- - - - -
01cbc787 by Andreas Tille at 2019-07-31T13:43:15Z
New upstream version
- - - - -
daaa20a3 by Andreas Tille at 2019-07-31T13:43:32Z
debhelper-compat 12
- - - - -
140fffa0 by Andreas Tille at 2019-07-31T13:43:36Z
Standards-Version: 4.4.0
- - - - -
08bdb6ac by Andreas Tille at 2019-07-31T13:43:36Z
Remove trailing whitespace in debian/rules
- - - - -
77bb79e6 by Andreas Tille at 2019-07-31T13:45:07Z
Upload to unstable
- - - - -
6 changed files:
- changelog
- debian/changelog
- − debian/compat
- debian/control
- debian/rules
- fasttree.c
Changes:
=====================================
changelog
=====================================
@@ -1,3 +1,9 @@
+Version 2.1.11: May 17, 2019
+
+ Added the -trans option to read a transition matrix (and
+ stationary distribution) from a file. This is for amino acid
+ alignments only.
+
Version 2.1.10: April 11, 2017
Fix a bug when using GTR models with huge alignments with over 2
@@ -6,7 +12,7 @@ Version 2.1.10: April 11, 2017
negative frequencies and eventually to a crash with
"FastTree.c:9769: tqli: Assertion `iter < 30' failed.". SetMLGtr()
now uses 64-bit counters. Also, more information about the
- optimization of the GTR model is savd in the log file (if using
+ optimization of the GTR model is saved in the log file (if using
the -log option). To support this, gtr_opt_t now includes fpLog.
Version 2.1.9: March 29, 2016
=====================================
debian/changelog
=====================================
@@ -1,8 +1,15 @@
-fasttree (2.1.10-3) UNRELEASED; urgency=medium
+fasttree (2.1.11-1) unstable; urgency=medium
+ [ Jelmer Vernooij ]
* Trim trailing whitespace.
- -- Jelmer Vernooij <jelmer at debian.org> Sat, 20 Oct 2018 20:48:57 +0000
+ [ Andreas Tille ]
+ * New upstream version
+ * debhelper-compat 12
+ * Standards-Version: 4.4.0
+ * Remove trailing whitespace in debian/rules
+
+ -- Andreas Tille <tille at debian.org> Wed, 31 Jul 2019 15:43:36 +0200
fasttree (2.1.10-2) unstable; urgency=medium
=====================================
debian/compat deleted
=====================================
@@ -1 +0,0 @@
-11
=====================================
debian/control
=====================================
@@ -6,8 +6,8 @@ Uploaders: Steffen Moeller <moeller at debian.org>,
Roland Fehrenbacher <rf at q-leap.de>
Section: science
Priority: optional
-Build-Depends: debhelper (>= 11~)
-Standards-Version: 4.2.1
+Build-Depends: debhelper-compat (= 12)
+Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/fasttree
Vcs-Git: https://salsa.debian.org/med-team/fasttree.git
Homepage: http://www.microbesonline.org/fasttree/
=====================================
debian/rules
=====================================
@@ -5,7 +5,7 @@
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
%:
- dh $@
+ dh $@
override_dh_installchangelogs:
dh_installchangelogs changelog
=====================================
fasttree.c
=====================================
@@ -343,7 +343,7 @@ typedef float numeric_t;
#endif /* USE_SSE3 */
-#define FT_VERSION "2.1.10"
+#define FT_VERSION "2.1.11"
char *usage =
" FastTree protein_alignment > tree\n"
@@ -395,8 +395,9 @@ char *expertUsage =
" [-mlacc 2] [-cat 20 | -nocat] [-gamma]\n"
" [-slow | -fastest] [-2nd | -no2nd] [-slownni] [-seed 1253] \n"
" [-top | -notop] [-topm 1.0 [-close 0.75] [-refresh 0.8]]\n"
+ " [-gtr] [-gtrrates ac ag at cg ct gt] [-gtrfreq A C G T]\n"
+ " [ -lg | -wag | -trans transitionmatrixfile ]\n"
" [-matrix Matrix | -nomatrix] [-nj | -bionj]\n"
- " [-lg] [-wag] [-nt] [-gtr] [-gtrrates ac ag at cg ct gt] [-gtrfreq A C G T]\n"
" [ -constraints constraintAlignment [ -constraintWeight 100.0 ] ]\n"
" [-log logfile]\n"
" [ alignment_file ]\n"
@@ -436,6 +437,8 @@ char *expertUsage =
" To specify a different matrix, use -matrix FilePrefix or -nomatrix\n"
" Use -rawdist to turn the log-correction off\n"
" or to use %different instead of Jukes-Cantor\n"
+ " (These options affect minimum-evolution computations only;\n"
+ " use -trans to affect maximum-likelihoood computations)\n"
"\n"
" -pseudo [weight] -- Use pseudocounts to estimate distances between\n"
" sequences with little or no overlap. (Off by default.) Recommended\n"
@@ -467,6 +470,11 @@ char *expertUsage =
" -gtr -- generalized time-reversible instead of (default) Jukes-Cantor (nt only)\n"
" -cat # -- specify the number of rate categories of sites (default 20)\n"
" -nocat -- no CAT model (just 1 category)\n"
+ " - trans filename -- use the transition matrix from filename\n"
+ " This is supported for amino acid alignments only\n"
+ " The file must be tab-delimited with columns in the order ARNDCQEGHILKMFPSTWYV*\n"
+ " The additional column named * is for the stationary distribution\n"
+ " Each row must have a row name in the same order ARNDCQEGHILKMFPSTWYV\n"
" -gamma -- after the final round of optimizing branch lengths with the CAT model,\n"
" report the likelihood under the discrete gamma model with the same\n"
" number of categories. FastTree uses the same branch lengths but\n"
@@ -930,6 +938,7 @@ void FreeAlignmentSeqs(/*IN/OUT*/alignment_t *);
transition_matrix_t *CreateTransitionMatrix(/*IN*/double matrix[MAXCODES][MAXCODES],
/*IN*/double stat[MAXCODES]);
transition_matrix_t *CreateGTR(double *gtrrates/*ac,ag,at,cg,ct,gt*/, double *gtrfreq/*ACGT*/);
+transition_matrix_t *ReadAATransitionMatrix(/*IN*/char *filename);
/* For converting profiles from 1 rotation to another, or converts NULL to NULL */
distance_matrix_t *TransMatToDistanceMat(transition_matrix_t *transmat);
@@ -1648,6 +1657,7 @@ int main(int argc, char **argv) {
int nAlign = 1; /* number of alignments to read */
int iArg;
char *matrixPrefix = NULL;
+ char *transitionFile = NULL;
distance_matrix_t *distance_matrix = NULL;
bool make_matrix = false;
char *constraintsFile = NULL;
@@ -1849,6 +1859,9 @@ int main(int argc, char **argv) {
bUseWag = true;
} else if (strcmp(argv[iArg], "-gtr") == 0) {
bUseGtr = true;
+ } else if (strcmp(argv[iArg], "-trans") == 0 && iArg < argc-1) {
+ iArg++;
+ transitionFile = argv[iArg];
} else if (strcmp(argv[iArg], "-gtrrates") == 0 && iArg < argc-6) {
bUseGtr = true;
bUseGtrRates = true;
@@ -1905,6 +1918,16 @@ int main(int argc, char **argv) {
codesString = nCodes == 20 ? codesStringAA : codesStringNT;
if (nCodes == 4 && matrixPrefix == NULL)
useMatrix = false; /* no default nucleotide matrix */
+ if (transitionFile && nCodes != 20) {
+ fprintf(stderr, "The -trans option is only supported for amino acid alignments\n");
+ exit(1);
+ }
+#ifndef USE_DOUBLE
+ if (transitionFile)
+ fprintf(stderr,
+ "Warning: custom matrices may create numerical problems for single-precision FastTree.\n"
+ "You may want to recompile with -DUSE_DOUBLE\n");
+#endif
char *fileName = iArg == (argc-1) ? argv[argc-1] : NULL;
@@ -1996,10 +2019,9 @@ int main(int argc, char **argv) {
if (MLnni != 0 || MLlen) {
fprintf(fp, "ML Model: %s,",
(nCodes == 4) ?
- (bUseGtr ? "Generalized Time-Reversible" : "Jukes-Cantor") :
- (bUseLg ? "Le-Gascuel 2008" : (bUseWag ? "Whelan-And-Goldman" : "Jones-Taylor-Thorton"))
-
- );
+ (bUseGtr ? "Generalized Time-Reversible" : "Jukes-Cantor") :
+ (transitionFile ? transitionFile :
+ (bUseLg ? "Le-Gascuel 2008" : (bUseWag ? "Whelan-And-Goldman" : "Jones-Taylor-Thorton"))));
if (nRateCats == 1)
fprintf(fp, " No rate variation across sites");
else
@@ -2147,9 +2169,10 @@ int main(int argc, char **argv) {
transition_matrix_t *transmat = NULL;
if (nCodes == 20) {
- transmat = bUseLg? CreateTransitionMatrix(matrixLG08,statLG08) :
- (bUseWag? CreateTransitionMatrix(matrixWAG01,statWAG01) :
- CreateTransitionMatrix(matrixJTT92,statJTT92));
+ transmat = transitionFile? ReadAATransitionMatrix(transitionFile) :
+ (bUseLg? CreateTransitionMatrix(matrixLG08,statLG08) :
+ (bUseWag? CreateTransitionMatrix(matrixWAG01,statWAG01) :
+ CreateTransitionMatrix(matrixJTT92,statJTT92)));
} else if (nCodes == 4 && bUseGtr && (bUseGtrRates || bUseGtrFreq)) {
transmat = CreateGTR(gtrrates,gtrfreq);
}
@@ -10017,6 +10040,114 @@ void matrixt_by_vector4(/*IN*/numeric_t mat[4][MAXCODES], /*IN*/numeric_t vec[4]
#endif
}
+transition_matrix_t *ReadAATransitionMatrix(/*IN*/char *filename) {
+ assert(nCodes==20);
+ double stat[20];
+ static double matrix[MAXCODES][MAXCODES];
+ static char buf[BUFFER_SIZE];
+ FILE *fp = fopen(filename, "r");
+ if (fp == NULL) {
+ fprintf(stderr, "Cannot read transition matrix file %s\n", filename);
+ exit(1);
+ }
+ char expected[2*MAXCODES+20];
+ int posE = 0;
+ int i, j;
+ for (i = 0; i < 20; i++) {
+ expected[posE++] = codesStringAA[i];
+ expected[posE++] = '\t';
+ }
+ expected[posE++] = '*';
+ expected[posE++] = '\n';
+ expected[posE++] = '\0';
+
+ if (fgets(buf, sizeof(buf), fp) == NULL) {
+ fprintf(stderr, "Error reading header line from transition matrix file\n");
+ exit(1);
+ }
+ if (strcmp(buf, expected) != 0) {
+ fprintf(stderr, "Invalid header line in transition matrix file, it must match:\n%s\n", expected);
+ exit(1);
+ }
+ for (i = 0; i < 20; i++) {
+ if (fgets(buf, sizeof(buf), fp) == NULL) {
+ fprintf(stderr, "Error reading matrix line\n");
+ exit(1);
+ }
+ char *field = strtok(buf,"\t\r\n");
+ if (field == NULL || strlen(field) != 1 || field[0] != codesStringAA[i]) {
+ fprintf(stderr, "Line for amino acid %c does not have the expected beginning\n", codesStringAA[i]);
+ exit(1);
+ }
+ for (j = 0; j < 20; j++) {
+ field = strtok(NULL, "\t\r\n");
+ if (field == NULL) {
+ fprintf(stderr, "Not enough fields for amino acid %c\n", codesStringAA[i]);
+ exit(1);
+ }
+ matrix[i][j] = atof(field);
+ }
+ field = strtok(NULL, "\t\r\n");
+ if (field == NULL) {
+ fprintf(stderr, "Not enough fields for amino acid %c\n", codesStringAA[i]);
+ exit(1);
+ }
+ stat[i] = atof(field);
+ }
+
+ double tol = 1e-5;
+ /* Verify that stat is positive and sums to 1 */
+ double statTot = 0;
+ for (i = 0; i < 20; i++) {
+ if (stat[i] < tol) {
+ fprintf(stderr, "stationary frequency for amino acid %c must be positive\n", codesStringAA[i]);
+ exit(1);
+ }
+ statTot += stat[i];
+ }
+ if (fabs(statTot - 1) > tol) {
+ fprintf(stderr, "stationary frequencies must sum to 1 -- actual sum is %g\n", statTot);
+ exit(1);
+ }
+
+ /* Verify that diagonals are negative and dot product of stat and diagonals is -1 */
+ double totRate = 0;
+ for (i = 0; i < 20; i++) {
+ double diag = matrix[i][i];
+ if (diag > -tol) {
+ fprintf(stderr, "transition rate(%c,%c) must be negative\n",
+ codesStringAA[i], codesStringAA[i]);
+ exit(1);
+ }
+ totRate += stat[i] * diag;
+ }
+ if (fabs(totRate + 1) > tol) {
+ fprintf(stderr, "Dot product of matrix diagonal and stationary frequencies must be -1 -- actual dot product is %g\n",
+ totRate);
+ exit(1);
+ }
+
+ /* Verify that each off-diagonal entry is nonnegative and that each column sums to 0 */
+ for (j = 0; j < 20; j++) {
+ double colSum = 0;
+ for (i = 0; i < 20; i++) {
+ double value = matrix[i][j];
+ colSum += value;
+ if (i != j && value < 0) {
+ fprintf(stderr, "Off-diagonal matrix entry for (%c,%c) is negative\n",
+ codesStringAA[i], codesStringAA[j]);
+ exit(1);
+ }
+ }
+ if (fabs(colSum) > tol) {
+ fprintf(stderr, "Sum of column %c must be zero -- actual sum is %g\n",
+ codesStringAA[j], colSum);
+ exit(1);
+ }
+ }
+ return CreateTransitionMatrix(matrix, stat);
+}
+
distance_matrix_t matrixBLOSUM45 =
{
/*distances*/
View it on GitLab: https://salsa.debian.org/med-team/fasttree/compare/2d2f62c21773e8fdb4831a166544ba058668e439...77bb79e6f7cde548ea199c62a355a030ceceadee
--
View it on GitLab: https://salsa.debian.org/med-team/fasttree/compare/2d2f62c21773e8fdb4831a166544ba058668e439...77bb79e6f7cde548ea199c62a355a030ceceadee
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190731/89eef112/attachment-0001.html>
More information about the debian-med-commit
mailing list