[med-svn] [Git][med-team/muscle][upstream] New upstream version 5.1.0

Lance Lin (@linqigang) gitlab at salsa.debian.org
Sat Nov 5 12:21:22 GMT 2022



Lance Lin pushed to branch upstream at Debian Med / muscle


Commits:
9696cf25 by Lance Lin at 2022-11-05T19:11:32+07:00
New upstream version 5.1.0
- - - - -


16 changed files:

- .github/workflows/build_linux.yml
- README.md
- src/Makefile
- src/allocflat.cpp
- src/countsort.h
- src/ensemble.cpp
- src/gitver.bash
- src/gitver.bat
- src/help.h
- + src/help.txt
- src/myopts.h
- src/myutils.cpp
- src/myutils.h
- src/testlog.cpp
- src/timing.h
- src/usage.h


Changes:

=====================================
.github/workflows/build_linux.yml
=====================================
@@ -21,7 +21,7 @@ jobs:
           cd $GITHUB_WORKSPACE
           ls -lh
           cd src
-          make
+          make LDFLAGS2=-static
       - name: Upload binary artifact
         uses: actions/upload-artifact@v2
         with:


=====================================
README.md
=====================================
@@ -17,6 +17,10 @@ https://github.com/rcedgar/muscle/releases
 [Muscle v5 home page](https://drive5.com/muscle5)   
 [Manual](https://drive5.com/muscle5/manual)   
 
+### Building MUSCLE from source
+
+[https://github.com/rcedgar/muscle/wiki/Building-MUSCLE](https://github.com/rcedgar/muscle/wiki/Building-MUSCLE)
+
 
 ### Reference
 R.C. Edgar (2021) "MUSCLE v5 enables improved estimates of phylogenetic tree confidence by ensemble bootstrapping"    


=====================================
src/Makefile
=====================================
@@ -26,10 +26,7 @@ endif
 
 CXXFLAGS := $(CXXFLAGS) -O3 -fopenmp -ffast-math
 
-LDFLAGS := $(LDFLAGS) -O3 -fopenmp -pthread -lpthread
-ifeq ($(OS),Linux)
-    LDFLAGS += -static
-endif
+LDFLAGS := $(LDFLAGS) -O3 -fopenmp -pthread -lpthread ${LDFLAGS2}
 
 HDRS := $(shell echo *.h)
 OBJS := $(shell echo *.cpp | sed "-es/^/$(OS)\//" | sed "-es/ / $(OS)\//g" | sed "-es/\.cpp/.o/g")


=====================================
src/allocflat.cpp
=====================================
@@ -3,6 +3,8 @@
 uint64 GetFBSize(uint LX, uint LY)
 	{
 	uint64 Size64 = uint64(LX + 1)*uint64(LY + 1)*HMMSTATE_COUNT;
+	if (double(Size64) > 4e9)
+		Die("Memory object too large due to sequence lengths %u, %u", LX, LY);
 	uint Size = uint(Size64);
 	asserta(Size == uint(Size64));
 	return Size;


=====================================
src/countsort.h
=====================================
@@ -20,7 +20,7 @@ public:
 	CountSortMem()
 		{
 		m_MaxValueCount = 0;
-		zero(m_Vecs, NVEC);
+		memset_zero(m_Vecs, NVEC);
 		}
 
 	void Free()


=====================================
src/ensemble.cpp
=====================================
@@ -41,7 +41,7 @@ void Ensemble::SortMSA(MSA &M)
 	M.GetLabelToSeqIndex(Labels2, LabelToSeqIndex2);
 
 	char **szSeqsSorted = myalloc(char *, SeqCount);
-	zero(szSeqsSorted, SeqCount);
+	memset_zero(szSeqsSorted, SeqCount);
 	for (uint SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
 		{
 		const string &Label = Labels2[SeqIndex];


=====================================
src/gitver.bash
=====================================
@@ -1,5 +1,13 @@
 #!/bin/bash
 
+if [ ! -d ../.git ] ; then
+  if [ ! -f gitver.txt ] ; then
+    echo "0" > gitver.txt
+  fi
+  echo "Repo not found, git hash set to zero"
+  exit 0
+fi
+
 PATH=$PATH:/usr/bin
 
 git describe --abbrev=6 --dirty --long --always \


=====================================
src/gitver.bat
=====================================
@@ -3,7 +3,7 @@
 if exist gitver.txt (
 	echo gitver.txt found
 ) else (
-	echo 000 > gitver.txt
+	echo "-" > gitver.txt
 )
 
 if exist c:\cygwin64\bin\bash.exe (


=====================================
src/help.h
=====================================
@@ -1 +1,66 @@
-"This is help.txt\n"
+"Align FASTA input, write aligned FASTA (AFA) output:\n"
+"    muscle -align input.fa -output aln.afa\n"
+"\n"
+"Align large input using Super5 algorithm if -align is too expensive,\n"
+"typically needed with more than a few hundred sequences:\n"
+"    muscle -super5 input.fa -output aln.afa\n"
+"\n"
+"Single replicate alignment:\n"
+"    muscle -align input.fa -perm PERM -perturb SEED -output aln.afa\n"
+"    muscle -super5 input.fa -perm PERM -perturb SEED -output aln.afa\n"
+"        PERM is guide tree permutation none, abc, acb, bca (default none).\n"
+"        SEED is perturbation seed 0, 1, 2... (default 0 = don't perturb).\n"
+"\n"
+"Ensemble of replicate alignments, output in Ensemble FASTA (EFA) format,\n"
+"EFA has one aligned FASTA for each replicate with header line \"<PERM.SEED\":\n"
+"    muscle -align input.fa -stratified -output stratified_ensemble.efa\n"
+"    muscle -align input.fa -diversified -output diversified_ensemble.afa\n"
+"\n"
+"    -replicates N\n"
+"        Number of replicates, defaults 4, 100, 100 for stratified,\n"
+"          diversified, resampled. With -stratified there is one\n"
+"          replicate per guide tree permutation, total is 4 x N.\n"
+"\n"
+"Generate resampled ensemble from existing ensemble by sampling columns\n"
+"with replacement:\n"
+"    muscle -resample ensemble.efa -output resampled.efa\n"
+"\n"
+"    -max_gap_fract F\n"
+"       Maximum fraction of gaps in a column (F=0..1, default 0.5).\n"
+"\n"
+"    -minconf CC\n"
+"       Minimum column confidence (CC=0..1, default 0.5).\n"
+"\n"
+"If ensemble output filename has @, then one FASTA file is generated\n"
+"for each replicate where @ is replaced by perm.s, otherwise all replicates\n"
+"are written to one EFA file.\n"
+"\n"
+"Calculate disperson of an ensemble:\n"
+"    muscle -disperse ensemble.efa\n"
+"\n"
+"Extract replicate with highest total CC (diversified input recommended):\n"
+"    muscle -maxcc ensemble.efa -output maxcc.afa\n"
+"\n"
+"Extract aligned FASTA files from EFA file:\n"
+"    muscle -efa_explode ensemble.efa\n"
+"\n"
+"Convert FASTA to EFA, input has one filename per line:\n"
+"    muscle -fa2efa filenames.txt -output ensemble.efa\n"
+"\n"
+"Update ensemble by adding two sequences of digits to each replicate, digits\n"
+"are column confidence (CC) values, e.g. \"73\" means CC=0.73, \"++\" is CC=1.0:\n"
+"    muscle -addconfseq ensemble.efa -output ensemble_cc.efa\n"
+"\n"
+"Calculate letter confidence (LC) values, -ref specifies the alignment to\n"
+"compare against the ensemble (e.g. from -maxcc), output is in aligned\n"
+"FASTA format with LC values 0, 1 ... 9 instead of letters:\n"
+"    muscle -letterconf ensemble.efa -ref aln.afa -output letterconf.afa\n"
+"\n"
+"    -html aln.html\n"
+"        Alignment colored by LC in HTML format.\n"
+"\n"
+"    -jalview aln.features\n"
+"        Jalview feature file with LC values and colors.\n"
+"\n"
+"More documentation at:\n"
+"    https://drive5.com/muscle\n"


=====================================
src/help.txt
=====================================
@@ -0,0 +1,66 @@
+Align FASTA input, write aligned FASTA (AFA) output:
+    muscle -align input.fa -output aln.afa
+
+Align large input using Super5 algorithm if -align is too expensive,
+typically needed with more than a few hundred sequences:
+    muscle -super5 input.fa -output aln.afa
+
+Single replicate alignment:
+    muscle -align input.fa -perm PERM -perturb SEED -output aln.afa
+    muscle -super5 input.fa -perm PERM -perturb SEED -output aln.afa
+        PERM is guide tree permutation none, abc, acb, bca (default none).
+        SEED is perturbation seed 0, 1, 2... (default 0 = don't perturb).
+
+Ensemble of replicate alignments, output in Ensemble FASTA (EFA) format,
+EFA has one aligned FASTA for each replicate with header line "<PERM.SEED":
+    muscle -align input.fa -stratified -output stratified_ensemble.efa
+    muscle -align input.fa -diversified -output diversified_ensemble.afa
+
+    -replicates N
+        Number of replicates, defaults 4, 100, 100 for stratified,
+          diversified, resampled. With -stratified there is one
+          replicate per guide tree permutation, total is 4 x N.
+
+Generate resampled ensemble from existing ensemble by sampling columns
+with replacement:
+    muscle -resample ensemble.efa -output resampled.efa
+
+    -max_gap_fract F
+       Maximum fraction of gaps in a column (F=0..1, default 0.5).
+
+    -minconf CC
+       Minimum column confidence (CC=0..1, default 0.5).
+
+If ensemble output filename has @, then one FASTA file is generated
+for each replicate where @ is replaced by perm.s, otherwise all replicates
+are written to one EFA file.
+
+Calculate disperson of an ensemble:
+    muscle -disperse ensemble.efa
+
+Extract replicate with highest total CC (diversified input recommended):
+    muscle -maxcc ensemble.efa -output maxcc.afa
+
+Extract aligned FASTA files from EFA file:
+    muscle -efa_explode ensemble.efa
+
+Convert FASTA to EFA, input has one filename per line:
+    muscle -fa2efa filenames.txt -output ensemble.efa
+
+Update ensemble by adding two sequences of digits to each replicate, digits
+are column confidence (CC) values, e.g. "73" means CC=0.73, "++" is CC=1.0:
+    muscle -addconfseq ensemble.efa -output ensemble_cc.efa
+
+Calculate letter confidence (LC) values, -ref specifies the alignment to
+compare against the ensemble (e.g. from -maxcc), output is in aligned
+FASTA format with LC values 0, 1 ... 9 instead of letters:
+    muscle -letterconf ensemble.efa -ref aln.afa -output letterconf.afa
+
+    -html aln.html
+        Alignment colored by LC in HTML format.
+
+    -jalview aln.features
+        Jalview feature file with LC values and colors.
+
+More documentation at:
+    https://drive5.com/muscle


=====================================
src/myopts.h
=====================================
@@ -1,5 +1,5 @@
 #ifndef MY_VERSION
-#define MY_VERSION	"5.1"
+#define MY_VERSION	"5.2"
 #endif
 
 #define PROGRAM_NAME	"muscle"


=====================================
src/myutils.cpp
=====================================
@@ -807,7 +807,7 @@ static char *GetThreadStr()
 		{
 		unsigned NewThreadStrCount = ThreadIndex + 4;
 		char **NewThreadStrs = myalloc(char *, NewThreadStrCount);
-		zero(NewThreadStrs, NewThreadStrCount);
+		memset_zero(NewThreadStrs, NewThreadStrCount);
 		if (g_ThreadStrCount > 0)
 			memcpy(NewThreadStrs, g_ThreadStrs, g_ThreadStrCount*sizeof(char *));
 		g_ThreadStrs = NewThreadStrs;
@@ -898,6 +898,8 @@ void Log(const char *Format, ...)
 
 void Die_(const char *Format, ...)
 	{
+	va_list ArgList;
+	va_start(ArgList, Format);
 #pragma omp critical
 	{
 	static bool InDie = false;
@@ -908,10 +910,7 @@ void Die_(const char *Format, ...)
 
 	if (g_fLog != 0)
 		setbuf(g_fLog, 0);
-	va_list ArgList;
-	va_start(ArgList, Format);
 	myvstrprintf(Msg, Format, ArgList);
-	va_end(ArgList);
 
 	fprintf(stderr, "\n\n");
 	Log("\n");
@@ -944,6 +943,7 @@ void Die_(const char *Format, ...)
 
 	exit(1);
 	}
+	va_end(ArgList);
 	}
 
 void Warning_(const char *Format, ...)
@@ -2076,7 +2076,8 @@ void GetVersionString(string &s)
 	"T"
 #endif
 	;
-
+	if (GIT_VER == 0)
+		GIT_VER = "-";
 	Ps(s, "%s %s.%s%s [%s]", PROGRAM_NAME, MY_VERSION, GetPlatform(), Flags, GIT_VER);
 	}
 
@@ -2398,15 +2399,8 @@ void LogAllocSummary()
 void *mymalloc64(unsigned BytesPerObject, uint64 N)
 	{
 	uint64 Bytes = N*BytesPerObject;
-	//byte *p = 0;
-	//try
-	//	{
-	//	p = new byte[Bytes];
-	//	}
-	//catch (...)
-	//	{
-	//	Die("myalloc64(%u, %.3g) failed", BytesPerObject, double(N));
-	//	}
+	if (Bytes >= UINT32_MAX - 1024)
+		Die("Memory object >4Gb, probably due to long seqences");
 	byte *p = (byte *) malloc(Bytes);
 	if (p == 0)
 		Die("myalloc64(%u, %.3g) failed", BytesPerObject, double(N));


=====================================
src/myutils.h
=====================================
@@ -358,7 +358,6 @@ inline bool feq(double x, double y)
 #define asserteq(x, y)	assert(feq(x, y))
 #define assertaeq(x, y)	asserta(feq(x, y))
 
-#define	zero(a, n)	memset((a), 0, (n)*sizeof(a[0]))
 #define	memset_zero(a, n)	memset((a), 0, (n)*sizeof(a[0]))
 
 void ResetRand(unsigned Seed);


=====================================
src/testlog.cpp
=====================================
@@ -1,6 +1,10 @@
 #include "muscle.h"
 #include "timing.h"
 
+void cmd_testlog() {}
+
+#if 0
+
 inline float HACK(float x)
 	{
 	assert(x >= 0.00f);
@@ -151,3 +155,4 @@ Next:
 	ProgressLog("LOG_ADD %.3g sum_log_prob %.3g lookup %.3g\n",
 	  double(t2 - t1), double(t4 - t3), double(t6 - t5));
 	}
+#endif // 0


=====================================
src/timing.h
=====================================
@@ -1,6 +1,8 @@
 #ifndef getticks_h
 #define getticks_h
 
+#if 0
+
 // ~3 x 10^9 ticks/sec
 
 #ifdef _MSC_VER
@@ -31,4 +33,6 @@ __inline__ uint64_t GetClockTicks()
 #error	"getticks_h, unknown compiler"
 #endif
 
+#endif
+
 #endif // getticks_h


=====================================
src/usage.h
=====================================
@@ -49,7 +49,7 @@
 "\n"
 "Update ensemble by adding two sequences of digits to each replicate, digits\n"
 "are column confidence (CC) values, e.g. \"73\" means CC=0.73, \"++\" is CC=1.0:\n"
-"    muscle -addconfseqs ensemble.efa -output ensemble_cc.efa\n"
+"    muscle -addconfseq ensemble.efa -output ensemble_cc.efa\n"
 "\n"
 "Calculate letter confidence (LC) values, -ref specifies the alignment to\n"
 "compare against the ensemble (e.g. from -maxcc), output is in aligned\n"



View it on GitLab: https://salsa.debian.org/med-team/muscle/-/commit/9696cf2541762a27d1745699a24346f2933cd08c

-- 
View it on GitLab: https://salsa.debian.org/med-team/muscle/-/commit/9696cf2541762a27d1745699a24346f2933cd08c
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221105/8e861650/attachment-0001.htm>


More information about the debian-med-commit mailing list