[med-svn] [Git][med-team/biobambam2][upstream] New upstream version 2.0.177+ds

Étienne Mollier gitlab at salsa.debian.org
Wed Nov 18 21:02:16 GMT 2020



Étienne Mollier pushed to branch upstream at Debian Med / biobambam2


Commits:
b36bccd1 by Étienne Mollier at 2020-11-12T16:17:53+01:00
New upstream version 2.0.177+ds
- - - - -


15 changed files:

- .gitignore
- ChangeLog
- configure.ac
- release.sh
- + removespace.sh
- src/Makefile.am
- src/biobambam2/BamBamConfig.hpp.in
- src/biobambam2/UpdateNumericalIndex.cpp
- src/programs/bamcollate2.cpp
- src/programs/bamconsensus.cpp
- + src/programs/bamdifference.cpp
- src/programs/bammerge.cpp
- src/programs/bamtofastq.cpp
- src/programs/blastnxmltobam.cpp
- + src/programs/fastaselectreg.cpp


Changes:

=====================================
.gitignore
=====================================
@@ -70,3 +70,5 @@ src/vcffiltersamples
 src/vcfdiff
 src/vcffilterfilterflags
 src/vcfreplacecontigsmap
+src/fastaselectreg
+src/bamdifference


=====================================
ChangeLog
=====================================
@@ -1,3 +1,30 @@
+biobambam2 (2.0.177-1) unstable; urgency=medium
+
+  * Versioning cleanup
+
+ -- German Tischler-Höhle <germant at miltenyibiotec.de>  Thu, 12 Nov 2020 10:39:59 +0100
+
+biobambam2 (2.0.176-1) unstable; urgency=medium
+
+  * Adapt UpdateNumericalIndex to new libmaus2 api
+  * Fix output of orphaned read 2 instances in bamtofastq when splitting by readgroup
+  * Remove references to non functional IRODS interface in libmaus2
+  * Add bamdifference program
+
+ -- German Tischler-Höhle <germant at miltenyibiotec.de>  Thu, 12 Nov 2020 09:43:13 +0100
+
+biobambam2 (2.0.175-1) unstable; urgency=medium
+
+  * Add fastaselectreg
+
+ -- German Tischler-Höhle <germant at miltenyibiotec.de>  Thu, 27 Aug 2020 10:08:12 +0200
+
+biobambam2 (2.0.174-1) unstable; urgency=medium
+
+  * Fix wrong operator for deallocating memory in bamconsensus
+
+ -- German Tischler-Höhle <germant at miltenyibiotec.de>  Mon, 10 Aug 2020 11:26:08 +0200
+
 biobambam2 (2.0.173-1) unstable; urgency=medium
 
   * Fix for libmaus2 update


=====================================
configure.ac
=====================================
@@ -1,4 +1,4 @@
-AC_INIT(biobambam2,2.0.173,[germant at miltenyibiotec.de],[biobambam2],[https://gitlab.com/german.tischler/biobambam2])
+AC_INIT(biobambam2,2.0.177,[germant at miltenyibiotec.de],[biobambam2],[https://gitlab.com/german.tischler/biobambam2])
 AC_CANONICAL_SYSTEM
 AC_PROG_LIBTOOL
 
@@ -165,7 +165,7 @@ if test ! -z "${with_libmaus2}" ; then
 	fi
 fi
 
-PKG_CHECK_MODULES([libmaus2],[libmaus2 >= 2.0.740])
+PKG_CHECK_MODULES([libmaus2],[libmaus2 >= 2.0.749])
 
 if test ! -z "${with_libmaus2}" ; then
 	if test ! -z "${PKGCONFIGPATHSAVE}" ; then
@@ -300,7 +300,7 @@ if test "${have_libmaus2_irods}" = "yes" ; then
 		fi
 	fi
 
-	PKG_CHECK_MODULES([libmaus2irods],[libmaus2irods >= 2.0.740])
+	PKG_CHECK_MODULES([libmaus2irods],[libmaus2irods >= 2.0.749])
 
 	LIBMAUS2IRODSCPPFLAGS="${libmaus2irods_CFLAGS}"
 	LIBMAUS2IRODSLIBS="${libmaus2irods_LIBS}"
@@ -463,7 +463,7 @@ AC_ARG_ENABLE(install_uncommon,
         AS_HELP_STRING([--enable-install-uncommon],[enable installation of some uncommon programs (default no)]),
         [install_uncommon=${enableval}],[install_uncommon=no])
 
-UNCOMMON="bamfilter bamfilterbyname bamfixmatecoordinates bamfixmatecoordinatesnamesorted bamtoname bamdisthist fastabgzfextract bamheap bamfrontback bamrandomtag bamheap2 bamheap3 bamtagconversion fastqtobampar bambisect vcffilterinfo vcfpatchcontigprepend vcfconcat vcfsort filtergtf bamconsensus vcfreplacecontigs vcffiltersamples bamexploderg bamexondepth bamheadercat bammarkduplicatesoptdist vcfdiff bamsimpledepth bamdepthmerge bamcountflags vcffilterfilterflags vcfreplacecontigsmap"
+UNCOMMON="bamfilter bamfilterbyname bamfixmatecoordinates bamfixmatecoordinatesnamesorted bamtoname bamdisthist fastabgzfextract bamheap bamfrontback bamrandomtag bamheap2 bamheap3 bamtagconversion fastqtobampar bambisect vcffilterinfo vcfpatchcontigprepend vcfconcat vcfsort filtergtf bamconsensus vcfreplacecontigs vcffiltersamples bamexploderg bamexondepth bamheadercat bammarkduplicatesoptdist vcfdiff bamsimpledepth bamdepthmerge bamcountflags vcffilterfilterflags vcfreplacecontigsmap fastaselectreg"
 UNCOMMONINSTALLED=
 UNCOMMONUNINSTALLED=
 if test "${install_uncommon}" = "yes" ; then


=====================================
release.sh
=====================================
@@ -1,4 +1,5 @@
 #! /bin/bash
+set -euxo pipefail
 
 # update branches
 git checkout experimental


=====================================
removespace.sh
=====================================
@@ -0,0 +1,11 @@
+#! /bin/bash
+for i in `find src -regex .*\\\.[ch]pp` `find src -regex .*\\\.[ch]` ; do
+	ORIG=`cat $i`
+	PATCHED=`perl -p -e "s/(\s*)($)/\n/" < ${i}`
+	
+	if [ "$ORIG" != "$PATCHED" ] ; then
+		echo "${PATCHED}" > ${i}
+		# git add ${i}
+		echo ${i}
+	fi
+done


=====================================
src/Makefile.am
=====================================
@@ -152,7 +152,8 @@ EXTRA_PROGRAMS = blastnxmltobam \
 	vcfdiff \
 	bamsimpledepth \
 	bamdepthmerge \
-	bamcountflags
+	bamcountflags \
+	fastaselectreg
 
 populaterefcache_SOURCES = programs/populaterefcache.cpp biobambam2/Licensing.cpp
 populaterefcache_LDADD = ${LIBMAUS2LIBS}
@@ -703,3 +704,8 @@ bamcountflags_SOURCES = programs/bamcountflags.cpp biobambam2/Licensing.cpp
 bamcountflags_LDADD = ${LIBMAUS2LIBS}
 bamcountflags_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} ${AM_LDFLAGS}
 bamcountflags_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS}
+
+fastaselectreg_SOURCES = programs/fastaselectreg.cpp biobambam2/Licensing.cpp
+fastaselectreg_LDADD = ${LIBMAUS2LIBS}
+fastaselectreg_LDFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS} ${LIBMAUS2LDFLAGS} ${AM_LDFLAGS}
+fastaselectreg_CPPFLAGS = ${AM_CPPFLAGS} ${LIBMAUS2CPPFLAGS}


=====================================
src/biobambam2/BamBamConfig.hpp.in
=====================================
@@ -22,7 +22,6 @@
 @LIBMAUS2IOLIBDEFINE@
 @BIOBAMBAM_HAVE_XERCES_C@
 @BIOBAMBAM_HAVE_GMP@
-@LIBMAUS2IRODSDEFINE@
 @HAVE_PTHREAD_MUTEX_RECURSIVE_NP@
 @HAVE_PTHREAD_MUTEX_RECURSIVE@
 


=====================================
src/biobambam2/UpdateNumericalIndex.cpp
=====================================
@@ -28,6 +28,13 @@ namespace biobambam2
 
 		if ( libmaus2::util::GetFileSize::fileExists(indexfn) )
 		{
+			std::string const replfn = indexfn + ".repl";
+			libmaus2::aio::OutputStreamInstance::unique_ptr_type prepl(
+				new libmaus2::aio::OutputStreamInstance(
+					replfn
+				)
+			);
+
 			// get index stats
 			uint64_t alcnt, mod, numblocks;
 			{
@@ -59,20 +66,33 @@ namespace biobambam2
 
 					// std::cerr << "replacing index position for " << i << " by " << start.first << "," << start.second << std::endl;
 
+					libmaus2::bambam::BamNumericalIndexGenerator::ReplaceObject(
+						blockid,start.first,start.second
+					).serialise(*prepl);
+
+					#if 0
 					libmaus2::bambam::BamNumericalIndexGenerator::replaceValue(
 						indexfn,
 						blockid,
 						start.first,
 						start.second
 					);
+					#endif
 
 					highestSet = blockid;
 				}
 
+			prepl->flush();
+			prepl.reset();
+
+			libmaus2::bambam::BamNumericalIndexGenerator::replaceValues(indexfn,replfn);
+
+			libmaus2::aio::FileRemoval::removeFile(replfn);
+
 			// std::cerr << "shifting index positions for [" << highestSet+1 << "," << numblocks << ")" << " by " << static_cast<int64_t>(compdata.second.size()) - static_cast<int64_t>(compdata.first) << std::endl;
 
 			// shift values in part we moved as is
-			libmaus2::bambam::BamNumericalIndexGenerator::shiftValues(
+			libmaus2::bambam::BamNumericalIndexGenerator::shiftValuesStreaming(
 				indexfn,
 				highestSet + 1,numblocks,
 				static_cast<int64_t>(compdata.second.size()) - static_cast<int64_t>(compdata.first)


=====================================
src/programs/bamcollate2.cpp
=====================================
@@ -1403,29 +1403,14 @@ void bamcollate2(libmaus2::util::ArgInfo const & arginfo)
 	}
 }
 
-#if defined(LIBMAUS2_HAVE_IRODS)
-#include <libmaus2/irods/IRodsInputStreamFactory.hpp>
-#endif
-
 int main(int argc, char * argv[])
 {
 	try
 	{
-		#if defined(LIBMAUS2_HAVE_IRODS)
-                libmaus2::irods::IRodsInputStreamFactory::registerHandler();
-                #endif
-
 		libmaus2::timing::RealTimeClock rtc; rtc.start();
 
 		::libmaus2::util::ArgInfo arginfo(argc,argv);
 
-		#if defined(LIBMAUS2_HAVE_IRODS)
-		// set program name for iRODS identification
-		std::stringstream irods_id;
-		irods_id  << PACKAGE_NAME << ":" << arginfo.getProgFileName(arginfo.progname) << ":" << PACKAGE_VERSION;
-		setenv(SP_OPTION, irods_id.str().c_str(), 1);
-		#endif
-
 		for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i )
 			if (
 				arginfo.restargs[i] == "-v"
@@ -1526,14 +1511,6 @@ int main(int argc, char * argv[])
 
 		bamcollate2(arginfo);
 
-		#if defined(LIBMAUS2_HAVE_IRODS)
-		// need a explicit call to disconnect to avoid atexit deallocation problems in iRODS 4.19+
-    		if (libmaus2::irods::IRodsSystem::defaultIrodsSystem)
-		{
-    	        	(libmaus2::irods::IRodsSystem::getDefaultIRodsSystem())->disconnect();
-		}
-		#endif
-
 		if ( arginfo.getValue<unsigned int>("verbose",getDefaultVerbose()) )
 			std::cerr << "[V] " << libmaus2::util::MemUsage() << " wall clock time " << rtc.formatTime(rtc.getElapsedSeconds()) << std::endl;
 	}


=====================================
src/programs/bamconsensus.cpp
=====================================
@@ -3307,7 +3307,7 @@ struct ReferenceCache
 
 		if ( ! --VuseCnt.at(id) )
 		{
-			Vref.at(id) == RefEntry::shared_ptr_type();
+			Vref.at(id) = RefEntry::shared_ptr_type();
 			std::cerr << "[V] reference cache deallocating " << ptr->name << std::endl;
 		}
 


=====================================
src/programs/bamdifference.cpp
=====================================
@@ -0,0 +1,222 @@
+/**
+    bambam
+    Copyright (C) 2009-2020 German Tischler-Höhle
+    Copyright (C) 2011-2013 Genome Research Limited
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <config.h>
+#include <libmaus2/bambam/BamBlockWriterBaseFactory.hpp>
+#include <libmaus2/bambam/BamWriter.hpp>
+#include <libmaus2/bambam/BamHeaderUpdate.hpp>
+#include <libmaus2/util/ArgInfo.hpp>
+#include <biobambam2/Licensing.hpp>
+#include <libmaus2/bambam/BamMultiAlignmentDecoderFactory.hpp>
+#include <libmaus2/bambam/BamPeeker.hpp>
+#include <libmaus2/lz/BgzfDeflateOutputCallbackMD5.hpp>
+#include <libmaus2/bambam/BgzfDeflateOutputCallbackBamIndex.hpp>
+
+static int getDefaultMD5() { return 0; }
+
+static void printVerbose(std::ostream & errstr, uint64_t const c0, uint64_t const c1, uint64_t const k, bool const verbose, uint64_t const mod)
+{
+	if ( verbose && ((c0+c1)%mod==0) )
+	{
+		errstr << "[V] " << c0 << "/" << c1 << "/" << c0+c1 << "/" << k << std::endl;
+	}
+}
+
+/*
+ * compute difference of two name sorted alignment files (SAM/BAM/CRAM)
+ */
+int bamintersect(libmaus2::util::ArgParser const & arg)
+{
+	std::ostream & verbstr = std::cerr;
+	static uint64_t const mod = 1024*1024;
+
+	libmaus2::util::ArgParser arg0 = arg;
+	arg0.replaceArg("I",arg0[0]);
+	libmaus2::util::ArgParser arg1 = arg;
+	arg1.replaceArg("I",arg0[1]);
+
+	bool const verbose = arg.argPresent("verbose");
+
+	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper0(libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arg0));
+	libmaus2::bambam::BamAlignmentDecoder & BD0 = decwrapper0->getDecoder();
+	libmaus2::bambam::BamPeeker BP0(BD0);
+	libmaus2::bambam::BamAlignment algn0;
+
+	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper1(libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arg1));
+	libmaus2::bambam::BamAlignmentDecoder & BD1 = decwrapper1->getDecoder();
+	libmaus2::bambam::BamPeeker BP1(BD1);
+	libmaus2::bambam::BamAlignment algn1;
+
+	std::string md5filename;
+
+	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs;
+	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
+	if ( arg.getParsedArgOrDefault<uint64_t>("md5",getDefaultMD5()) )
+	{
+		if ( libmaus2::bambam::BamBlockWriterBaseFactory::getMD5FileName(arg) != std::string() )
+			md5filename = libmaus2::bambam::BamBlockWriterBaseFactory::getMD5FileName(arg);
+		else
+			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
+
+		if ( md5filename.size() )
+		{
+			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
+			Pmd5cb = std::move(Tmd5cb);
+			cbs.push_back(Pmd5cb.get());
+		}
+	}
+	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
+	if ( cbs.size() )
+		Pcbs = &cbs;
+
+	// construct writer
+	libmaus2::bambam::BamBlockWriterBase::unique_ptr_type Pwriter(libmaus2::bambam::BamBlockWriterBaseFactory::construct(BD0.getHeader(),arg,Pcbs));
+	libmaus2::bambam::BamBlockWriterBase & wr = *Pwriter;
+
+	uint64_t c0 = 0, c1 = 0, k = 0;
+
+	while ( BP0.peekNext(algn0) && BP1.peekNext(algn1) )
+	{
+		char const * name0 = algn0.getName();
+		char const * name1 = algn1.getName();
+		int const r = libmaus2::bambam::StrCmpNum::strcmpnum(name0,name1);
+
+		// name is in file0 but not in file1
+		if ( r < 0 )
+		{
+			std::string const name0 = algn0.getName();
+
+			while ( BP0.peekNext(algn0) && algn0.getName() == name0 )
+			{
+				BP0.getNext(algn0);
+				wr.writeAlignment(algn0);
+
+				++c0;
+				++k;
+
+				printVerbose(verbstr, c0, c1, k, verbose, mod);
+			}
+		}
+		// name is in both files, drop data
+		else if ( r == 0 )
+		{
+			std::string const name = algn0.getName();
+
+			while ( BP0.peekNext(algn0) && algn0.getName() == name )
+			{
+				BP0.getNext(algn0);
+
+				++c0;
+				printVerbose(verbstr, c0, c1, k, verbose, mod);
+			}
+			while ( BP1.peekNext(algn1) && algn1.getName() == name )
+			{
+				BP1.getNext(algn1);
+
+				++c1;
+				printVerbose(verbstr, c0, c1, k, verbose, mod);
+			}
+		}
+		// name is only in file1, drop data
+		else
+		{
+			std::string const name1 = algn1.getName();
+
+			while ( BP1.peekNext(algn1) && algn1.getName() == name1 )
+			{
+				BP1.getNext(algn1);
+				wr.writeAlignment(algn1);
+
+				++c1;
+				printVerbose(verbstr, c0, c1, k, verbose,mod);
+			}
+		}
+	}
+
+	// names only in file0 at end
+	while ( BP0.getNext(algn0) )
+	{
+		wr.writeAlignment(algn0);
+
+		++c0;
+		++k;
+
+		printVerbose(verbstr, c0, c1, k, verbose,mod);
+	}
+
+	Pwriter.reset();
+
+	if ( Pmd5cb )
+		Pmd5cb->saveDigestAsFile(md5filename);
+
+	return EXIT_SUCCESS;
+}
+
+int main(int argc, char * argv[])
+{
+	try
+	{
+		std::vector<libmaus2::util::ArgParser::ArgumentDefinition> Vformatcons;
+		Vformatcons.push_back(libmaus2::util::ArgParser::ArgumentDefinition("h","help",false));
+		Vformatcons.push_back(libmaus2::util::ArgParser::ArgumentDefinition("v","version",false));
+		Vformatcons.push_back(libmaus2::util::ArgParser::ArgumentDefinition("","verbose",false));
+
+		std::vector<libmaus2::util::ArgParser::ArgumentDefinition> const Vformatin = libmaus2::bambam::BamAlignmentDecoderInfo::getArgumentDefinitions();
+		std::vector<libmaus2::util::ArgParser::ArgumentDefinition> const Vformatout = libmaus2::bambam::BamBlockWriterBaseFactory::getArgumentDefinitions();
+
+		std::vector<libmaus2::util::ArgParser::ArgumentDefinition> Vformat =
+			libmaus2::util::ArgParser::mergeFormat(libmaus2::util::ArgParser::mergeFormat(Vformatin,Vformatout),Vformatcons);
+
+		libmaus2::util::ArgParser const arg(argc,argv,Vformat);
+
+		if ( arg.argPresent("version") )
+		{
+			std::cerr << ::biobambam2::Licensing::license();
+			return EXIT_SUCCESS;
+		}
+		else if ( arg.argPresent("help") || arg.size() < 2 )
+		{
+			std::cerr << ::biobambam2::Licensing::license();
+			std::cerr << std::endl;
+			std::cerr << "usage: " << arg.progname << " full.bam partial.bam" << std::endl;
+			std::cerr << std::endl;
+			std::cerr << "Argument:" << std::endl;
+			std::cerr << std::endl;
+
+			std::vector< std::pair<std::string,std::string> > V;
+
+			V.push_back ( std::pair<std::string,std::string> ( "-v/--verbose", "print progress report" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "--md5 <["+::biobambam2::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "--md5filename <filename>", "file name for md5 check sum (default: extend output file name)" ) );
+
+			::biobambam2::Licensing::printMap(std::cerr,V);
+
+			if ( arg.argPresent("help") )
+				return EXIT_SUCCESS;
+			else
+				return EXIT_FAILURE;
+		}
+
+		return bamintersect(arg);
+	}
+	catch(std::exception const & ex)
+	{
+		std::cerr << ex.what() << std::endl;
+		return EXIT_FAILURE;
+	}
+}


=====================================
src/programs/bammerge.cpp
=====================================
@@ -38,10 +38,6 @@ static std::string getDefaultSortOrder() { return "coordinate"; }
 static int getDefaultMD5() { return 0; }
 static int getDefaultIndex() { return 0; }
 
-#if defined(LIBMAUS2_HAVE_IRODS)
-#include <libmaus2/irods/IRodsInputStreamFactory.hpp>
-#endif
-
 ::libmaus2::bambam::BamHeader::unique_ptr_type updateHeader(
 	::libmaus2::util::ArgInfo const & arginfo,
 	::libmaus2::bambam::BamHeader const & header
@@ -239,14 +235,6 @@ int bammerge(libmaus2::util::ArgInfo const & arginfo)
 		Pindex->flush(std::string(indexfilename));
 	}
 
-	#if defined(LIBMAUS2_HAVE_IRODS)
-	// need a explicit call to disconnect to avoid atexit deallocation problems in iRODS 4.19+
-    	if (libmaus2::irods::IRodsSystem::defaultIrodsSystem)
-	{
-    	        (libmaus2::irods::IRodsSystem::getDefaultIRodsSystem())->disconnect();
-	}
-	#endif
-
 	return EXIT_SUCCESS;
 }
 
@@ -254,19 +242,8 @@ int main(int argc, char * argv[])
 {
 	try
 	{
-		#if defined(LIBMAUS2_HAVE_IRODS)
-                libmaus2::irods::IRodsInputStreamFactory::registerHandler();
-                #endif
-
 		::libmaus2::util::ArgInfo const arginfo(argc,argv);
 
-		#if defined(LIBMAUS2_HAVE_IRODS)
-		// set program name for iRODS identification
-		std::stringstream irods_id;
-		irods_id  << PACKAGE_NAME << ":" << arginfo.getProgFileName(arginfo.progname) << ":" << PACKAGE_VERSION;
-		setenv(SP_OPTION, irods_id.str().c_str(), 1);
-		#endif
-
 		for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i )
 			if (
 				arginfo.restargs[i] == "-v"


=====================================
src/programs/bamtofastq.cpp
=====================================
@@ -761,7 +761,7 @@ void bamtofastqCollating(
 				}
 
 				AOS[rgfshift + O2map]->write(reinterpret_cast<char const *>(T.begin()),la);
-				filefrags[rgfshift + Omap]++;
+				filefrags[rgfshift + O2map]++;
 
 				combs.orphans2 += 1;
 				cnt += 1;
@@ -1240,29 +1240,14 @@ void bamtofastq(libmaus2::util::ArgInfo const & arginfo)
 	}
 }
 
-#if defined(LIBMAUS2_HAVE_IRODS)
-#include <libmaus2/irods/IRodsInputStreamFactory.hpp>
-#endif
-
 int main(int argc, char * argv[])
 {
 	try
 	{
-		#if defined(LIBMAUS2_HAVE_IRODS)
-		libmaus2::irods::IRodsInputStreamFactory::registerHandler();
-		#endif
-
 		libmaus2::timing::RealTimeClock rtc; rtc.start();
 
 		::libmaus2::util::ArgInfo arginfo(argc,argv);
 
-		#if defined(LIBMAUS2_HAVE_IRODS)
-		// set program name for iRODS identification
-		std::stringstream irods_id;
-		irods_id  << PACKAGE_NAME << ":" << arginfo.getProgFileName(arginfo.progname) << ":" << PACKAGE_VERSION;
-		setenv(SP_OPTION, irods_id.str().c_str(), 1);
-		#endif
-
 		for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i )
 			if (
 				arginfo.restargs[i] == "-v"
@@ -1369,14 +1354,6 @@ int main(int argc, char * argv[])
 
 		bamtofastq(arginfo);
 
-		#if defined(LIBMAUS2_HAVE_IRODS)
-		// need a explicit call to disconnect to avoid atexit deallocation problems in iRODS 4.19+
-    		if (libmaus2::irods::IRodsSystem::defaultIrodsSystem)
-		{
-    	        	(libmaus2::irods::IRodsSystem::getDefaultIRodsSystem())->disconnect();
-		}
-		#endif
-
 		std::cerr << "[V] " << libmaus2::util::MemUsage() << " wall clock time " << rtc.formatTime(rtc.getElapsedSeconds()) << std::endl;
 	}
 	catch(std::exception const & ex)


=====================================
src/programs/blastnxmltobam.cpp
=====================================
@@ -54,7 +54,7 @@ static std::string stripAfterSpace(std::string const & s)
 struct XercesUtf8Transcoder
 {
 	typedef XercesUtf8Transcoder this_type;
-	typedef libmaus2::util::unique_ptr<this_type>::type unique_ptr_type;
+	typedef std::unique_ptr<this_type> unique_ptr_type;
 
 	xercesc::XMLTransService * ts;
 	xercesc::XMLTranscoder * utf8transcoder;
@@ -1136,12 +1136,12 @@ int main(int argc, char * argv[])
 			std::string const reffn = arginfo.restargs.at(0);
 			std::string const queriesfn = arginfo.restargs.at(1);
 
-			libmaus2::util::unique_ptr< std::vector<libmaus2::bambam::CramRange> >::type Pranges;
+			std::unique_ptr< std::vector<libmaus2::bambam::CramRange> > Pranges;
 			std::vector<libmaus2::bambam::CramRange> * ranges = 0;
 
 			if ( arginfo.hasArg("range") )
 			{
-				libmaus2::util::unique_ptr< std::vector<libmaus2::bambam::CramRange> >::type Tranges(
+				std::unique_ptr< std::vector<libmaus2::bambam::CramRange> > Tranges(
 					new std::vector<libmaus2::bambam::CramRange>
 				);
 				Pranges = std::move(Tranges);


=====================================
src/programs/fastaselectreg.cpp
=====================================
@@ -0,0 +1,156 @@
+/*
+    biobambam2
+    Copyright (C) 2020 German Tischler
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <iostream>
+#include <libmaus2/util/ArgParser.hpp>
+#include <libmaus2/fastx/StreamFastAReader.hpp>
+#include <libmaus2/lz/PlainOrGzipStream.hpp>
+
+#include <biobambam2/BamBamConfig.hpp>
+#include <biobambam2/Licensing.hpp>
+
+#include <regex>
+
+#include <libmaus2/lz/GzipOutputStream.hpp>
+
+static uint64_t getDefaultLineLength()
+{
+	return 80;
+}
+
+/**
+ * read a FastA file (possibly gzipped) from stdin, select sequences with (short)
+ * names matching a given regular expression (argument, Posix extended regex)
+ * and output selected sequences on stdout (gzipped if -g or --gzip is set).
+ *
+ * Options:
+ * - singleline: output a single line of sequence data per record (i.e. do not wrap)
+ * - longname: do not shorten the sequence name line
+ * - dataonly: output data only, drop FastA headers (lines starting with >)
+ * - toupper: transform all sequence symbols to upper case
+ * - gzip: compress output using gzip
+ * - verbose: print which sequences are kept and which are discarded
+ * - l<len>: wrap sequence lines after this number of symbols (default 80)
+ **/
+int fastaselectreg(libmaus2::util::ArgParser const & arg)
+{
+	libmaus2::lz::PlainOrGzipStream POS(std::cin);
+	libmaus2::fastx::StreamFastAReaderWrapper SFA(POS);
+	libmaus2::fastx::StreamFastAReaderWrapper::pattern_type pattern;
+	uint64_t const linelength = arg.uniqueArgPresent("l") ? arg.getUnsignedNumericArg<uint64_t>("l") : getDefaultLineLength();
+	std::regex reg(arg[0],std::regex_constants::extended);
+
+	bool const singleline = arg.argPresent("s") || arg.argPresent("singleline");
+	bool const longname = arg.argPresent("L") || arg.argPresent("longname");
+	bool const dataonly = arg.argPresent("d") || arg.argPresent("dataonly");
+	bool const up = arg.argPresent("u") || arg.argPresent("toupper");
+	bool const gzip = arg.argPresent("g") || arg.argPresent("gzip");
+	bool const verbose = arg.argPresent("verbose");
+
+	libmaus2::lz::GzipOutputStream::unique_ptr_type gzptr;
+	std::ostream & ostr = std::cout;
+	if ( gzip )
+	{
+		libmaus2::lz::GzipOutputStream::unique_ptr_type tgzptr(new libmaus2::lz::GzipOutputStream(ostr));
+		gzptr = std::move(tgzptr);
+	}
+	std::ostream & OSI = gzptr ? *gzptr : ostr;
+	while ( SFA.getNextPatternUnlocked(pattern) )
+	{
+		std::string & spat = pattern.spattern;
+
+		if ( up )
+			for ( uint64_t i = 0; i < spat.size(); ++i )
+				spat[i] = toupper(spat[i]);
+
+		std::string const shortname = pattern.getShortStringId();
+
+		if ( std::regex_match(shortname,reg) )
+		{
+			if ( verbose )
+				std::cerr << "[K] keeping " << shortname << std::endl;
+
+			if ( !longname )
+				pattern.sid = shortname;
+
+			if ( dataonly )
+				OSI.write(pattern.spattern.c_str(),pattern.spattern.size());
+			else if ( singleline )
+				OSI << pattern;
+			else
+				pattern.printMultiLine(OSI,linelength);
+		}
+		else if ( verbose )
+		{
+			std::cerr << "[K] discarding " << shortname << std::endl;
+		}
+	}
+
+	return EXIT_SUCCESS;
+}
+
+int main(int argc, char * argv[])
+{
+	try
+	{
+		libmaus2::util::ArgParser const arg(argc,argv);
+
+		if (
+			arg.uniqueArgPresent("v") || arg.uniqueArgPresent("version")
+		)
+		{
+			std::cerr << ::biobambam2::Licensing::license();
+			return EXIT_SUCCESS;
+		}
+		else if (
+			arg.uniqueArgPresent("h") || arg.uniqueArgPresent("help") || arg.size() < 1
+		)
+		{
+			std::cerr << ::biobambam2::Licensing::license();
+			std::cerr << std::endl;
+			std::cerr << "usage: " << arg.progname << " <regex> <in.fasta > out.fasta" << std::endl;
+			std::cerr << std::endl;
+			std::cerr << "options:" << std::endl;
+			std::cerr << std::endl;
+
+			std::vector< std::pair<std::string,std::string> > V;
+
+			V.push_back ( std::pair<std::string,std::string> ( "-v/--version", "print version number and quit" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "-h/--help", "print help message and quit" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "-l<cols>", "line length (default: "+libmaus2::util::NumberSerialisation::formatNumber(getDefaultLineLength(),0)+")" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "-s/--singleline", "do not wrap sequence data lines" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "-L/--longname", "do not shorten name" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "-d/--dataonly", "do not print FastA header (data only)" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "-u/--toupper", "convert sequence symbols to upper case" ) );
+			V.push_back ( std::pair<std::string,std::string> ( "-g/--gzip", "compress output" ) );
+
+			::biobambam2::Licensing::printMap(std::cerr,V);
+
+			std::cerr << std::endl;
+			return EXIT_SUCCESS;
+
+		}
+
+		return fastaselectreg(arg);
+	}
+	catch(std::exception const & ex)
+	{
+		std::cerr << ex.what() << std::endl;
+		return EXIT_FAILURE;
+	}
+}



View it on GitLab: https://salsa.debian.org/med-team/biobambam2/-/commit/b36bccd15b79ea86c241f0014673225bb1d34854

-- 
View it on GitLab: https://salsa.debian.org/med-team/biobambam2/-/commit/b36bccd15b79ea86c241f0014673225bb1d34854
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201118/e59ec68e/attachment-0001.html>


More information about the debian-med-commit mailing list