[med-svn] [Git][med-team/biobambam2][master] 5 commits: New upstream version 2.0.184+ds
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Sat Oct 8 09:35:34 BST 2022
Étienne Mollier pushed to branch master at Debian Med / biobambam2
Commits:
b0eeb102 by Étienne Mollier at 2022-10-07T21:23:40+02:00
New upstream version 2.0.184+ds
- - - - -
2f579df9 by Étienne Mollier at 2022-10-07T21:23:40+02:00
routine-update: New upstream version
- - - - -
da6fae1b by Étienne Mollier at 2022-10-07T21:23:41+02:00
Update upstream source from tag 'upstream/2.0.184+ds'
Update to upstream version '2.0.184+ds'
with Debian dir 2da64f39e89223caf892c61aaad6cd4068ff2510
- - - - -
7105c096 by Étienne Mollier at 2022-10-07T21:23:41+02:00
routine-update: Standards-Version: 4.6.1
- - - - -
41211d24 by Étienne Mollier at 2022-10-07T21:35:02+02:00
routine-update: Ready to upload to unstable
- - - - -
6 changed files:
- ChangeLog
- configure.ac
- debian/changelog
- debian/control
- src/programs/bamsort.1
- src/programs/bamsort.cpp
Changes:
=====================================
ChangeLog
=====================================
@@ -1,3 +1,10 @@
+biobambam2 (2.0.184-1) unstable; urgency=medium
+
+ * Add tagonly sort option in bamsort
+ * Adapt to change in libmaus2 (renaming of GetObject to GetCObject)
+
+ -- German Tischler-Höhle <germant at miltenyibiotec.de> Mon, 19 Sep 2022 13:40:22 +0200
+
biobambam2 (2.0.183-1) unstable; urgency=medium
* Adapt to change in libmaus2 (rename GetObject to GetCObject)
=====================================
configure.ac
=====================================
@@ -1,4 +1,4 @@
-AC_INIT(biobambam2,2.0.183,[germant at miltenyibiotec.de],[biobambam2],[https://gitlab.com/german.tischler/biobambam2])
+AC_INIT(biobambam2,2.0.184,[germant at miltenyibiotec.de],[biobambam2],[https://gitlab.com/german.tischler/biobambam2])
AC_CANONICAL_SYSTEM
AC_PROG_LIBTOOL
=====================================
debian/changelog
=====================================
@@ -1,9 +1,11 @@
-biobambam2 (2.0.183+ds-2) UNRELEASED; urgency=medium
+biobambam2 (2.0.184+ds-1) unstable; urgency=medium
* d/t/run-unit-test: skip bamsormadup test on i386; allows substancial
autopkgtest on that architecture as well.
+ * New upstream version
+ * Standards-Version: 4.6.1 (routine-update)
- -- Étienne Mollier <emollier at debian.org> Fri, 27 Aug 2021 16:40:22 +0200
+ -- Étienne Mollier <emollier at debian.org> Fri, 07 Oct 2022 21:23:51 +0200
biobambam2 (2.0.183+ds-1) unstable; urgency=medium
=====================================
debian/control
=====================================
@@ -7,7 +7,7 @@ Priority: optional
Build-Depends: debhelper-compat (= 13),
pkg-config,
libmaus2-dev (>= 2.0.800)
-Standards-Version: 4.6.0
+Standards-Version: 4.6.1
Vcs-Browser: https://salsa.debian.org/med-team/biobambam2
Vcs-Git: https://salsa.debian.org/med-team/biobambam2.git
Homepage: https://gitlab.com/german.tischler/biobambam2
=====================================
src/programs/bamsort.1
=====================================
@@ -44,7 +44,7 @@ than the input itself.
.PP
The following key=value pairs can be given:
.PP
-.B SO=<coordinate|queryname|hash|tag|queryname_HI|queryname_lexicographic>:
+.B SO=<coordinate|queryname|hash|tag|tagonly|queryname_HI|queryname_lexicographic>:
set the sort order. Valid values are
.IP coordinate:
sort alignments by coordinate
@@ -55,7 +55,12 @@ sort alignments by (Murmur3) hash of query name. This effectively puts them
in a random order.
.IP tag
sort alignments by string aux field. The tag of the aux fields need to be
-provided using the sorttag key.
+provided using the sorttag key. Entries with identical tag are sorted by
+coordinate.
+.IP tagonly
+sort alignments by string aux field. The tag of the aux fields need to be
+provided using the sorttag key. Entries with identical tag are left in the
+same order as they were in the input.
.IP queryname_HI
sort alignments by query name. Alignments with identical query name are
sorted by the value of their HI aux field.
=====================================
src/programs/bamsort.cpp
=====================================
@@ -1,6 +1,6 @@
/**
bambam
- Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2009-2022 German Tischler-Höhle
Copyright (C) 2011-2013 Genome Research Limited
This program is free software: you can redistribute it and/or modify
@@ -30,6 +30,7 @@
#include <libmaus2/bambam/BamAlignmentPosComparator.hpp>
#include <libmaus2/bambam/BamAlignmentHashComparator.hpp>
#include <libmaus2/bambam/BamAlignmentTagComparator.hpp>
+#include <libmaus2/bambam/BamAlignmentTagOnlyComparator.hpp>
#include <libmaus2/bambam/BamBlockWriterBaseFactory.hpp>
#include <libmaus2/bambam/BamEntryContainer.hpp>
#include <libmaus2/bambam/BamMultiAlignmentDecoderFactory.hpp>
@@ -83,7 +84,7 @@ void printHelpMessage(libmaus2::util::ArgInfo const & /* arginfo */)
std::vector< std::pair<std::string,std::string> > V;
V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam2::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus2::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) );
- V.push_back ( std::pair<std::string,std::string> ( "SO=<["+getDefaultSortOrder()+"]>", "sorting order (coordinate, queryname, hash, tag, queryname_HI or queryname_lexicographic)" ) );
+ V.push_back ( std::pair<std::string,std::string> ( "SO=<["+getDefaultSortOrder()+"]>", "sorting order (coordinate, queryname, hash, tag, tagonly, queryname_HI or queryname_lexicographic)" ) );
V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam2::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) );
V.push_back ( std::pair<std::string,std::string> ( "blockmb=<["+::biobambam2::Licensing::formatNumber(getDefaultBlockSize())+"]>", "size of internal memory buffer used for sorting in MiB" ) );
V.push_back ( std::pair<std::string,std::string> ( "disablevalidation=<["+::biobambam2::Licensing::formatNumber(getDefaultDisableValidation())+"]>", "disable input validation (default is 0)" ) );
@@ -188,7 +189,7 @@ static std::string getSortTag(libmaus2::util::ArgInfo const & arginfo)
return sorttag;
}
-enum sort_order_type { sort_order_coordinate, sort_order_queryname, sort_order_hash, sort_order_tag, sort_order_queryname_HI, sort_order_queryname_lexicographic };
+enum sort_order_type { sort_order_coordinate, sort_order_queryname, sort_order_hash, sort_order_tag, sort_order_tagonly, sort_order_queryname_HI, sort_order_queryname_lexicographic };
struct BamSortWriter
{
@@ -254,6 +255,7 @@ struct BamSortWriter
break;
case sort_order_hash:
case sort_order_tag:
+ case sort_order_tagonly:
uphead.changeSortOrder("unknown");
break;
case sort_order_coordinate:
@@ -332,7 +334,7 @@ int bamsort(::libmaus2::util::ArgInfo const & arginfo)
bool const streaming = arginfo.getValue<unsigned int>("streaming",getDefaultStreaming());
std::string const hash = arginfo.getValue<std::string>("hash",getDefaultHash());
- std::string const stagsorttag = sortorder == "tag" ? getSortTag(arginfo) : std::string();
+ std::string const stagsorttag = (sortorder == "tag" || sortorder == "tagonly") ? getSortTag(arginfo) : std::string();
// input decoder wrapper
libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper(
@@ -423,6 +425,8 @@ int bamsort(::libmaus2::util::ArgInfo const & arginfo)
sort_order = sort_order_hash;
else if ( sortorder == "tag" )
sort_order = sort_order_tag;
+ else if ( sortorder == "tagonly" )
+ sort_order = sort_order_tagonly;
else
sort_order = sort_order_coordinate;
@@ -846,6 +850,112 @@ int bamsort(::libmaus2::util::ArgInfo const & arginfo)
BEC.createOutput(*(BSW.Pout), verbose);
BSW.flush();
}
+ else if ( sort_order == sort_order_tagonly )
+ {
+ char const * tagsorttag = stagsorttag.c_str();
+
+ ::libmaus2::bambam::BamEntryContainer< ::libmaus2::bambam::BamAlignmentTagOnlyComparator >
+ BEC(tagsorttag,blockmem,tmpfilenameout,sortthreads);
+
+ if ( verbose )
+ std::cerr << "[V] Reading alignments from source." << std::endl;
+ uint64_t incnt = 0;
+
+ // current alignment
+ libmaus2::bambam::BamAlignment & curalgn = dec.getAlignment();
+ // previous alignment
+ libmaus2::bambam::BamAlignment prevalgn;
+ // previous alignment valid
+ bool prevalgnvalid = false;
+ // MQ field filter
+ libmaus2::bambam::BamAuxFilterVector MQfilter;
+ libmaus2::bambam::BamAuxFilterVector MSfilter;
+ libmaus2::bambam::BamAuxFilterVector MCfilter;
+ libmaus2::bambam::BamAuxFilterVector MTfilter;
+ libmaus2::bambam::BamAuxFilterVector CMCfilter;
+ MQfilter.set("MQ");
+ MSfilter.set("ms");
+ MCfilter.set("mc");
+ MTfilter.set("mt");
+ CMCfilter.set("MC");
+
+ // remove the original style tags (MC handled separately)
+ MSfilter.set("MS");
+ MTfilter.set("MT");
+
+ while ( dec.readAlignment() )
+ {
+ if ( curalgn.isSecondary() || curalgn.isSupplementary() )
+ {
+ chksums.update(curalgn);
+ BEC.putAlignment(curalgn);
+ }
+ else if ( prevalgnvalid )
+ {
+ // different name
+ if ( strcmp(curalgn.getName(),prevalgn.getName()) )
+ {
+ chksums.update(prevalgn);
+ BEC.putAlignment(prevalgn);
+ curalgn.swap(prevalgn);
+ }
+ // same name
+ else
+ {
+ libmaus2::bambam::BamAlignment::fixMateInformation(prevalgn,curalgn,MQfilter);
+
+ if ( addMSMC )
+ {
+ libmaus2::bambam::BamAlignment::addMateBaseScore(prevalgn,curalgn,MSfilter);
+ libmaus2::bambam::BamAlignment::addMateCoordinate(prevalgn,curalgn,MCfilter);
+ libmaus2::bambam::BamAlignment::addMateCigarString(prevalgn,curalgn,MCaux,CMCfilter);
+ removeOldStyleMateCoordinate(prevalgn,curalgn);
+
+ switch ( tag_type )
+ {
+ case tag_type_string:
+ libmaus2::bambam::BamAlignment::addMateTag(prevalgn,curalgn,MTfilter,tag);
+ break;
+ case tag_type_nucleotide:
+ libmaus2::bambam::BamAlignment::addMateTag(prevalgn,curalgn,MTfilter,nucltag);
+ break;
+ default:
+ break;
+ }
+ }
+
+ chksums.update(prevalgn);
+ BEC.putAlignment(prevalgn);
+ chksums.update(curalgn);
+ BEC.putAlignment(curalgn);
+ prevalgnvalid = false;
+ }
+ }
+ else
+ {
+ prevalgn.swap(curalgn);
+ prevalgnvalid = true;
+ }
+
+ if ( verbose && ( ( ++incnt & ((1ull<<20)-1) ) == 0 ) )
+ std::cerr << "[V] " << incnt << std::endl;
+ }
+
+ if ( prevalgnvalid )
+ {
+ chksums.update(prevalgn);
+ BEC.putAlignment(prevalgn);
+ prevalgnvalid = false;
+ }
+
+ if ( verbose )
+ std::cerr << "[V] read " << incnt << " alignments" << std::endl;
+
+ // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs);
+ BamSortWriter BSW(chksums,upheadtext,sort_order,markduplicates,rmdup,arginfo,header,Pcbs);
+ BEC.createOutput(*(BSW.Pout), verbose);
+ BSW.flush();
+ }
else if ( sort_order == sort_order_queryname_HI )
{
::libmaus2::bambam::BamEntryContainer< ::libmaus2::bambam::BamAlignmentNameHIComparator >
View it on GitLab: https://salsa.debian.org/med-team/biobambam2/-/compare/8671b1757298f38d5488eb8033a567756e02c2d0...41211d249a26c176a5d54a89b9471bf146229d16
--
View it on GitLab: https://salsa.debian.org/med-team/biobambam2/-/compare/8671b1757298f38d5488eb8033a567756e02c2d0...41211d249a26c176a5d54a89b9471bf146229d16
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221008/b9e6cc21/attachment-0001.htm>
More information about the debian-med-commit mailing list