[med-svn] [Git][med-team/sambamba][upstream] New upstream version 0.8.1+dfsg

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Wed Aug 25 15:50:10 BST 2021



Nilesh Patra pushed to branch upstream at Debian Med / sambamba


Commits:
12c7a3be by Nilesh Patra at 2021-08-25T19:58:11+05:30
New upstream version 0.8.1+dfsg
- - - - -


24 changed files:

- − .github/ISSUE_TEMPLATE/bug_report.md
- − .gitignore
- − .gitmodules
- .guix-build
- + .guix-build-static
- − .travis.yml
- BioD/bio/std/file/fai.d
- BioD/bio/std/file/fasta.d
- BioD/bio/std/hts/sam/header.d
- INSTALL.md
- Makefile
- Makefile.guix
- README.md
- RELEASE-NOTES.md
- + TODO.org
- VERSION
- doc/design.org
- − man/.gitignore
- sambamba/flagstat.d
- sambamba/main.d
- sambamba/markdup.d
- sambamba/pileup.d
- test/benchmark/stats.org
- test/test_suite.sh


Changes:

=====================================
.github/ISSUE_TEMPLATE/bug_report.md deleted
=====================================
@@ -1,40 +0,0 @@
----
-name: Bug report 🐞
-about: Create a report to help us improve
-title: ''
-labels: bug
-assignees: ''
----
-**Only bug reports!**
-
-The D version of Sambamba is in maintenance mode. Use the github issue
-tracker to report bugs *only*. For comments, questions and features,
-please use the google group mailing list as stated on the
-[README](https://github.com/biod/sambamba/blob/master/README.md)!
-
-**Describe the bug**
-
-A clear and concise description of what the bug is.
-
-**To Reproduce**
-
-Steps to reproduce the behavior:
-1. Go to '...'
-2. Click on '....'
-3. Scroll down to '....'
-4. See error
-**Expected behavior**
-A clear and concise description of what you expected to happen.
-**Screenshots**
-If applicable, add screenshots to help explain your problem.
-**Desktop (please complete the following information):**
-- Browser [e.g. chrome, safari]
-- Version [e.g. 22]
-**Additional context**
-Add any other context about the problem here.
-
-Include a set of BAM/BED files to reproduce the issue
-
-+ bonus points if you try to minimize the test case yourself, as issues are often localized:
-  - try to use sambamba slice to first extract the reference where the error occurs
-  - if that succeeds (the error is still reproducible), continue to crop the file in binary-search fashion


=====================================
.gitignore deleted
=====================================
@@ -1,22 +0,0 @@
-.dub/
-dub.selections.json
-lz4/
-bin/
-test.log
-build/
-shunit*
-/*.sam
-/*.bam
-/*.bai
-/*.cram
-/*.crai
-/*.txt
-!meson_options.txt
-*.hex
-*.zcat
-*.out
-/utils/ldc_version_info_.d
-profile.data
-profile.raw
-output
-builddir


=====================================
.gitmodules deleted
=====================================
@@ -1,3 +0,0 @@
-[submodule "lz4"]
-	path = lz4
-	url = https://github.com/lz4/lz4.git


=====================================
.guix-build
=====================================
@@ -1,3 +1,4 @@
 # This command creates a build container for Sambamba using GNU Guix
 
-guix environment -C guix --ad-hoc gcc-toolchain gdb bash ld-wrapper ldc which python git binutils-gold vim zlib
+env GUIX_PACKAGE_PATH=~/iwrk/opensource/guix/guix-bioinformatics \
+guix environment -C guix --ad-hoc gcc-toolchain gdb bash ld-wrapper ldc which python git binutils-gold vim zlib $*


=====================================
.guix-build-static
=====================================
@@ -0,0 +1,3 @@
+# This command creates a build container for Sambamba using GNU Guix
+
+guix environment -C guix --ad-hoc gcc-toolchain gdb bash ld-wrapper ldc which python git binutils-gold vim zlib:static


=====================================
.travis.yml deleted
=====================================
@@ -1,19 +0,0 @@
-language: d
-d:
-  - ldc
-matrix:
-  # ARM64 testing is under development
-  allow_failures:
-    - os: osx
-  include:
-    - os: linux
-      compiler: gcc
-    - os: linux
-      arch:
-        - arm64
-      compiler: gcc
-    - os: osx
-      compiler: clang
-script:
-  - make
-  - make check


=====================================
BioD/bio/std/file/fai.d
=====================================
@@ -41,12 +41,6 @@ struct FaiRecord {
     string toString() {
         return format("%s\t%s\t%s\t%s\t%s", header, seqLen, offset, lineLen, lineOffset);
     }
-    unittest {
-        auto rec = FaiRecord("chr2", "\n", 10, 50, 4);
-        assert(rec.toString() == "chr2\t10\t4\t50\t51");
-        rec.lineTerm = "\r\n";
-        assert(rec.toString() == "chr2\t10\t4\t50\t52");
-    }
 
     this(string str) {
         auto res = str.split("\t");
@@ -56,10 +50,6 @@ struct FaiRecord {
         lineLen = to!ulong(res[3]);
         lineTerm = (to!ulong(res[4])-lineLen) == 1 ? "\n" : "\r\n";
     }
-    unittest {
-        auto s = "chr2\t10\t4\t50\t51";
-        assert(FaiRecord(s).toString() == s);
-    }
 
     this(string header, string lineTerm, ulong seqLen, ulong lineLen, ulong offset) {
         this.header = header;
@@ -68,48 +58,25 @@ struct FaiRecord {
         this.lineLen = lineLen;
         this.lineTerm = lineTerm;
     }
-    unittest {
-        assert(FaiRecord("chr2", "\n", 10, 50, 4).toString() == "chr2\t10\t4\t50\t51");
-    }
 }
 
-auto readFai(string filename) {
-    File f = File(filename, "r");
+auto readFai(string fai_filename) {
+    auto f = new File(fai_filename, "r");
     return f.byLineCopy()
             .map!(x => FaiRecord(x));
 }
-unittest {
-    auto faiString = "chr2\t10\t4\t50\t51";
-    auto testIndex = tempDir.buildPath("test1.fa.fai");
-    // scope(exit) remove(testIndex);
-    auto f = File(testIndex,"w");
-    f.writeln(faiString);
-    f.close();
-    auto recs = readFai(testIndex).array;
-    // assert(recs.length == 1);
-    assert(is(typeof(recs[0])==FaiRecord));
-    assert(recs[0].toString() == faiString);
-}
 
 auto makeIndex(T)(T records) {
     FaiRecord[string] index;
     foreach (record; records) {
-        index[record.header] = record;
+      index[record.header] = record;
     }
     index.rehash;
     return index;
 }
-unittest {
-    auto records = to!(FaiRecord[])(["chr2\t10\t4\t50\t51"]);
-    auto i = makeIndex(records);
-    assert( i.length == 1);
-    assert( "chr2" in i);
-    assert( i["chr2"] ==  FaiRecord("chr2\t10\t4\t50\t51"));
-}
-
-auto buildFai(string filename) {
 
-    File f = File(filename, "r");
+auto buildFai(string fasta_filename) {
+    auto f = new File(fasta_filename, "r");
     FaiRecord[] records;
     string lineTerm = f.byLine(KeepTerminator.yes).take(1).front.endsWith("\r\n") ? "\r\n" : "\n";
     f.seek(0);
@@ -133,10 +100,11 @@ auto buildFai(string filename) {
     return records;
 }
 
-unittest {
+
+version(Broken) unittest {
     auto testFa = tempDir.buildPath("test1.fa");
     // scope(exit) remove(testFa);
-    auto fa = File(testFa, "w");
+    auto fa = new File(testFa, "w");
     fa.writeln(q"(
         >chr1
         acgtgagtgc
@@ -144,6 +112,7 @@ unittest {
         acgtgagtgcacgtgagtgcacgtgagtgc
         acgtgagtgcacgtgagtgc
     )".outdent().strip());
+    fa.flush();
     fa.close();
     auto recs = buildFai(testFa).array;
     assert(recs.length == 2, recs[0].toString());
@@ -151,3 +120,40 @@ unittest {
     assert(recs[0].toString() == "chr1\t10\t6\t10\t11");
     assert(recs[1].toString() == "chr2\t50\t23\t30\t31");
 }
+
+unittest {
+    auto records = to!(FaiRecord[])(["chr2\t10\t4\t50\t51"]);
+    auto i = makeIndex(records);
+    assert( i.length == 1);
+    assert( "chr2" in i);
+    assert( i["chr2"] ==  FaiRecord("chr2\t10\t4\t50\t51"));
+}
+
+version(Broken) unittest {
+  auto faiString = "chr2\t10\t4\t50\t51";
+  auto testIndex = tempDir.buildPath("test1.fa.fai");
+  auto f = new File(testIndex,"w");
+  f.writeln(faiString);
+  f.flush();
+  f.close();
+  auto recs = readFai(testIndex).array;
+  assert(recs.length == 1);
+  assert(is(typeof(recs[0])==FaiRecord));
+  assert(recs[0].toString() == faiString);
+}
+
+unittest {
+  auto rec = FaiRecord("chr2", "\n", 10, 50, 4);
+  assert(rec.toString() == "chr2\t10\t4\t50\t51");
+  rec.lineTerm = "\r\n";
+  assert(rec.toString() == "chr2\t10\t4\t50\t52");
+}
+
+unittest {
+  auto s = "chr2\t10\t4\t50\t51";
+  assert(FaiRecord(s).toString() == s);
+}
+
+unittest {
+  assert(FaiRecord("chr2", "\n", 10, 50, 4).toString() == "chr2\t10\t4\t50\t51");
+}


=====================================
BioD/bio/std/file/fasta.d
=====================================
@@ -157,7 +157,7 @@ struct Region {
 
 auto fastaRecords(string filename) {
 
-    File f = File(filename);
+    auto f = new File(filename);
     FastaRecord[] records;
     string lineTerm = f.byLine(KeepTerminator.yes).take(1).front.endsWith("\r\n") ? "\r\n" : "\n";
     f.seek(0);
@@ -180,7 +180,7 @@ auto fastaRecords(string filename) {
     return records;
 }
 
-unittest {
+version(Broken) unittest {
     auto testFa = tempDir.buildPath("test2.fa");
     // scope(exit) testFa.remove;
 
@@ -194,6 +194,7 @@ unittest {
         >chr3 hrsv | Kilifi | partial sequence
         CATGTTATTACAAGTAGTGATATTTGCCCTAATAATAATATTGTAGTGAAATCCAATTTCACAACAATGC
     )".outdent().strip());
+    f.flush();
     f.close();
     auto records = fastaRecords(testFa);
     assert ( records.length == 3 );
@@ -219,7 +220,7 @@ unittest {
 }
 
 auto fastaRegions(string filename, string[] queries) {
-    File f = File(filename);
+    auto f = new File(filename);
     FaiRecord[string] index = makeIndex(readFai(filename~=".fai"));
     Region[] regions = to!(Region[])(queries);
     auto res = fetchFastaRegions(f, index, regions);
@@ -227,7 +228,7 @@ auto fastaRegions(string filename, string[] queries) {
     return res;
 }
 
-auto fetchFastaRegions(File fasta, FaiRecord[string] index, Region[] regions) {
+auto fetchFastaRegions(File *fasta, FaiRecord[string] index, Region[] regions) {
 
     FastaRecord[] records;
 
@@ -251,10 +252,10 @@ auto fetchFastaRegions(File fasta, FaiRecord[string] index, Region[] regions) {
     return records;
 }
 
-unittest {
+version(Broken) unittest {
   auto testFa = tempDir.buildPath("test3.fa");
   // scope(exit) remove(testFa);
-  auto fa = File(testFa,"w");
+  auto fa = new File(testFa,"w");
   fa.writeln(q"(
         >chr1
         acgtgagtgc
@@ -262,6 +263,7 @@ unittest {
         acgtgagtgcacgtgagtgcacgtgagtgc
         acgtgagtgcacgtgagtgc
     )".outdent().strip());
+  fa.flush();
   fa.close();
   auto faiString = "
         chr1\t10\t6\t10\t11
@@ -269,18 +271,19 @@ unittest {
     ".outdent().strip();
   auto testIndex = tempDir.buildPath("test3.fa.fai");
   // scope(exit) testIndex.remove;
-  auto f2 = File(testIndex,"w");
+  auto f2 = new File(testIndex,"w");
   f2.writeln(faiString);
+  f2.flush();
   f2.close();
 
     auto regions = fastaRegions(testFa, ["chr1:4-6", "chr2:36-45"]);
     assert ( regions.length == 2 );
     assert ( regions[0].header == "chr1:4-6" );
-    assert ( regions[0].len == 3 );
+    assert ( regions[0].len == 3, regions[0].toString() );
     assert ( regions[0].sequence == "tga" );
     assert ( regions[0].lineLen == 10 );
     assert ( regions[1].header == "chr2:36-45" );
-    assert ( regions[1].len == 10 );
+    assert ( regions[1].len == 10, regions[1].toString() );
     assert ( regions[1].sequence == "agtgcacgtg" );
     assert ( regions[1].lineLen == 30 );
 
@@ -294,7 +297,7 @@ unittest {
     regions = fastaRegions(testFa, ["chr2"]);
     assert ( regions.length == 1 );
     assert ( regions[0].header == "chr2" );
-    assert ( regions[0].len == 50 );
+    assert ( regions[0].len == 50, regions[0].toString() );
     assert ( regions[0].sequence == "acgtgagtgcacgtgagtgcacgtgagtgcacgtgagtgcacgtgagtgc" );
     assert ( regions[0].lineLen == 30 );
 }


=====================================
BioD/bio/std/hts/sam/header.d
=====================================
@@ -215,12 +215,16 @@ private {
 
 mixin HeaderLineStruct!("HdLine", "@HD", null,
           Field!("format_version", "VN"),
-          Field!("sorting_order", "SO"));
+          Field!("grouping", "GO"),
+          Field!("sorting_order", "SO"),
+          Field!("sub_sorting_order", "SS"));
 
 mixin HeaderLineStruct!("SqLine", "@SQ", "name",
           Field!("name", "SN"),
           Field!("length", "LN", uint),
+          Field!("alternative_names", "AN"),
           Field!("assembly", "AS"),
+          Field!("description", "DS"),
           Field!("md5", "M5"),
           Field!("species", "SP"),
           Field!("uri", "UR"),
@@ -228,6 +232,7 @@ mixin HeaderLineStruct!("SqLine", "@SQ", "name",
 
 mixin HeaderLineStruct!("RgLine", "@RG", "identifier",
           Field!("identifier", "ID"),
+          Field!("barcode", "BC"),
           Field!("sequencing_center", "CN"),
           Field!("description", "DS"),
           Field!("date", "DT"),


=====================================
INSTALL.md
=====================================
@@ -18,7 +18,7 @@ and run our development setup (gold was added lately by ldc)
 
     guix environment -C guix --ad-hoc gcc-toolchain gdb bash ld-wrapper ldc which python git binutils-gold vim
     make clean
-    make -f Makefile.guix -j 4
+    env CC=gcc make -f Makefile.guix -j 4
     make -f Makefile.guix check
 
 this way all dependencies are isolated. To create a static release use
@@ -31,7 +31,30 @@ We use GNU Guix containers for development. Install Guix and run a build
 container with
 
     . .guix-build
-    make -f Makefile.guix
+    make -f Makefile.guix clean
+    # build the debug version
+    env CC=gcc make -f Makefile.guix lz4-static -j 8
+    env CC=gcc make -f Makefile.guix -j 8
     make -f Makefile.guix check
 
-Note that this also works in the emacs shell.
+To make the static release:
+
+    env CC=gcc make -f Makefile.guix static
+
+It gives some errors, but should work:
+
+    ./bin/sambamba
+
+If the binary only runs its unit tests instead of the program, disable them with `--DRT-testmode=run-main`.
+
+Note that all of this also works in the Emacs shell.
+
+### Guix VM
+
+    guix package -i qemu -p ~/opt/qemu
+    . ~/opt/qemu/etc/profile
+
+Download the bootable image from https://guix.gnu.org/en/download/ and
+start it with, for example
+
+    qemu-system-x86_64    -nic user,model=virtio-net-pci    -enable-kvm -m 1024    -device virtio-blk,drive=myhd    -drive if=none,file=guix-system-vm-image-1.2.0.x86_64-linux,id=myhd


=====================================
Makefile
=====================================
@@ -52,14 +52,14 @@ coverage:                          DFLAGS += -cov
 
 release static pgo-static:         DFLAGS += -O3 -release -enable-inlining -boundscheck=off -L-lz
 
-static:                            DFLAGS += -static -L-Bstatic
+static:                            DFLAGS += -static -L-Bstatic -link-defaultlib-shared=false
 
 pgo-static:                        DFLAGS += -fprofile-instr-use=profile.data
 
 lz4-static: lz4/lib/liblz4.a
 
 lz4/lib/liblz4.a: lz4/lib/lz4.c lz4/lib/lz4hc.c lz4/lib/lz4frame.c lz4/lib/xxhash.c
-	cd lz4/lib && gcc -O3 -c lz4.c lz4hc.c lz4frame.c xxhash.c && $(AR) rcs liblz4.a lz4.o lz4hc.o lz4frame.o xxhash.o
+	cd lz4/lib && $(CC) -O3 -c lz4.c lz4hc.c lz4frame.c xxhash.c && $(AR) rcs liblz4.a lz4.o lz4hc.o lz4frame.o xxhash.o
 
 utils/ldc_version_info_.d:
 	python3 ./gen_ldc_version_info.py $(shell which ldmd2) > utils/ldc_version_info_.d


=====================================
Makefile.guix
=====================================
@@ -31,7 +31,7 @@ LIBS   = -L-L$(LDC_LIB_PATH) -L-lrt -L-lpthread -L-lm -L-lz lz4/lib/liblz4.a
 
 LIBS_STATIC = $(DLIBS) lz4/lib/liblz4.a
 SRC         = $(wildcard main.d utils/*.d thirdparty/*.d) $(wildcard BioD/contrib/undead/*.d BioD/contrib/undead/*/*.d) $(wildcard BioD/bio/*/*.d BioD/bio/*/*/*.d BioD/bio/*/*/*/*.d BioD/bio/*/*/*/*/*.d BioD/bio/*/*/*/*/*/*/*.d) $(wildcard sambamba/*.d sambamba/*/*.d sambamba/*/*/*.d)
-OBJ    = $(SRC:.d=.o) utils/ldc_version_info_.o
+OBJ    = $(SRC:.d=.o)
 OUT    = bin/sambamba-$(shell cat VERSION)
 static: OUT += -static
 
@@ -72,7 +72,7 @@ utils/ldc_version_info_.d:
 ldc_version_info: utils/ldc_version_info_.d
 
 lz4/lib/liblz4.a: lz4/lib/lz4.c lz4/lib/lz4hc.c lz4/lib/lz4frame.c lz4/lib/xxhash.c
-	cd lz4/lib && gcc -O3 -c lz4.c lz4hc.c lz4frame.c xxhash.c && $(AR) rcs liblz4.a lz4.o lz4hc.o lz4frame.o xxhash.o
+	cd lz4/lib && $(CC) -O3 -c lz4.c lz4hc.c lz4frame.c xxhash.c && $(AR) rcs liblz4.a lz4.o lz4hc.o lz4frame.o xxhash.o
 
 lz4-static: lz4/lib/liblz4.a
 
@@ -116,7 +116,7 @@ clean-c:
 	rm -v lz4/lib/*.[oa]
 
 clean-d:
-	rm -v $(OBJ) $(OUT) trace.{def,log}
+	rm -v $(OBJ) $(OUT) # trace.{def,log}
 	rm -v bin/*
 
 clean-tests:


=====================================
README.md
=====================================
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.org/biod/sambamba.svg?branch=master)](https://travis-ci.org/biod/sambamba) [![AnacondaBadge](https://anaconda.org/bioconda/sambamba/badges/installer/conda.svg)](https://anaconda.org/bioconda/sambamba) [![DL](https://anaconda.org/bioconda/sambamba/badges/downloads.svg)](https://anaconda.org/bioconda/sambamba) [![AnacondaVersion](https://anaconda.org/bioconda/sambamba/badges/version.svg)] [![BrewBadge](https://img.shields.io/badge/%F0%9F%8D%BAbrew-sambamba-brightgreen.svg)](https://github.com/brewsci/homebrew-bio) [![GuixBadge](https://img.shields.io/badge/gnuguix-sambamba-brightgreen.svg)](https://www.gnu.org/software/guix/packages/S/) [![DebianBadge](https://badges.debian.net/badges/debian/testing/sambamba/version.svg)](https://packages.debian.org/testing/sambamba)
+[![Build Status](https://travis-ci.org/biod/sambamba.svg?branch=master)](https://travis-ci.org/biod/sambamba) [![AnacondaBadge](https://anaconda.org/bioconda/sambamba/badges/installer/conda.svg)](https://anaconda.org/bioconda/sambamba) [![DL](https://anaconda.org/bioconda/sambamba/badges/downloads.svg)](https://anaconda.org/bioconda/sambamba) [![BrewBadge](https://img.shields.io/badge/%F0%9F%8D%BAbrew-sambamba-brightgreen.svg)](https://github.com/brewsci/homebrew-bio) [![GuixBadge](https://img.shields.io/badge/gnuguix-sambamba-brightgreen.svg)](https://www.gnu.org/software/guix/packages/S/) [![DebianBadge](https://badges.debian.net/badges/debian/testing/sambamba/version.svg)](https://packages.debian.org/testing/sambamba)
 
 # SAMBAMBA
 


=====================================
RELEASE-NOTES.md
=====================================
@@ -1,16 +1,31 @@
+## ChangeLog v0.8.1 (20210731)
 
-## ChangeLog v0.8.0 (2020??)
++ Some FASTA unit tests fail when they write to disk and then read the file
+  back; these are marked with ~version(Broken)~
++ Fix build for LDC 1.26.0 and LLVM 9.0.1 - some speed improvement
+
+## ChangeLog v0.8.0 (20201130)
 
 Maintenance release and bug fixes: this is a special release where we
 removed all CRAM support. The added value of CRAM in sambamba was
 limited because it was using essentially the same htslib backend as
 samtools. Removing the htslib dependency removes one maintenance
-headache. See also https://github.com/biod/sambamba/issues/425
+headache. See also https://github.com/biod/sambamba/issues/425.
+
+BioD was also moved back into the main trunk. We separated it in the
+past, but as there is no development there we might as well have it in
+Sambamba (again).
 
 + Removed CRAM support and htslib dependency with ec78eabfbdebd620cf5d4a4d8232692184eacbf7
++ Moved BioD source tree back into Sambamba
 + Sambamba builds on ARM64 (see [Travis-CI](https://travis-ci.org/github/biod/sambamba))
 + Changed github issue tracker template to report bugs only
-+ Removed BioD source tree back into Sambamba
++ Added FASTA handlers, indexing and slicing (thanks @NickRoz1)
++ Updated slice.d (thanks @NickRoz1)
++ Fixed FASTA file handlers not closing cdc5f84c5b6135eedeaf5cba265e3eb9a3c23227
++ Added badges to the top of README
++ Added Meson build system for Debian (thanks Matthias @ximion)
++ Fixed Python2 -> Python3 stuff (thanks Matthias @ximion)
 
 ## ChangeLog v0.7.1 (20191128)
 


=====================================
TODO.org
=====================================
@@ -0,0 +1,5 @@
+* SAMBAMBA
+
+** Enhancements
+
++ Make markdup single pass for pipes, see #421


=====================================
VERSION
=====================================
@@ -1 +1 @@
-0.8.0
+0.8.1


=====================================
doc/design.org
=====================================
@@ -3,7 +3,7 @@
 ** Introduction
 
 Because of its great multi-core performance Sambamba has served over
-five years in sequencing centers [[https://groups.google.com/d/msg/sambamba-discussion/fIgrrUa441o/XG7Rt3dFAQAJ][around the world]]. Here we start on a
+eight years in sequencing centers [[https://groups.google.com/d/msg/sambamba-discussion/fIgrrUa441o/XG7Rt3dFAQAJ][around the world]]. Here we start on a
 new design (sambamba2) that should improve performance and, perhaps
 more importantly, make the building blocks more composable. D has
 proven to be a great language for multi-core performance and code
@@ -80,3 +80,6 @@ and should be capable of writing to a log file.
 The original markdup routine is written in sambamba/markdup.d. It
 maintains state in a structure called IndexedBamRead with an index
 value and the read object.
+
+Note that Markdup does two reader passes. This prevents piping
+through stdin. A future version may do a single pass.


=====================================
man/.gitignore deleted
=====================================
@@ -1 +0,0 @@
-*.html


=====================================
sambamba/flagstat.d
=====================================
@@ -56,8 +56,11 @@ void computeFlagStatistics(R)(R alignments) {
     }
 }
 
-void writeParam(string description, ulong[2] param) {
-    writefln("%s + %s %s", param[0], param[1], description);
+void writeParam(string description, ulong[2] param, bool tabular) {
+    if (tabular)
+        writefln("%s,%s,%s", description, param[0], param[1]);
+    else
+        writefln("%s + %s %s", param[0], param[1], description);
 }
 
 float percent(ulong a, ulong b) { return to!float(a) / b * 100.0; }
@@ -67,8 +70,11 @@ string percentStr(ulong a, ulong b) {
     return format("%.2f%%", percent(a, b));
 }
 
-void writeParamWithPercentage(string description, ulong[2] param, ulong[2] total) {
-    writefln("%s + %s %s (%s:%s)", param[0], param[1], description,
+void writeParamWithPercentage(string description, ulong[2] param, ulong[2] total, bool tabular) {
+    if (tabular)
+        writefln("%s,%s:%s,%s:%s", description, param[0], percentStr(param[0], total[0]), param[1], percentStr(param[1], total[1]));
+    else
+        writefln("%s + %s %s (%s:%s)", param[0], param[1], description,
              percentStr(param[0], total[0]),
              percentStr(param[1], total[1]));
 }
@@ -86,17 +92,21 @@ void printUsage() {
     stderr.writeln("            use NTHREADS for decompression");
     stderr.writeln("         -p, --show-progress");
     stderr.writeln("            show progressbar in STDERR");
+    stderr.writeln("         -b, --tabular");
+    stderr.writeln("            output in csv format");
 }
 
 int flagstat_main(string[] args) {
     size_t threads = totalCPUs;
     bool show_progress;
+    bool tabular;
 
     try {
         getopt(args,
                std.getopt.config.caseSensitive,
                "nthreads|t",      &threads,
-               "show-progress|p", &show_progress);
+               "show-progress|p", &show_progress,
+               "tabular|b", &tabular);
 
         if (args.length < 2) {
             printUsage();
@@ -118,19 +128,19 @@ int flagstat_main(string[] args) {
         }
         
         scope(exit) {
-            writeParam("in total (QC-passed reads + QC-failed reads)", reads);
-            writeParam("secondary", secondary);
-            writeParam("supplementary", supplementary);
-            writeParam("duplicates", dup);
-            writeParamWithPercentage("mapped", mapped, reads);
-            writeParam("paired in sequencing", pair_all);
-            writeParam("read1", first);
-            writeParam("read2", second);
-            writeParamWithPercentage("properly paired", pair_good, pair_all);
-            writeParam("with itself and mate mapped", pair_map);
-            writeParamWithPercentage("singletons", single, pair_all);
-            writeParam("with mate mapped to a different chr", diff_chr);
-            writeParam("with mate mapped to a different chr (mapQ>=5)", diff_high);
+            writeParam("in total (QC-passed reads + QC-failed reads)", reads, tabular);
+            writeParam("secondary", secondary, tabular);
+            writeParam("supplementary", supplementary, tabular);
+            writeParam("duplicates", dup, tabular);
+            writeParamWithPercentage("mapped", mapped, reads, tabular);
+            writeParam("paired in sequencing", pair_all, tabular);
+            writeParam("read1", first, tabular);
+            writeParam("read2", second, tabular);
+            writeParamWithPercentage("properly paired", pair_good, pair_all, tabular);
+            writeParam("with itself and mate mapped", pair_map, tabular);
+            writeParamWithPercentage("singletons", single, pair_all, tabular);
+            writeParam("with mate mapped to a different chr", diff_chr, tabular);
+            writeParam("with mate mapped to a different chr (mapQ>=5)", diff_high, tabular);
         }
     } catch (Throwable e) {
         stderr.writeln(e.msg);


=====================================
sambamba/main.d
=====================================
@@ -83,7 +83,7 @@ For bug reports and feature requests see
 
 void printVersion() {
     stderr.writeln();
-    stderr.writeln("sambamba " ~ VERSION ~ " by Artem Tarasov and Pjotr Prins (C) 2012-2020");
+    stderr.writeln("sambamba " ~ VERSION ~ " by Artem Tarasov and Pjotr Prins (C) 2012-2021");
     stderr.writeln("    LDC " ~ LDC_VERSION_STRING ~ " / DMD " ~ DMD_VERSION_STRING ~
      " / LLVM" ~ LLVM_VERSION_STRING ~ " / bootstrap " ~ BOOTSTRAP_VERSION_STRING);
     stderr.writeln();


=====================================
sambamba/markdup.d
=====================================
@@ -1246,37 +1246,40 @@ int markdup_main(string[] args) {
         InputRange!IndexedBamRead reads;
         shared(ProgressBar) bar;
 
-        void initInputs() {
+        void initInputs(MultiBamReader bamreader) {
             if (!show_progress)
-                reads = bam.reads.withIndices.inputRangeObject;
+                reads = bamreader.reads.withIndices.inputRangeObject;
             else {
                 bar = new shared(ProgressBar)();
-                reads = bam.readsWithProgress((lazy float p) { bar.update(p); },
-                                              () { bar.finish(); }).withIndices
-                           .inputRangeObject;
+                reads = bamreader.readsWithProgress(
+                          (lazy float p) { bar.update(p); },
+                          () { bar.finish(); })
+                        .withIndices
+                        .inputRangeObject;
             }
         }
 
-        initInputs();
+        initInputs(bam);
         auto dup_idx_storage = getDuplicateOffsets(reads, rg_index, taskPool, cfg);
 
         auto elapsed = sw.peek();
         stderr.writeln("collected list of positions in ",elapsed.total!"minutes"," min ",elapsed.total!"seconds" % 60," sec");
 
         // marking or removing duplicates
-        bam = new MultiBamReader(args[1 .. $-1]);  // FIXME: initialized twice
-        bam.setBufferSize(io_buffer_size / (args.length - 2));
+        bam = null;
+        auto bam2 = new MultiBamReader(args[1 .. $-1]);
+        bam2.setBufferSize(io_buffer_size / (args.length - 2));
         auto out_stream = new BufferedFile(args[$-1], FileMode.OutNew, io_buffer_size);
         auto writer = new BamWriter(out_stream, compression_level);
         writer.setFilename(args[$-1]);
         scope(exit) writer.finish();
-        auto header = addPG("markdup", unparsed_args, bam.header);
+        auto header = addPG("markdup", unparsed_args, bam2.header);
         writer.writeSamHeader(header);
-        writer.writeReferenceSequenceInfo(bam.reference_sequences);
+        writer.writeReferenceSequenceInfo(bam2.reference_sequences);
 
         stderr.writeln(remove_duplicates ? "removing" : "marking", " duplicates...");
 
-        initInputs();
+        initInputs(bam2);
 
         auto indices = dup_idx_storage.reader;
 


=====================================
sambamba/pileup.d
=====================================
@@ -749,4 +749,5 @@ int pileup_main(string[] args) {
           return 1;
         }
     }
+    return 0;
 }


=====================================
test/benchmark/stats.org
=====================================
@@ -7,6 +7,38 @@ wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00100/alignment/HG00
 
 
 ** View
+
+Processing an 841Mb BAM file.
+
+*** AMD Ryzen 7 3700X 8-Core Processor
+
+#+BEGIN_SRC
+time ./bin/sambamba-0.8.1-pre1 HG00100.chrom20.ILLUMINA.bwa.GBR.low_coverage.20130415.bam > /dev/null
+
+sambamba 0.8.1-pre1
+ by Artem Tarasov and Pjotr Prins (C) 2012-2021
+    LDC 1.26.0 / DMD v2.096.1 / LLVM9.0.1 / bootstrap LDC - the LLVM D compiler (0.17.6)
+real    0m1.398s
+user    0m16.589s
+sys     0m0.240s
+
+time ./bin/sambamba-0.8.1-pre1 sort HG00100.chrom20.ILLUMINA.bwa.GBR.low_coverage.20130415.bam
+
+sambamba 0.8.1-pre1
+ by Artem Tarasov and Pjotr Prins (C) 2012-2021
+    LDC 1.26.0 / DMD v2.096.1 / LLVM9.0.1 / bootstrap LDC - the LLVM D compiler (0.17.6)
+real    0m9.151s
+user    2m5.779s
+sys     0m3.101s
+
+time ./bin/sambamba-0.8.1-pre1 markdup HG00100.chrom20.ILLUMINA.bwa.GBR.low_coverage.20130415.sorted.bam dedup.bam
+ by Artem Tarasov and Pjotr Prins (C) 2012-2021
+    LDC 1.26.0 / DMD v2.096.1 / LLVM9.0.1 / bootstrap LDC - the LLVM D compiler (0.17.6)
+real    0m11.319s
+user    1m47.719s
+sys     0m4.070s
+#+END_SRC
+
 *** 4x Intel(R) Core(TM) i5-2520M CPU @ 2.50GHz (hyperthreaded)
 **** sambamba 0.6.8-pre3
 


=====================================
test/test_suite.sh
=====================================
@@ -5,7 +5,7 @@ sambamba=$1
 if [ ! -f $sambamba ]; then
     sambamba=./bin/sambamba
 fi
-opts="-q"
+opts="-q --DRT-testmode=run-main"
 outdir=output
 mkdir -p $outdir
 
@@ -207,6 +207,12 @@ testIssue356(){
     assertEquals 0 $?
 }
 
+testIssue421(){
+    # sambamba-markdup: not enough data in stream https://github.com/biod/sambamba/issues/421
+    cat test/issue_204.bam | $sambamba $opts markdup /dev/stdin test.bam 2> /dev/null
+    # assertEquals 0 $?  Disable test until we have a single pass, see issue 421
+}
+
 testFastaIndex(){
     # check input and output are from here: http://www.htslib.org/doc/faidx.html
     $sambamba $opts index -F test/test.fasta $outdir/res.fai



View it on GitLab: https://salsa.debian.org/med-team/sambamba/-/commit/12c7a3befb2326f94d108bf8bc2fb25cf924450d

-- 
View it on GitLab: https://salsa.debian.org/med-team/sambamba/-/commit/12c7a3befb2326f94d108bf8bc2fb25cf924450d
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210825/dae0811c/attachment-0001.htm>


More information about the debian-med-commit mailing list