[med-svn] [Git][med-team/mosdepth][master] 3 commits: New upstream version 0.3.1

Steffen Möller gitlab at salsa.debian.org
Mon Nov 2 21:50:41 GMT 2020



Steffen Möller pushed to branch master at Debian Med / mosdepth


Commits:
4466b303 by Steffen Moeller at 2020-11-02T22:39:00+01:00
New upstream version 0.3.1
- - - - -
6e9fc821 by Steffen Moeller at 2020-11-02T22:39:02+01:00
Update upstream source from tag 'upstream/0.3.1'

Update to upstream version '0.3.1'
with Debian dir 2db198d2533e9ccc8c79c1e2eee9983e973d8b1a
- - - - -
8f0e0415 by Steffen Moeller at 2020-11-02T22:50:09+01:00
New upstream version - compiles.

- - - - -


22 changed files:

- + .github/workflows/build.yml
- .gitignore
- .travis.yml
- CHANGES.md
- README.md
- + debian/TODO
- debian/changelog
- debian/control
- debian/copyright
- + debian/manpages
- + debian/mosdepth.install
- debian/rules
- debian/upstream/metadata
- debian/watch
- + depthstat.nim
- functional-tests.sh
- + int2str.nim
- mosdepth.nim
- mosdepth.nimble
- scripts/install.sh
- scripts/plot-dist.py
- tests/funcs.nim


Changes:

=====================================
.github/workflows/build.yml
=====================================
@@ -0,0 +1,113 @@
+# copied from Daniel Cook's Seq collection
+name: Build
+
+on: 
+  - push
+  - pull_request
+
+jobs:
+  build:
+
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-18.04, macos-10.15]
+        nimversion:
+        - stable
+        - devel
+
+
+    steps:
+    - uses: actions/checkout at v2
+
+    # Caching
+    - name: Cache choosenim
+      id: cache-choosenim
+      uses: actions/cache at v1
+      with:
+        path: ~/.choosenim
+        key: ${{ runner.os }}-choosenim-stable
+
+    - name: Cache nimble
+      id: cache-nimble
+      uses: actions/cache at v1
+      with:
+        path: ~/.nimble
+        key: ${{ runner.os }}-nimble-stable
+
+    - name: Cache htslib
+      id: cache-htslib
+      uses: actions/cache at v1
+      with:
+        path: $HOME/htslib
+        key: ${{ runner.os }}-htslib-1.10
+
+    # Install Dependencies
+    - name: Install dependencies (Linux)
+      if: runner.os == 'Linux'
+      run: |
+        sudo apt-get update
+        sudo apt-get -qy install bwa make build-essential cmake libncurses-dev ncurses-dev libbz2-dev lzma-dev liblzma-dev \
+             curl libssl-dev libtool autoconf automake libcurl4-openssl-dev
+
+
+    - name: Install d4 
+        git clone https://github.com/38/d4-format
+        cd d4-format
+        cargo build --release
+        sudo cp ../d4-format/target/release/libd4binding.* /usr/local/lib
+        sudo cp ./d4binding/include/d4.h /usr/local/include/
+
+    # Setup htslib
+    - name: Install htslib (linux)
+      if: runner.os == 'Linux'
+      run: |
+        cd
+        git clone --recursive https://github.com/samtools/htslib.git
+        cd htslib && git checkout 1.10 && autoheader && autoconf && ./configure --enable-libcurl
+        cd
+        make -j 4 -C htslib
+        echo "::set-env name=LD_LIBRARY_PATH::${LD_LIBRARY_PATH}:${HOME}/htslib"
+        ls -lh $HOME/htslib/*.so
+
+    - name: Install hstlib (macos)
+      if: runner.os == 'macOS'
+      run: |
+        brew install htslib
+
+    - uses: iffy/install-nim at v1.1
+      with:
+        nimversion: ${{ matrix.nimversion }}
+
+    - uses: actions-rs/toolchain at v1
+      with:
+        toolchain: stable
+    - uses: actions-rs/cargo at v1
+
+
+    # Build and Test
+    - name: Build test executable
+      run: nimble build -Y mosdepth.nimble
+
+    - name: "Copy binary"
+      run: chmod +x mosdepth && mkdir bin && cp mosdepth bin/mosdepth_debug_${{ matrix.os }}
+
+    - name: "Build and Copy release binary"
+      run: nim c -d:danger -d:release -o:bin/mosdepth_${{ matrix.os }} mosdepth
+    
+    - name: Functional Tests
+      env:
+        TERM: "xterm"
+      run: |
+        bash ./functional-tests.sh
+
+    - name: Unit Tests
+      run: |
+        nim c -r tests/all.nim
+
+    - name: Upload Artifact
+      if: success()
+      uses: actions/upload-artifact at v1.0.0
+      with:
+        name: mosdepth_${{ matrix.os }}_executable
+        path: bin/


=====================================
.gitignore
=====================================
@@ -1,4 +1,5 @@
 nimcache
+*.d4
 *.bam
 *.cram
 *.bai
@@ -6,3 +7,6 @@ nimcache
 profile_results.txt
 t.*.bed.gz
 t.*.bed.gz.csi
+*.bed.gz*
+*.dist.txt
+*.summary.txt


=====================================
.travis.yml
=====================================
@@ -3,7 +3,7 @@ env:
   - BRANCH=devel
 
 before_install:
-  - export LD_LIBRARY_PATH=./htslib/
+  - export LD_LIBRARY_PATH=./htslib-1.10.2/:/usr/local/lib
   - export PATH="$TRAVIS_BUILD_DIR/nim-$BRANCH/bin:$PATH"
   - export NIM_LIB_PREFIX=$TRAVIS_BUILD_DIR/nim-$BRANCH/
 
@@ -12,6 +12,7 @@ install:
 script:
   - nimble test
   - bash functional-tests.sh
+  - nim c -r depthstat.nim
   - nim c -d:release --cc:$CC mosdepth.nim
   - ./mosdepth -h
 branches:


=====================================
CHANGES.md
=====================================
@@ -1,4 +1,37 @@
-0.2.5 (dev)
+v0.3.1
+======
++ fix bug with regions and d4 that would cause error even when --d4 was not used.
+
+v0.3.0
+======
++ allow chromosome names containing '-' as arguments for -c
++ d4 output
+
+0.2.9
+=====
++ modifies region.dist.txt to contain the aggregate coverage of each window when -b (integer) is specified
+  (otherwise region.dist.txt and global.disk.txt are identical with -b (integer) )
++ improve speed by ~30% when using per-base output with better int2str method
+  
+0.2.8
+=====
++ fix off-by-one error in CSI index (but not data) of output bed files (#98)
+
+0.2.7
+=====
++ small optimizations
++ exit with 1 on bad help #80
++ fix check on remote bam (brentp/hts-nim#48)
++ fix erroneous assert #99
++ update static binary to htslib 1.10 (this fixes other bugs reported and closed in mosdepth)
+
+0.2.6
+=====
++ fix #54. for quantize
++ add summary file output (implemented by @danielecook)
++ add `--median` flag to output the median for each region from --by (default is to use mean).
+
+0.2.5
 =====
 + remove dependency on PCRE
 + don't double count fully overlapping reads (thanks to @jaudoux for the fix in #73)


=====================================
README.md
=====================================
@@ -4,7 +4,8 @@ fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted sequencing
 
 ![logo](https://user-images.githubusercontent.com/1739/29678184-da1f384c-88ba-11e7-9d98-df4fe3a59924.png "logo")
 
-[![Build Status](https://travis-ci.org/brentp/mosdepth.svg?branch=master)](https://travis-ci.org/brentp/mosdepth)
+[![Build](https://github.com/brentp/mosdepth/workflows/Build/badge.svg?branch=master)](https://github.com/brentp/mosdepth/actions?query=workflow%3ABuild)
+[![Build Status](https://travis-ci.com/brentp/mosdepth.svg?branch=master)](https://travis-ci.com/brentp/mosdepth)
 
 [![citation](https://img.shields.io/badge/cite-open%20access-orange.svg)](https://academic.oup.com/bioinformatics/article/doi/10.1093/bioinformatics/btx699/4583630?guestAccessKey=35b55064-4566-4ab3-a769-32916fa1c6e6)
 
@@ -13,24 +14,28 @@ fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted sequencing
 + per-base depth about 2x as fast `samtools depth`--about 25 minutes of CPU time for a 30X genome.
 + mean per-window depth given a window size--as would be used for CNV calling.
 + the mean per-region given a BED file of regions.
-+ a distribution of proportion of bases covered at or above a given threshhold for each chromosome and genome-wide.
+* the mean or median per-region cumulative coverage histogram given a window size
++ a distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide.
 + quantized output that merges adjacent bases as long as they fall in the same coverage bins e.g. (10-20)
 + threshold output to indicate how many bases in each region are covered at the given thresholds.
++ A summary of mean depths per chromosome and within specified regions per chromosome.
++ a [d4](https://github.com/38/d4-format) file (better than bigwig).
 
 when appropriate, the output files are bgzipped and indexed for ease of use.
 
 ## usage
 
 ```
-mosdepth 0.2.3
+mosdepth 0.3.0
 
   Usage: mosdepth [options] <prefix> <BAM-or-CRAM>
 
 Arguments:
 
-  <prefix>       outputs: `{prefix}.mosdepth.global.dist.txt` (NOTE!!! this is changed in version 0.2.2)
+  <prefix>       outputs: `{prefix}.mosdepth.global.dist.txt`
+                          `{prefix}.mosdepth.summary.txt`
                           `{prefix}.mosdepth.region.dist.txt` (if --by is specified)
-                          `{prefix}.per-base.bed.gz` (unless -n/--no-per-base is specified)
+                          `{prefix}.per-base.bed.gz|per-base.d4` (unless -n/--no-per-base is specified)
                           `{prefix}.regions.bed.gz` (if --by is specified)
                           `{prefix}.quantized.bed.gz` (if --quantize is specified)
                           `{prefix}.thresholds.bed.gz` (if --thresholds is specified)
@@ -45,6 +50,8 @@ Common Options:
   -n --no-per-base           dont output per-base depth. skipping this output will speed execution
                              substantially. prefer quantized or thresholded values if possible.
   -f --fasta <fasta>         fasta file for use with CRAM files.
+  --d4                       output per-base depth in d4 format. This is much faster.
+
 
 Other options:
 
@@ -52,7 +59,7 @@ Other options:
   -i --include-flag <FLAG>      only include reads with any of the bits in FLAG set. default is unset. [default: 0]
   -x --fast-mode                dont look at internal cigar operations or correct mate overlaps (recommended for most use-cases).
   -q --quantize <segments>      write quantized output see docs for description.
-  -Q --mapq <mapq>              mapping quality threshold [default: 0]
+  -Q --mapq <mapq>              mapping quality threshold. reads with a mapping quality less than this are ignored [default: 0]
   -T --thresholds <thresholds>  for each interval in --by, write number of bases covered by at
                                 least threshold bases. Specify multiple integer values separated
                                 by ','.
@@ -131,9 +138,21 @@ Output will go to `$sample.mosdepth.dist.txt`
 
 This also forces the output to have 5 decimals of precision rather than the default of 2.
 
+## D4
+
+D4 is a format created by [Hao Hou](https://github.com/38) in the Quinlan lab. It is
+incorporated into `mosdepth` as of version 0.3.0 for per-base output with the `--d4` flag.
+It improves write speed dramatically; for one test-case it takes **24.8s** to write a
+per-base.bed.gz with mosdepth compared to **7.7s** to write a d4 file. For the same case,
+running `mosdepth` without writing per-base takes 5.9 seconds so D4 greatly mitigates
+the cost of outputing per-base depth **and** the output is more useful.
+
 ## Installation
 
-The simplest way is to [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat-square)](http://bioconda.github.io/recipes/mosdepth/README.html)
+
+The simplest option is to download the [binary from the releases](https://github.com/brentp/mosdepth/releases).
+
+Another quick way is to [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat-square)](http://bioconda.github.io/recipes/mosdepth/README.html)
 
 It can also be installed with `brew` as `brew install brewsci/bio/mosdepth` or used via docker with quay:
 ```
@@ -141,10 +160,8 @@ docker pull quay.io/biocontainers/mosdepth:0.2.4--he527e40_0
 docker run -v /hostpath/:/opt/mount quay.io/biocontainers/mosdepth:0.2.4--he527e40_0 mosdepth -n --fast-mode -t 4 --by 1000 /opt/mount/sample /opt/mount/$bam
 ```
 
-Unless you want to install [nim](https://nim-lang.org), simply download the
-[binary from the releases](https://github.com/brentp/mosdepth/releases).
-
-`mosdepth` uses requires htslib version 1.4 or later. If you get an error
+The binary from releases is static, with no dependencies. If you build it yourself,
+`mosdepth` requires htslib version 1.4 or later. If you get an error
 about "`libhts.so` not found", set `LD_LIBRARY_PATH` to the directory that
 contains `libhts.so`. e.g.
 
@@ -153,25 +170,6 @@ contains `libhts.so`. e.g.
 If you get the error `could not import: hts_check_EOF` you may need to
 install a more recent version of htslib.
 
-`mosdepth` also requires a recent version of [PCRE](https://www.pcre.org/), and will give the error `could not import: pcre_free_study` if the version of PCRE on your system is too old.
-If you do not have root and cannot get the system version of PCRE upgraded, you download and compile a local copy
-```
-cd ~/src/
-wget ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.tar.gz
-tar zxvf pcre-8.41.tar.gz
-cd pcre-8.41/
-./configure
-make
-```
-
-Then pass that path to mosdepth just like we did with htslib
-```
-LD_LIBRARY_PATH=~/src/pcre-8.41/.libs/:/~/src/htslib/ mosdepth -h
-```
-
-If you still see an error about `could not import: pcre_free_study` then 
-for some, the solution has been to do: `ln -s /usr/local/lib/libpcre.so /usr/local/lib/libpcre.so.3`
-
 If you do want to install from source, see the [travis.yml](https://github.com/brentp/mosdepth/blob/master/.travis.yml)
 and the [install.sh](https://github.com/brentp/mosdepth/blob/master/scripts/install.sh).
 
@@ -187,7 +185,7 @@ for at least a given coverage value. It does this for each chromosome, and for t
 whole genome.
 
 Each row will indicate:
- + chromosome (or "genome")
+ + chromosome (or "total")
  + coverage level
  + proportion of bases covered at that level
 


=====================================
debian/TODO
=====================================
@@ -0,0 +1,3 @@
+Need to integrate https://github.com/38/d4-format
+
+ -- Steffen  Mon, 02 Nov 2020 22:26:57 +0100


=====================================
debian/changelog
=====================================
@@ -1,11 +1,5 @@
-mosdepth (0.2.5-1) UNRELEASED; urgency=medium
+mosdepth (0.3.1-1) UNRELEASED; urgency=medium
 
-  * Initial release (Closes: #<bug>)
-  TODO: several nim libraries would be needed - giving up here
-        https://github.com/brentp/hts-nim
-        https://github.com/docopt/docopt.nim
-        https://github.com/nitely/nim-regex
-        https://github.com/nitely/nim-unicodedb
-        ...
+  * Initial release (Closes: #973658)
 
- -- Andreas Tille <tille at debian.org>  Sat, 13 Apr 2019 10:32:57 +0200
+ -- Steffen Moeller <moeller at debian.org>  Sun, 31 Mar 2019 17:25:57 +0200


=====================================
debian/control
=====================================
@@ -1,34 +1,42 @@
 Source: mosdepth
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
 Section: science
 Priority: optional
+Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
+Uploaders: Steffen Moeller <moeller at debian.org>
 Build-Depends: debhelper (>= 12~),
                nim,
-               libpcre3-dev
-Standards-Version: 4.3.0
+               nim-docopt,
+               nim-regex,
+               nim-hts,
+               help2man
+Standards-Version: 4.5.0
 Vcs-Browser: https://salsa.debian.org/med-team/mosdepth
 Vcs-Git: https://salsa.debian.org/med-team/mosdepth.git
 Homepage: https://github.com/brentp/mosdepth
+Rules-Requires-Root: no
 
 Package: mosdepth
 Architecture: any
-Depends: ${shlibs:Depends},
-         ${misc:Depends}
-Description: fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing
- mosdepth can output:
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: BAM/CRAM depth calculation biological sequencing
+ Many small reads are produced by high-throughput "next generation"
+ sequencing technologies. The final sequence is derived from how
+ these reads are overlapping towards a consensus.
+ The more reads are covering/confirming parts of a nucleotide seq,
+ the higher the confidence is. Too many reads would be indicative
+ of e.g. repeats in the genome.
  .
-  * per-base depth about 2x as fast samtools depth--about 25 minutes of
-    CPU time for a 30X genome.
-  * mean per-window depth given a window size--as would be used for
-    CNV calling.
-  * the mean per-region given a BED file of regions.
-  * a distribution of proportion of bases covered at or above a given
-    threshhold for each chromosome and genome-wide.
-  * quantized output that merges adjacent bases as long as they fall in
-    the same coverage bins e.g. (10-20)
-  * threshold output to indicate how many bases in each region are
-    covered at the given thresholds.
- .
- when appropriate, the output files are bgzipped and indexed for
- ease of use.
+ mosdepth can output:
+  *  per-base depth about 2x as fast samtools depth--about 25 minutes
+     of CPU time for a 30X genome.
+  *  mean per-window depth given a window size--as would be used for
+     CNV calling.
+  *  the mean per-region given a BED file of regions.
+  *  a distribution of proportion of bases covered at or above a given
+     threshhold for each chromosome and genome-wide.
+  *  quantized output that merges adjacent bases as long as they fall
+     in the same coverage bins e.g. (10-20)
+  *  threshold output to indicate how many bases in each region are
+     covered at the given thresholds.
+ when appropriate, the output files are bgzipped and indexed for ease
+ of use.


=====================================
debian/copyright
=====================================
@@ -1,16 +1,22 @@
 Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
 Upstream-Name: mosdepth
-Source: https://github.com/brentp/mosdepth/releases
+Source: https://github.com/brentp/mosdepth
 
 Files: *
-Copyright: 2017 Brent Pedersen
-License: MIT
+Copyright: 2017-2020 Brent Pedersen
+License: Expat
+
+Files: intToStr.nim
+Copyright: 2014 Milo Yip
+ adpated from: https://github.com/miloyip/itoa-benchmark countlut method..
+ https://github.com/miloyip/itoa-benchmark/blob/master/license.txt
+License: Expat
 
 Files: debian/*
-Copyright: 2019 Andreas Tille <tille at debian.org>
-License: MIT
+Copyright: 2019-2020 Steffen Moeller <moeller at debian.org>
+License: Expat
 
-License: MIT
+License: Expat 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
@@ -28,3 +34,4 @@ License: MIT
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
+


=====================================
debian/manpages
=====================================
@@ -0,0 +1 @@
+debian/mosdepth.1


=====================================
debian/mosdepth.install
=====================================
@@ -0,0 +1 @@
+mosdepth	usr/bin


=====================================
debian/rules
=====================================
@@ -1,30 +1,17 @@
 #!/usr/bin/make -f
 
-# DH_VERBOSE := 1
+ DH_VERBOSE := 1
 export LC_ALL=C.UTF-8
 
 include /usr/share/dpkg/default.mk
-# this provides:
-# DEB_SOURCE: the source package name
-# DEB_VERSION: the full version of the package (epoch + upstream vers. + revision)
-# DEB_VERSION_EPOCH_UPSTREAM: the package's version without the Debian revision
-# DEB_VERSION_UPSTREAM_REVISION: the package's version without the Debian epoch
-# DEB_VERSION_UPSTREAM: the package's upstream version
-# DEB_DISTRIBUTION: the distribution(s) listed in the current entry of debian/changelog
-# SOURCE_DATE_EPOCH: the source release date as seconds since the epoch, as
-#                    specified by <https://reproducible-builds.org/specs/source-date-epoch/>
-
-# for hardening you might like to uncomment this:
-# export DEB_BUILD_MAINT_OPTIONS=hardening=+all
 
 %:
 	dh $@
 
 override_dh_auto_build:
-	nimble build -y
+	nim c -p:/usr/share/nimble/unicodeplus  -p:/usr/share/nimble/unicodedb  -p:/usr/share/nimble/regex  -p:/usr/share/nimble/docopt -p:/usr/share/nimble/hts  mosdepth.nim
+	help2man mosdepth > debian/mosdepth.1
 
-### When overriding auto_test make sure DEB_BUILD_OPTIONS will be respected
-#override_dh_auto_test:
-#ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
-#	do_stuff_for_testing
-#endif
+override_dh_auto_clean:
+	rm -f debian/mosdepth.1
+	rm -f mosdepth


=====================================
debian/upstream/metadata
=====================================
@@ -1,20 +1,18 @@
 Reference:
- - Author: Brent S Pedersen and Aaron R Quinlan
-   Title: "Mosdepth: quick coverage calculation for genomes and exomes"
+ - Author: Brent S Pedersen and Aaron R. Quinlan
+   Title: >
+    Mosdepth: quick coverage calculation for genomes and exomes
    Journal: Bioinformatics
    Year: 2018
    Volume: 34
    Number: 5
-   Pages: 867–868
+   Pages: 867-868
    DOI: 10.1093/bioinformatics/btx699
    PMID: 29096012
-   URL: https://academic.oup.com/bioinformatics/article/34/5/867/4583630
-   eprint: "https://academic.oup.com/bioinformatics/article-pdf/\
-    34/5/867/25331748/btx699.pdf"
+   URL: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6030888/
+   eprint: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6030888/pdf/btx699.pdf
 Registry:
  - Name: bio.tools
    Entry: mosdepth
- - Name: OMICtools
-   Entry: OMICS_20873
  - Name: conda:bioconda
    Entry: mosdepth


=====================================
debian/watch
=====================================
@@ -1,3 +1,4 @@
 version=4
+#https://github.com/brentp/mospdepth/releases/latest .*/archive/#PREFIX#@ANY_VERSION@@ARCHIVE_EXT@
+https://github.com/brentp/mosdepth/tags .*/v([\d.])+\.tar\.gz
 
-https://github.com/brentp/mosdepth/releases .*/archive/v?@ANY_VERSION@@ARCHIVE_EXT@


=====================================
depthstat.nim
=====================================
@@ -0,0 +1,81 @@
+
+type
+  depth_stat* = object
+    cum_length*: int
+    cum_depth*: uint64
+    min_depth*: uint32
+    max_depth*: uint32
+
+type CountStat*[T:SomeOrdinal] = object
+  counts: seq[T]
+  n: int
+
+proc initCountStat*[T](size:int=32768): CountStat[T] =
+  return CountStat[T](counts: newSeq[T](size))
+
+proc add*[T](c: var CountStat, value: T) {.inline.} =
+  c.n.inc
+  if value.int > c.counts.high.int:
+    c.counts[c.counts.high].inc
+  elif value < 0:
+    raise newException(IndexError, "error setting negative depth value:" & $value)
+  else:
+    c.counts[value].inc
+
+proc median*[T](c: CountStat[T]): int {.inline.} =
+  var stop_n = int(0.5 + c.n.float64 * 0.5)
+  var cum = 0
+  for i, cnt in c.counts:
+    cum += cnt.int
+    if cum >= stop_n:
+      return i
+  return -1
+
+proc clear*[T](c: var CountStat[T]) {.inline.} =
+  if c.n == 0: return
+  c.n = 0
+  zeroMem(c.counts[0].addr, sizeof(c.counts[0]) * c.counts.len)
+
+template len*[T](c:CountStat[T]): int = c.counts.len
+
+proc newDepthStat*[T: SomeNumber](d: seq[T]): depth_stat =
+  result.cum_length = len(d)
+  result.min_depth = uint32.high
+  for dp in d:
+    result.cum_depth += dp.uint64
+    result.min_depth = min(result.min_depth, dp.uint32)
+    result.max_depth = max(result.max_depth, dp.uint32)
+
+proc clear*(ds: var depth_stat) =
+    ds.cum_length = 0
+    ds.cum_depth = 0
+    ds.min_depth = uint32.high
+    ds.max_depth = 0
+
+proc `+`*(a, b: depth_stat): depth_stat {.inline.} =
+  result = depth_stat(cum_length: a.cum_length + b.cum_length,
+                      cum_depth: a.cum_depth + b.cum_depth,
+                      min_depth: min(a.min_depth, b.min_depth),
+                      max_depth: max(a.max_depth, b.max_depth))
+
+when isMainModule:
+  import unittest
+  suite "count-stat":
+    test "count-test":
+
+      var c = initCountStat[uint32]()
+      for i in 0..10:
+        c.add(i.uint32)
+
+      check c.median == 5
+      check c.n == 11
+      check c.len == 32768
+
+      c.add(6)
+      c.add(6)
+      c.add(6)
+      c.add(6)
+      check c.median == 6
+      c.clear()
+      check c.median == 0
+      check c.n == 0


=====================================
functional-tests.sh
=====================================
@@ -10,8 +10,10 @@ test -e ssshtest || wget -q https://raw.githubusercontent.com/ryanlayer/ssshtest
 
 set -o nounset
 
+
 set -e
 nim c --boundChecks:on -x:on mosdepth.nim
+nim c -r tests/funcs.nim
 set +e
 exe=./mosdepth
 bam=/data/human/NA12878.subset.bam
@@ -26,12 +28,16 @@ assert_exit_code 0
 assert_equal "$(zgrep ^MT t.per-base.bed.gz)" "MT	0	80	1
 MT	80	16569	0"
 assert_equal "$(zgrep -w ^1 t.per-base.bed.gz)" "1	0	249250621	0"
+assert_equal "$(cat t.mosdepth.summary.txt | grep 'MT')" "MT	16569	80	0.00	0	1"
+assert_equal "$(cat t.mosdepth.summary.txt | grep 'total')" "total	16569	80	0.00	0	1"
 
 run overlapFastMode $exe t --fast-mode tests/ovl.bam
 assert_equal "$(zgrep ^MT t.per-base.bed.gz)" "MT	0	6	1
 MT	6	42	2
 MT	42	80	1
 MT	80	16569	0"
+assert_equal $(cat t.mosdepth.summary.txt  | cut -f 2 | tail -n+2 | sort | uniq) 16569
+assert_equal $(cat t.mosdepth.summary.txt  | cut -f 4 | tail -n+2 | sort | uniq) "0.01"
 assert_exit_code 0
 
 
@@ -41,7 +47,7 @@ assert_exit_code 1
 
 run unordered_bed $exe --by tests/unordered.bed t tests/ovl.bam
 assert_exit_code 0
-assert_equal $(zcat t.regions.bed.gz | wc -l) 2
+assert_equal $(zcat < t.regions.bed.gz | wc -l) 2
 
 # theres data left in the bam but the region tree is empty...
 run missing_bed_chrom $exe --by tests/missing.bed t tests/ovl.bam
@@ -68,13 +74,13 @@ assert_exit_code 0
 
 rm -f t.thresholds.bed.gz*
 run threshold_test $exe --by 100 -T 0,1,2,3,4,5 -c MT t tests/ovl.bam
-assert_equal "$(zcat t.thresholds.bed.gz | tail -n +2 | head -1)" "MT	0	100	unknown	100	80	0	0	0	0"
-assert_equal "0" "$(zcat t.thresholds.bed.gz | tail -n+2 | cut -f 7 | uniq)"
+assert_equal "$(zcat < t.thresholds.bed.gz | tail -n +2 | head -1)" "MT	0	100	unknown	100	80	0	0	0	0"
+assert_equal "0" "$(zcat < t.thresholds.bed.gz | tail -n+2 | cut -f 7 | uniq)"
 assert_exit_code 0
 
 rm -f t.thresholds.bed.gz*
 run threshold_test_by $exe --by tests/track.bed -T 0,1,2 -c MT t tests/ovl.bam
-assert_equal "$(zcat t.thresholds.bed.gz | tail -n +2)" "MT	2	80	aregion	78	78	0"
+assert_equal "$(zcat < t.thresholds.bed.gz | tail -n +2)" "MT	2	80	aregion	78	78	0"
 assert_exit_code 0
 
 export MOSDEPTH_Q0=AAA
@@ -85,10 +91,13 @@ assert_exit_code 0
 assert_equal "$(zgrep -w ^MT t.quantized.bed.gz)" "MT	0	80	BBB
 MT	80	16569	AAA"
 assert_equal "$(head -1 t.mosdepth.global.dist.txt)" "MT	1	0.0048283"
+assert_equal "$(tail -n +2 t.mosdepth.summary.txt | head -1)" "MT	16569	80	0.0048283	0	1"
+assert_equal "$(tail -n 1 t.mosdepth.summary.txt)" "total	16569	80	0.0048283	0	1"
+
 
 run track_header $exe --by tests/track.bed t tests/ovl.bam
 assert_exit_code 0
-assert_equal "$(zcat t.regions.bed.gz)" "MT	2	80	aregion	1.00"
+assert_equal "$(zcat < t.regions.bed.gz)" "MT	2	80	aregion	1.00"
 
 run track_header_by $exe --by tests/bad.bed t tests/ovl.bam
 assert_exit_code 1
@@ -107,20 +116,38 @@ assert_equal "$(cat tt.mosdepth.global.dist.txt)" "MT	0	1.00
 total	0	1.00"
 
 run big_chrom $exe t tests/big.bam
+assert_equal $(cat t.mosdepth.summary.txt | wc -l) 3
+assert_equal $(cat t.mosdepth.summary.txt  | cut -f 3 | tail -n +2 | uniq) 15
+assert_equal $(cat t.mosdepth.summary.txt  | cut -f 5 | tail -n +2 | uniq) 0
+assert_equal $(cat t.mosdepth.summary.txt  | cut -f 6 | tail -n +2 | uniq) 1
 assert_exit_code 0
 
-rm -f tt.mosdepth.region.dist.txt
-rm -f t.mosdepth.region.dist.txt
+rm -f tt.*
+rm -f t.*
+
 run empty_tids $exe t -n --thresholds 1,5 --by tests/empty-tids.bed tests/empty-tids.bam
 assert_exit_code 0
 assert_equal $(grep -w HPV26 -c t.mosdepth.region.dist.txt) 0
+rm -f t.*
+
+#regions and global should not be the same when -b is specified
+run regions $exe t_regions -n -b 100 tests/empty-tids.bam 
+assert_equal $(diff -u t_regions.mosdepth.region.dist.txt t_regions.mosdepth.global.dist.txt | wc -l) 1447
+rm -f t_regions.*
 
-rm -f t.per-base.bed.gz*
 run overlappingPairs $exe t tests/overlapping-pairs.bam
-assert_equal "$(zcat t.per-base.bed.gz)" "1	0	565173	0
+assert_equal "$(zcat < t.per-base.bed.gz)" "1	0	565173	0
 1	565173	565253	1
 1	565253	249250621	0"
 assert_exit_code 0
+rm -f t.*
+
+
+nim c -d:d4 --boundChecks:on -x:on mosdepth.nim
+rm -f t.per-base.bed.gz
+run d4test $exe --d4 t --fast-mode tests/ovl.bam
+assert_exit_code 0
+
 
 test -e $bam || exit
 
@@ -129,4 +156,13 @@ assert_equal "$(zgrep -w "chrM	9999	10000" t.per-base.bed.gz)" "chrM	9999	10000
 assert_exit_code 0
 
 run flag $exe -c chrM -F 4 --by 20000 tx /data/human/NA12878.subset.bam
-assert_exit_code 0
\ No newline at end of file
+assert_exit_code 0
+
+
+run check_exit_code_on_bad_args $exe -t4prefix sample.bam
+assert_exit_code 1
+assert_in_stderr "error parsing arguments"
+
+
+
+


=====================================
int2str.nim
=====================================
@@ -0,0 +1,95 @@
+import bitops
+
+#[
+
+this intToStr is adpated from: https://github.com/miloyip/itoa-benchmark countlut method.
+
+https://github.com/miloyip/itoa-benchmark/blob/master/license.txt
+Copyright (C) 2014 Milo Yip
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+]#
+
+
+const gDigitsLut = [
+    '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
+    '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
+    '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
+    '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
+    '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
+    '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
+    '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
+    '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
+    '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
+    '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
+];
+
+const powers_of_10 = [
+        0'i32,
+        10,
+        100,
+        1000,
+        10000,
+        100000,
+        1000000,
+        10000000,
+        100000000,
+        1000000000]
+
+proc countdigits(value:int32): int {.inline.} =
+  result = (32 - countLeadingZeroBits(value or 1)) * 1233 shr 12
+  result = result - int(value < powers_of_10[result]) + 1
+
+proc fastIntToStr*(value:int32, outstr:var string, offset:int=0) {.inline.} =
+  outstr.setLen(offset + countdigits(value))
+  var value = value
+  var L = outstr.high
+  while value >= 100:
+    let i = (value mod 100) shl 1
+    value = int32(value / 100)
+
+    outstr[L] = gDigitsLut[i + 1]
+    outstr[L-1] = gDigitsLut[i]
+    L -= 2
+  if value < 10:
+    outstr[L] = (value + '0'.int).char
+
+  else:
+    let i = value shl 1
+    outstr[L] = gDigitsLut[i + 1]
+    outstr[L-1] = gDigitsLut[i]
+
+
+when isMainModule:
+
+  import times
+  import strutils
+
+  var t0 = cpuTime()
+  var outstr = newString(10)
+  for i in 0'i32..200_000_000:
+    fastIntToStr(i, outstr)
+    doAssert outstr == $i
+  echo cpuTime() - t0
+
+  t0 = cpuTime()
+  echo "only to 100m for $"
+  for i in 0'i32..100_000_000:
+    let outstr = intToStr(i)
+  echo cpuTime() - t0


=====================================
mosdepth.nim
=====================================
@@ -1,15 +1,23 @@
 import hts
 import tables
+import ./int2str
 import strutils as S
 import algorithm as alg
 import sequtils as sequtils
 import strutils as su
 import os
+import strformat
 import docopt
 import times
+import math
+import ./depthstat
 
+when defined(d4):
+  import d4
 
 var precision: int
+var output_summary_header = true
+
 try:
   var tmp = getEnv("MOSDEPTH_PRECISION")
   precision = parse_int(tmp)
@@ -20,11 +28,11 @@ type
   pair = tuple[pos: int, value: int32]
   depth_t = tuple[start: int, stop: int, value: int]
   depth_s = tuple[start: int, stop: int, value: string]
-  region_t = ref object
-    chrom: string
-    start: uint32
-    stop: uint32
-    name: string
+  region_t* = ref object
+    chrom*: string
+    start*: uint32
+    stop*: uint32
+    name*: string
 
   coverage_t = seq[int32]
 
@@ -38,10 +46,7 @@ proc `$`*(r: region_t): string =
 
 proc to_coverage(c: var coverage_t) =
   # to_coverage converts from an array of start/end inc/decs to actual coverage.
-  var d = int32(0)
-  for i, v in pairs(c):
-    d += v
-    c[i] = d
+  c.cumsum()
 
 iterator gen_depths(arr: coverage_t, offset: int=0, istop: int=0): depth_t =
   # given `arr` with values in each index indicating the number of reads
@@ -125,7 +130,7 @@ iterator gen_quantized(quants: seq[int], arr: coverage_t): depth_s {.inline.} =
         yield (last_pos, pos, lookup[last_quantized])
       last_quantized = quantized
       last_pos = pos
-    if last_quantized != -1 and last_pos < arr.high:
+    if last_quantized != -1 and last_pos < arr.high and last_quantized < len(lookup):
       yield (last_pos, len(arr)-1, lookup[last_quantized])
 
 proc pair_sort(a, b: pair): int =
@@ -184,24 +189,28 @@ proc bed_line_to_region(line: string): region_t =
      s = S.parse_int(cse[1])
      e = S.parse_int(cse[2])
      reg = region_t(chrom: cse[0], start: uint32(s), stop: uint32(e))
+   doAssert s <= e, "[slivar] ERROR: start > end in bed line:" & line
    if len(cse) > 3:
      reg.name = cse[3]
    return reg
 
-proc region_line_to_region(region: string): region_t =
+proc region_line_to_region*(region: string): region_t =
   if region == "" or region == "nil":
     return nil
+  if ':' notin region:
+    return region_t(chrom:region)
+  result = region_t()
+
   var i = 0
-  var r = region_t()
-  for w in region.split({':', '-'}):
+  # rsplit yields strings in reverse order
+  for w in region.rsplit({':', '-'}, maxsplit=2):
     if i == 1:
-      r.start = uint32(S.parse_int(w)) - 1
-    elif i == 2:
-      r.stop = uint32(S.parse_int(w))
+      result.start = uint32(S.parse_int(w)) - 1
+    elif i == 0:
+      result.stop = uint32(S.parse_int(w))
     else:
-      r.chrom = w
+      result.chrom = w
     inc(i)
-  return r
 
 proc get_tid(tgts: seq[hts.Target], chrom: string): int =
   for t in tgts:
@@ -285,11 +294,10 @@ proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, mapq:int=
             # 4623241 4623264
             # chr1 4623171 69M1D23M9S (pos: 4623171, value: 1)(pos: 4623241, value: 1)(pos: 4623240, value: -1)(pos: 4623264, value: -1)
             # chr1 4623223 4S97M (pos: 4623223, value: 1)(pos: 4623320, value: -1)
-            assert (rec.start <= mate.stop)
             # each element will have a .value of 1 for start and -1 for end.
 
-            var ses = sequtils.to_seq(gen_start_ends(rec.cigar, rec.start))
-            for p in gen_start_ends(mate.cigar, mate.start):
+            var ses = sequtils.to_seq(gen_start_ends(rec.cigar, rec.start.int))
+            for p in gen_start_ends(mate.cigar, mate.start.int):
                 ses.add(p)
             alg.sort(ses, pair_sort)
             var pair_depth = 0
@@ -306,10 +314,10 @@ proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, mapq:int=
               last_pos = p.pos
             if pair_depth != 0: echo $rec.qname & ":" & $rec & " " & $mate.qname & ":" & $mate & " " & $pair_depth
     if fast_mode:
-      arr[rec.start].inc
-      arr[rec.stop].dec
+      arr[rec.start] += 1
+      arr[rec.stop] -= 1
     else:
-      inc_coverage(rec.cigar, rec.start, arr)
+      inc_coverage(rec.cigar, rec.start.int, arr)
 
   if not found:
     return -2
@@ -352,28 +360,33 @@ iterator region_gen(window: uint32, target: hts.Target, bed_regions: TableRef[st
         for r in bed_regions[target.name]: yield r
         bed_regions.del(target.name)
 
-proc imean(vals: coverage_t, start:uint32, stop:uint32): float64 =
+proc imean(vals: coverage_t, start:uint32, stop:uint32, ms:var CountStat[uint32]): float64 =
   if start > uint32(len(vals)):
     return 0
-  var L = float64(stop - start)
-  for i in start..<stop:
-    if int(i) == len(vals): break
-    result += float64(vals[int(i)]) / L
+
+  if ms.len != 0:
+    ms.clear()
+    for i in start..<min(stop, uint32(len(vals))):
+      ms.add(vals[i])
+    return ms.median.float64
+
+  else:
+    var L = float64(stop - start)
+    for i in start..<min(stop, uint32(len(vals))):
+      result += float64(vals[int(i)]) / L
 
 const MAX_COVERAGE = int32(400000)
 
-proc inc(d: var seq[int64], coverage: coverage_t, start:uint32, stop:uint32) =
+proc inc(d: var seq[int64], coverage: var coverage_t, start:uint32, stop:uint32) =
   var v:int32
   var L = int32(d.high)
   if int(start) >= len(coverage):
     stderr.write_line("[mosdepth] warning requested interval outside of chromosome range:", start, "..", stop)
     return
-  var istop = stop
-  if int(stop) > len(coverage):
-    istop = uint32(len(coverage))
+  var istop = min(stop, uint32(coverage.len))
 
   for i in start..<istop:
-    v = coverage[int(i)]
+    v = coverage[i]
     if v > MAX_COVERAGE:
       v = MAX_COVERAGE - 10
     if v >= L:
@@ -382,7 +395,7 @@ proc inc(d: var seq[int64], coverage: coverage_t, start:uint32, stop:uint32) =
         d[j] = 0
       L = int32(d.high)
     if v < 0: continue
-    inc(d[v])
+    d[v] += 1
 
 proc write_distribution(chrom: string, d: var seq[int64], fh:File) =
   var sum: int64
@@ -404,6 +417,28 @@ proc write_distribution(chrom: string, d: var seq[int64], fh:File) =
   # reverse it back because we use to update the full genome
   reverse(d)
 
+proc write_summary(region: string, stat: depth_stat, fh:File) =
+  var mean_depth: float64
+  if stat.cum_length > 0:
+    mean_depth = float64(stat.cum_depth) / float64(stat.cum_length)
+  else:
+    mean_depth = 0.float64
+  let stat_min = if stat.min_depth == uint32.high: 0.uint32 else: stat.min_depth
+  if output_summary_header:
+    fh.write_line ["chrom",
+                   "length",
+                   "bases",
+                   "mean",
+                   "min",
+                   "max"].join("\t")
+    output_summary_header = false
+  fh.write_line [region,
+                 $stat.cum_length,
+                 $stat.cum_depth,
+                 $mean_depth.format_float(ffDecimal, precision=precision),
+                 $stat_min,
+                 $stat.max_depth].join("\t")
+
 proc get_targets(targets: seq[hts.Target], r: region_t): seq[hts.Target] =
   if r == nil:
     return targets
@@ -444,11 +479,11 @@ proc get_quantize_args*(qa: string) : seq[int] =
     quit(2)
 
 
-proc write_thresholds(fh:BGZI, tid:int, arr:coverage_t, thresholds:seq[int], region: region_t) =
+proc write_thresholds(fh:BGZI, tid:int, arr:var coverage_t, thresholds:seq[int], region: region_t) =
   # write the number of bases in each region that are >= each threshold.
   if thresholds.len == 0: return
   var
-    line = new_string_of_cap(100)
+    line = new_string_of_cap(32)
     start = int(region.start)
     stop = int(region.stop)
   line.add(region.chrom & "\t")
@@ -466,13 +501,14 @@ proc write_thresholds(fh:BGZI, tid:int, arr:coverage_t, thresholds:seq[int], reg
     return
 
   var counts = new_seq[int](len(thresholds))
+  shallow(arr)
 
   # iterate over the region and count bases >= request cutoffs.
   for v in arr[start..<stop]:
     for i, t in thresholds:
-      if v >= t:
-        counts[i] += 1
-      # else: break # if we know they are sorted we can break
+      # if we know they are sorted we can break
+      if v < t: break
+      counts[i] += 1
 
   for count in counts:
     line.add("\t" & intToStr(count))
@@ -498,8 +534,18 @@ proc get_min_levels(targets: seq[Target]): int =
     result += 1
     s = s shl 3
 
+proc isdigit(s:string): bool =
+  for c in s:
+    if not c.isdigit: return false
+  return true
+
+proc to_tuples(targets:seq[Target]): seq[tuple[name:string, length:int]] =
+  result.setLen(targets.len)
+  for i, t in targets:
+    result[i] = (t.name, t.length.int)
 
-proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16, region: string, thresholds: seq[int], fast_mode:bool, args: Table[string, docopt.Value]) =
+proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16, region: string, thresholds: seq[int],
+          fast_mode:bool, args: Table[string, docopt.Value], use_median:bool=false, use_d4:bool=false) =
   # windows are either from regions, or fixed-length windows.
   # we assume the input is sorted by chrom.
   var
@@ -512,15 +558,25 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
     skip_per_base = args["--no-per-base"]
     window: uint32 = 0
     bed_regions: TableRef[string, seq[region_t]] # = Table[string, seq[region_t]]
-    fbase: BGZI
     #fbase: BGZ
     fquantize: BGZI
     fthresholds: BGZI
     fregion: BGZI
     fh_global_dist:File
     fh_region_dist:File
+    fh_summary: File
     quantize = get_quantize_args($args["--quantize"])
 
+    # summary stat output
+    chrom_region_stat: depth_stat
+    chrom_stat: depth_stat
+    global_region_stat: depth_stat
+    global_stat: depth_stat
+
+  when defined(d4):
+    var fd4:D4
+  var fbase: BGZI
+
   var region_distribution = new_seq[int64](1000)
   var global_distribution = new_seq[int64](1000)
 
@@ -534,7 +590,13 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
   if not skip_per_base:
     # can't use set-threads when indexing on the fly so this must
     # not call set_threads().
-    fbase = wopen_bgzi(prefix & ".per-base.bed.gz", 1, 2, 3, true, compression_level=1, levels=levels)
+    if use_d4:
+      when defined(d4):
+        doAssert fd4.open(prefix & ".per-base.d4", mode="w"), &"[mosdepth] error opening {prefix}.per-base.d4"
+        fd4.set_chromosomes(targets.to_tuples)
+
+    else:
+      fbase = wopen_bgzi(prefix & ".per-base.bed.gz", 1, 2, 3, true, compression_level=1, levels=levels)
     #open(fbase, prefix & ".per-base.bed.gz", "w1")
   if quantize.len != 0:
     fquantize = wopen_bgzi(prefix & ".quantized.bed.gz", 1, 2, 3, true, compression_level=1, levels=levels)
@@ -546,6 +608,9 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
   if not open(fh_global_dist, prefix & ".mosdepth.global.dist.txt", fmWrite):
     stderr.write_line("[mosdepth] could not open file:", prefix & ".mosdepth.global.dist.txt")
 
+  if not open(fh_summary, prefix & ".mosdepth.summary.txt", fmWrite):
+    stderr.write_line("[mosdepth] could not open file:", prefix & ".mosdepth.summary.txt")
+
   if region != "" and not open(fh_region_dist, prefix & ".mosdepth.region.dist.txt", fmWrite):
     stderr.write_line("[mosdepth] could not open file:", prefix & ".mosdepth.dist.txt")
 
@@ -555,6 +620,9 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
       window = uint32(S.parse_int(region))
     else:
       bed_regions = bed_to_table(region)
+  shallow(arr)
+
+  var cs = initCountStat[uint32](size=if use_median: 65536 else: 0)
 
   for target in sub_targets:
     chrom_global_distribution = new_seq[int64](1000)
@@ -575,7 +643,8 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
       var me = 0'f64
       for r in region_gen(window, target, bed_regions):
         if tid != -2:
-          me = imean(arr, r.start, r.stop)
+          me = imean(arr, r.start, r.stop, cs)
+          chrom_region_stat = chrom_region_stat + newDepthStat(arr[r.start..<r.stop])
         var m = su.format_float(me, ffDecimal, precision=precision)
 
         if r.name == "":
@@ -585,10 +654,21 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
         discard fregion.write_interval(line, target.name, int(r.start), int(r.stop))
         line = line[0..<0]
         if tid != -2:
-          chrom_region_distribution.inc(arr, r.start, r.stop)
+          if region.isdigit: #stores the aggregated coverage for each region when working with even windows across the genome
+            chrom_region_distribution[min(me.toInt,int64(len(chrom_region_distribution))-1)] += 1
+          else: # stores the per-base coverage in each region specified in the bed file
+            chrom_region_distribution.inc(arr, r.start, r.stop)
+
         write_thresholds(fthresholds, tid, arr, thresholds, r)
     if tid != -2:
       chrom_global_distribution.inc(arr, uint32(0), uint32(len(arr) - 1))
+      chrom_stat = newDepthStat(arr[0..<len(arr)-1])
+      global_stat = global_stat + chrom_stat
+      write_summary(target.name, chrom_stat, fh_summary)
+      if region != "":
+        write_summary(target.name & "_region", chrom_region_stat, fh_summary)
+      global_region_stat = global_region_stat + chrom_region_stat
+      chrom_region_stat.clear()
 
     # write the distribution for each chrom
     write_distribution(target.name, chrom_global_distribution, fh_global_dist)
@@ -603,10 +683,30 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
 
     if not skip_per_base:
       if tid == -2:
-        discard fbase.write_interval(starget & "0\t" & intToStr(int(target.length)) & "\t0", target.name, 0, int(target.length))
+        when defined(d4):
+          if use_d4:
+            fd4.write(target.name, @[Interval(left: 0'u32, right: target.length.uint32, value: 0'i32)])
+        else:
+          discard fbase.write_interval(starget & "0\t" & intToStr(int(target.length)) & "\t0", target.name, 0, int(target.length))
       else:
-        for p in gen_depths(arr):
-          discard fbase.write_interval(starget & intToStr(p.start) & "\t" & intToStr(p.stop) & "\t" & intToStr(p.value), target.name, p.start, p.stop)
+        var write_fbase = true
+        when defined(d4):
+          if use_d4:
+            fd4.write(target.name, 0, arr)
+            write_fbase = false
+
+        if write_fbase:
+          var line = newStringOfCap(32)
+          line.add(starget)
+          for p in gen_depths(arr):
+            # re-use line each time.
+            line.setLen(starget.len)
+            fastIntToStr(p.start.int32, line, line.len)
+            line.add('\t')
+            fastIntToStr(p.stop.int32, line, line.len)
+            line.add('\t')
+            fastIntToStr(p.value.int32, line, line.len)
+            discard fbase.write_interval(line, target.name, p.start, p.stop)
     if quantize.len != 0:
       if tid == -2 and quantize[0] == 0:
         var lookup = make_lookup(quantize)
@@ -616,6 +716,10 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
         for p in gen_quantized(quantize, arr):
             discard fquantize.write_interval(starget & intToStr(p.start) & "\t" & intToStr(p.stop) & "\t" & p.value, target.name, p.start, p.stop)
 
+  write_summary("total", global_stat, fh_summary)
+  if region != "":
+    write_summary("total_region", global_region_stat, fh_summary)
+
   write_distribution("total", global_distribution, fh_global_dist)
   if region != "":
     write_distribution("total", region_distribution, fh_region_dist)
@@ -636,6 +740,9 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
   if fthresholds != nil and close(fthresholds) != 0:
       stderr.write_line("[mosdepth] error writing thresholds file\n")
       quit(1)
+  when defined(d4):
+    if use_d4:
+      fd4.close
 
   if fbase != nil and close(fbase) != 0:
       stderr.write_line("[mosdepth] error writing per-base file\n")
@@ -653,7 +760,8 @@ proc check_chrom(r: region_t, targets: seq[Target]) =
 proc threshold_args*(ts: string): seq[int] =
   if ts == "nil":
     return
-  return map(ts.split(','), proc (s:string): int = return parse_int(s))
+  result = map(ts.split(','), proc (s:string): int = return parse_int(s))
+  sort(result)
 
 
 proc check_cram_has_ref(cram_path: string, fasta:string) =
@@ -667,9 +775,9 @@ when(isMainModule):
   when not defined(release) and not defined(lto):
     stderr.write_line "[mosdepth] WARNING: built in debug mode; will be slow"
 
-  let version = "mosdepth 0.2.5"
+  let version = "mosdepth 0.3.1"
   let env_fasta = getEnv("REF_PATH")
-  let doc = format("""
+  var doc = format("""
   $version
 
   Usage: mosdepth [options] <prefix> <BAM-or-CRAM>
@@ -677,6 +785,7 @@ when(isMainModule):
 Arguments:
 
   <prefix>       outputs: `{prefix}.mosdepth.dist.txt`
+                          `{prefix}.mosdepth.summary.txt`
                           `{prefix}.per-base.bed.gz` (unless -n/--no-per-base is specified)
                           `{prefix}.regions.bed.gz` (if --by is specified)
                           `{prefix}.quantized.bed.gz` (if --quantize is specified)
@@ -692,6 +801,12 @@ Common Options:
   -n --no-per-base           dont output per-base depth. skipping this output will speed execution
                              substantially. prefer quantized or thresholded values if possible.
   -f --fasta <fasta>         fasta file for use with CRAM files [default: $env_fasta].
+""" % ["version", version, "env_fasta", env_fasta])
+  when defined(d4):
+    doc &= """  --d4                       output per-base depth in d4 format.
+"""
+
+  doc &= """
 
 Other options:
 
@@ -699,20 +814,32 @@ Other options:
   -i --include-flag <FLAG>      only include reads with any of the bits in FLAG set. default is unset. [default: 0]
   -x --fast-mode                dont look at internal cigar operations or correct mate overlaps (recommended for most use-cases).
   -q --quantize <segments>      write quantized output see docs for description.
-  -Q --mapq <mapq>              mapping quality threshold [default: 0]
+  -Q --mapq <mapq>              mapping quality threshold. reads with a quality less than this value are ignored [default: 0]
   -T --thresholds <thresholds>  for each interval in --by, write number of bases covered by at
                                 least threshold bases. Specify multiple integer values separated
                                 by ','.
+  -m --use-median               output median of each region (in --by) instead of mean.
   -R --read-groups <string>     only calculate depth for these comma-separated read groups IDs.
   -h --help                     show help
-  """ % ["version", version, "env_fasta", env_fasta])
+  """
+
+  var args: Table[string, Value]
+  try:
+    args = docopt(doc, version = version, quit=false)
+  except DocoptExit:
+    echo (ref DocoptExit)(get_current_exception()).usage
+    quit "error parsing arguments"
 
-  let args = docopt(doc, version = version)
   let mapq = S.parse_int($args["--mapq"])
   var
     region: string
     thresholds: seq[int] = threshold_args($args["--thresholds"])
     fast_mode:bool = args["--fast-mode"]
+    use_median:bool = args["--use-median"]
+  when defined(d4):
+    var use_d4:bool = args["--d4"] and not args["--no-per-base"]
+  else:
+    var use_d4:bool = false
 
   if $args["--by"] != "nil":
     region = $args["--by"]
@@ -748,4 +875,4 @@ Other options:
   discard bam.set_option(FormatOption.CRAM_OPT_DECODE_MD, 0)
   check_chrom(chrom, bam.hdr.targets)
 
-  main(bam, chrom, mapq, eflag, iflag, region, thresholds, fast_mode, args)
+  main(bam, chrom, mapq, eflag, iflag, region, thresholds, fast_mode, args, use_median=use_median, use_d4=use_d4)


=====================================
mosdepth.nimble
=====================================
@@ -1,13 +1,13 @@
 # Package
 
-version       = "0.2.5"
+version       = "0.3.1"
 author        = "Brent Pedersen"
 description   = "fast depth"
 license       = "MIT"
 
 # Dependencies
 
-requires "hts >= 0.2.7", "docopt >= 0.6.8"
+requires "hts >= 0.3.1", "docopt >= 0.6.8", "nim >= 1.0.0", "https://github.com/brentp/d4-nim"
 
 bin = @["mosdepth"]
 skipDirs = @["tests"]


=====================================
scripts/install.sh
=====================================
@@ -21,10 +21,26 @@ cd $base
 nimble refresh
 $base/nim-$BRANCH/bin/nimble install -y
 
-git clone --recursive https://github.com/samtools/htslib.git
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -qy
+
+export HTSLIB=dynamic
+sudo apt-get update
+sudo apt-get install git llvm curl wget libcurl4-openssl-dev
+wget https://github.com/samtools/htslib/archive/1.10.2.tar.gz
+tar xzf 1.10.2.tar.gz
+cd htslib-1.10.2/
+autoheader && autoconf && ./configure --enable-libcurl
+sudo make -j4 install
+git clone https://github.com/38/d4-format
+cd d4-format
+~/.cargo/bin/cargo build --release
+sudo cp ../d4-format/target/release/libd4binding.* /usr/local/lib
+sudo cp ./d4binding/include/d4.h /usr/local/include/
+sudo ldconfig
+
 
-cd htslib && git checkout 1.9 && autoheader && autoconf && ./configure --enable-libcurl
 cd ..
-make -j 4 -C htslib
-export LD_LIBRARY_PATH=$base/htslib
-ls -lh $base/htslib/*.so
+export LD_LIBRARY_PATH=$base/htslib-1.10.2
+ls -lh $base/htslib-1.10.2/*.so
+
+nimble install -y https://github.com/brentp/d4-nim/


=====================================
scripts/plot-dist.py
=====================================
@@ -19,6 +19,9 @@ def main():
         for chrom, data in it.groupby(gen, itemgetter(0)):
             if chrom.startswith("GL"):
                 continue
+            if "Un" in chrom: continue
+            if "random" in chrom or "HLA" in chrom: continue
+            if chrom.endswith("alt"): continue
             chroms[chrom] = True
             xs, ys = [], []
             v50 = 0


=====================================
tests/funcs.nim
=====================================
@@ -1,4 +1,5 @@
-import unittest, mosdepth as mosdepth
+import unittest
+import mosdepth
 import os
 
 suite "mosdepth-suite":
@@ -64,6 +65,14 @@ suite "mosdepth-suite":
     check ts[2] == 3
     check ts.len == 3
 
+  test "name splitting":
 
+    var r = region_line_to_region("Super-Scaffold_52")
+    check r.chrom == "Super-Scaffold_52"
+    check r.start == 0
+    check r.stop == 0
 
-
+    r = region_line_to_region("Super-Scaffold_52:2-1000")
+    check r.chrom == "Super-Scaffold_52"
+    check r.start == 1
+    check r.stop == 1000



View it on GitLab: https://salsa.debian.org/med-team/mosdepth/-/compare/dc8b42ddb9c296679a4e8b61be6ea85274348a20...8f0e04158b3e6f1105334ede4173cd8add3d8623

-- 
View it on GitLab: https://salsa.debian.org/med-team/mosdepth/-/compare/dc8b42ddb9c296679a4e8b61be6ea85274348a20...8f0e04158b3e6f1105334ede4173cd8add3d8623
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201102/8027eafa/attachment-0001.html>


More information about the debian-med-commit mailing list