[med-svn] [Git][med-team/mosdepth][master] 5 commits: Add Depends on libhts-dev

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Tue Dec 5 21:15:49 GMT 2023



Nilesh Patra pushed to branch master at Debian Med / mosdepth


Commits:
82addd0c by Nilesh Patra at 2023-12-06T02:36:15+05:30
Add Depends on libhts-dev

- - - - -
a3394d2c by Nilesh Patra at 2023-12-06T02:38:11+05:30
New upstream version 0.3.6+ds
- - - - -
158ae9ad by Nilesh Patra at 2023-12-06T02:38:11+05:30
Update upstream source from tag 'upstream/0.3.6+ds'

Update to upstream version '0.3.6+ds'
with Debian dir a0fd0a166e78b50fdae96649f149c4f4b2a6fba8
- - - - -
21dc6efa by Nilesh Patra at 2023-12-06T02:41:03+05:30
minor fix

- - - - -
8de96abd by Nilesh Patra at 2023-12-06T02:41:03+05:30
Upload to unstable

- - - - -


10 changed files:

- − .github/FUNDING.yml
- − .github/workflows/build.yml
- CHANGES.md
- debian/changelog
- debian/control
- debian/tests/control
- debian/tests/run-unit-test
- functional-tests.sh
- mosdepth.nim
- mosdepth.nimble


Changes:

=====================================
.github/FUNDING.yml deleted
=====================================
@@ -1,13 +0,0 @@
-# These are supported funding model platforms
-
-github:  [brentp]
-patreon: # Replace with a single Patreon username
-open_collective: # Replace with a single Open Collective username
-ko_fi: # Replace with a single Ko-fi username
-tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
-community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
-liberapay: # Replace with a single Liberapay username
-issuehunt: # Replace with a single IssueHunt username
-otechie: # Replace with a single Otechie username
-lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
-custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']


=====================================
.github/workflows/build.yml deleted
=====================================
@@ -1,116 +0,0 @@
-# copied from Daniel Cook's Seq collection
-name: Build
-
-on: 
-  - push
-  - pull_request
-
-jobs:
-  build:
-
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-18.04, macos-10.15]
-        version:
-        - stable
-        - devel
-
-
-    steps:
-    - uses: actions/checkout@v2
-
-    # Caching
-    - name: Cache choosenim
-      id: cache-choosenim
-      uses: actions/cache@v1
-      with:
-        path: ~/.choosenim
-        key: ${{ runner.os }}-choosenim-stable
-
-    - name: Cache nimble
-      id: cache-nimble
-      uses: actions/cache@v1
-      with:
-        path: ~/.nimble
-        key: ${{ runner.os }}-nimble-stable
-
-    - name: Cache htslib
-      id: cache-htslib
-      uses: actions/cache@v1
-      with:
-        path: $HOME/htslib
-        key: ${{ runner.os }}-htslib-1.10
-
-    # Install Dependencies
-    - name: Install dependencies (Linux)
-      if: runner.os == 'Linux'
-      run: |
-        sudo apt-get update
-        sudo apt-get -qy install bwa make build-essential cmake libncurses-dev ncurses-dev libbz2-dev lzma-dev liblzma-dev \
-             curl libssl-dev libtool autoconf automake libcurl4-openssl-dev zlib1g-dev
-
-    # Setup htslib
-    - name: Install htslib (linux)
-      if: runner.os == 'Linux'
-      run: |
-        cd
-        git clone --recursive https://github.com/samtools/htslib.git
-        cd htslib && git checkout 1.11 && autoheader && autoconf && ./configure --enable-libcurl
-        sudo make -j 4 install
-        sudo ldconfig
-        #echo "::set-env name=LD_LIBRARY_PATH::${LD_LIBRARY_PATH}:${HOME}/htslib"
-        #ls -lh $HOME/htslib/*.so
-
-    - name: Install hstlib (macos)
-      if: runner.os == 'macOS'
-      run: |
-        brew install htslib
-
-    - name: Install d4 
-      run: |
-        #export HTSLIB=system
-        git clone https://github.com/38/d4-format
-        cd d4-format
-        cargo build --release --all-features --package=d4binding
-        sudo cp target/release/libd4binding.* /usr/local/lib
-        sudo cp d4binding/include/d4.h /usr/local/include/
-        sudo ldconfig || true
-
-
-    - uses: iffy/install-nim@v4.1.1
-      with:
-        version: ${{ matrix.version }}
-
-    - uses: actions-rs/toolchain@v1
-      with:
-        toolchain: stable
-    - uses: actions-rs/cargo@v1
-
-
-    # Build and Test
-    - name: Build test executable
-      run: nimble build -Y mosdepth.nimble
-
-    - name: "Copy binary"
-      run: chmod +x mosdepth && mkdir bin && cp mosdepth bin/mosdepth_debug_${{ matrix.os }}
-
-    - name: "Build and Copy release binary"
-      run: nim c -d:danger -d:release -o:bin/mosdepth_${{ matrix.os }} mosdepth
-    
-    - name: Functional Tests
-      env:
-        TERM: "xterm"
-      run: |
-        bash ./functional-tests.sh
-
-    - name: Unit Tests
-      run: |
-        nim c -r tests/all.nim
-
-    - name: Upload Artifact
-      if: success()
-      uses: actions/upload-artifact@v1.0.0
-      with:
-        name: mosdepth_${{ matrix.os }}_executable
-        path: bin/


=====================================
CHANGES.md
=====================================
@@ -1,3 +1,8 @@
+v0.3.6
+======
++ allow filtering on fragment length thanks @LudvigOlsen for implementing! (#214)
++ fix bug where empty chromosomes are not reported as having 0 depth (#216)
+
 v0.3.5
 ======
 + fix bug with summary min for regions (#207 thanks to Xavier for supplying test-case)


=====================================
debian/changelog
=====================================
@@ -1,10 +1,13 @@
-mosdepth (0.3.5+ds-1) UNRELEASED; urgency=medium
+mosdepth (0.3.6+ds-1) unstable; urgency=medium
 
-  * Team upload.
-  * New upstream version
+  [ Andreas Tille ]
   * Standards-Version: 4.6.2 (routine-update)
 
- -- Andreas Tille <tille at debian.org>  Fri, 27 Oct 2023 07:10:35 +0200
+  [ Nilesh Patra ]
+  * Add Depends on libhts-dev (Closes: #1054602)
+  * New upstream version 0.3.6+ds
+
+ -- Nilesh Patra <nilesh at debian.org>  Wed, 06 Dec 2023 02:39:09 +0530
 
 mosdepth (0.3.3+ds-2) unstable; urgency=medium
 


=====================================
debian/control
=====================================
@@ -19,7 +19,7 @@ Rules-Requires-Root: no
 
 Package: mosdepth
 Architecture: any
-Depends: ${shlibs:Depends}, ${misc:Depends}
+Depends: ${shlibs:Depends}, ${misc:Depends}, libhts-dev
 Description: BAM/CRAM depth calculation biological sequencing
  Many small reads are produced by high-throughput "next generation"
  sequencing technologies. The final sequence is derived from how


=====================================
debian/tests/control
=====================================
@@ -1,4 +1,4 @@
 Tests: run-unit-test
-Depends: @, python3, libhts-dev
+Depends: @, python3
 Restrictions: allow-stderr
 Architecture: amd64 arm64 ppc64el


=====================================
debian/tests/run-unit-test
=====================================
@@ -35,7 +35,7 @@ done
 cat test2.*.txt | tail -20
 echo "PASS"
 
-echo "Tet 3"
+echo "Test 3"
 python3 plot-dist.py test2.mosdepth.global.dist.txt
 [ -s dist.html ]
 echo "PASS"


=====================================
functional-tests.sh
=====================================
@@ -58,6 +58,26 @@ assert_exit_code 0
 assert_equal $(zgrep -c "MT" t.per-base.bed.gz) 2
 assert_equal "MT	0	16569	0.00" "$(zgrep ^MT t.regions.bed.gz)"
 
+
+# fragment length filtering
+run length_filter $exe t tests/ovl.bam --min-frag-len 80 --max-frag-len 80
+assert_exit_code 0
+assert_equal $(zgrep -c "MT" t.per-base.bed.gz) 2
+assert_equal "MT 0 80 1 MT 80 16569 0 " "$(zgrep ^MT t.per-base.bed.gz | tr -s '[:space:]' ' ')"
+
+run length_filter $exe t tests/ovl.bam --min-frag-len 81
+assert_exit_code 0
+assert_equal "MT	0	16569	0" "$(zgrep ^MT t.per-base.bed.gz)"
+
+run length_filter $exe t tests/ovl.bam --max-frag-len 79
+assert_exit_code 0
+assert_equal "MT	0	16569	0" "$(zgrep ^MT t.per-base.bed.gz)"
+
+run bad_frag_len_filter $exe t tests/ovl.bam --min-frag-len 10 --max-frag-len 9
+assert_in_stderr "--max-frag-len was lower than --min-frag-len."
+assert_exit_code 2
+
+
 unset MOSDEPTH_Q0
 unset MOSDEPTH_Q1
 unset MOSDEPTH_Q2


=====================================
mosdepth.nim
=====================================
@@ -229,7 +229,7 @@ proc init(arr: var coverage_t, tlen:int) =
       arr.set_len(int(tlen))
   zeroMem(arr[0].addr, len(arr) * sizeof(arr[0]))
 
-proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, mapq:int= -1, eflag: uint16=1796, iflag:uint16=0, read_groups:seq[string]=(@[]), fast_mode:bool=false): int =
+proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, mapq:int= -1, min_len:int= -1, max_len:int=int.high, eflag: uint16=1796, iflag:uint16=0, read_groups:seq[string]=(@[]), fast_mode:bool=false): int =
   # depth updates arr in-place and yields the tid for each chrom.
   # returns -1 if the chrom is not found in the bam header
   # returns -2 if the chrom was found in the header, but there was no data for it
@@ -253,6 +253,7 @@ proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, mapq:int=
       arr.init(int(tgt.length+1))
       found = true
     if int(rec.mapping_quality) < mapq: continue
+    if int(abs(rec.isize)) < min_len or int(abs(rec.isize)) > max_len: continue
     if (rec.flag and eflag) != 0:
       continue
     if iflag != 0 and ((rec.flag and iflag) == 0):
@@ -497,7 +498,7 @@ proc write_thresholds(fh:BGZI, tid:int, arr:var coverage_t, thresholds:seq[int],
   if tid == -2:
     for i in thresholds:
       line.add("\t0")
-    discard fh.write_interval(line, region.chrom, start, stop)
+    doAssert fh.write_interval(line, region.chrom, start, stop) >= 0
     return
 
   var counts = new_seq[int](len(thresholds))
@@ -512,13 +513,13 @@ proc write_thresholds(fh:BGZI, tid:int, arr:var coverage_t, thresholds:seq[int],
 
   for count in counts:
     line.add("\t" & intToStr(count))
-  discard fh.write_interval(line, region.chrom, start, stop)
+  doAssert fh.write_interval(line, region.chrom, start, stop) >= 0
 
 proc write_header(fh:BGZI, thresholds: seq[int]) =
-  discard fh.bgz.write("#chrom	start	end	region")
+  doAssert fh.bgz.write("#chrom	start	end	region") >= 0
   for threshold in thresholds:
-    discard fh.bgz.write("\t" & intToStr(threshold) & "X")
-  discard fh.bgz.write("\n")
+    doAssert fh.bgz.write("\t" & intToStr(threshold) & "X") >= 0
+  doAssert fh.bgz.write("\n") >= 0
 
 proc get_min_levels(targets: seq[Target]): int =
   # determine how many levels are needed to store the data given
@@ -544,7 +545,7 @@ proc to_tuples(targets:seq[Target]): seq[tuple[name:string, length:int]] =
   for i, t in targets:
     result[i] = (t.name, t.length.int)
 
-proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16, region: string, thresholds: seq[int],
+proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int, eflag: uint16, iflag: uint16, region: string, thresholds: seq[int],
           fast_mode:bool, args: Table[string, docopt.Value], use_median:bool=false, use_d4:bool=false) =
   # windows are either from regions, or fixed-length windows.
   # we assume the input is sorted by chrom.
@@ -638,7 +639,7 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
     if skip_per_base and thresholds.len == 0 and quantize.len == 0 and bed_regions != nil and not bed_regions.contains(target.name):
       continue
     rchrom = region_t(chrom: target.name)
-    var tid = coverage(bam, arr, rchrom, mapq, eflag, iflag, read_groups=read_groups, fast_mode=fast_mode)
+    var tid = coverage(bam, arr, rchrom, mapq, min_len, max_len, eflag, iflag, read_groups=read_groups, fast_mode=fast_mode)
     if tid == -1: continue # -1 means that chrom is not even in the bam
     if tid != -2: # -2 means there were no reads in the bam
       arr.to_coverage()
@@ -657,7 +658,7 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
           line.add(starget & intToStr(int(r.start)) & "\t" & intToStr(int(r.stop)) & "\t" & m)
         else:
           line.add(starget & intToStr(int(r.start)) & "\t" & intToStr(int(r.stop)) & "\t" & r.name & "\t" & m)
-        discard fregion.write_interval(line, target.name, int(r.start), int(r.stop))
+        doAssert fregion.write_interval(line, target.name, int(r.start), int(r.stop)) >= 0
         line = line[0..<0]
         if tid != -2:
           if region.isdigit: #stores the aggregated coverage for each region when working with even windows across the genome
@@ -693,7 +694,7 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
           if use_d4:
             fd4.write(target.name, @[Interval(left: 0'u32, right: target.length.uint32, value: 0'i32)])
         else:
-          discard fbase.write_interval(starget & "0\t" & intToStr(int(target.length)) & "\t0", target.name, 0, int(target.length))
+          doAssert fbase.write_interval(starget & "0\t" & intToStr(int(target.length)) & "\t0", target.name, 0, int(target.length)) >= 0
       else:
         var write_fbase = true
         when defined(d4):
@@ -712,15 +713,15 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, eflag: uint16, iflag: uint16
             fastIntToStr(p.stop.int32, line, line.len)
             line.add('\t')
             fastIntToStr(p.value.int32, line, line.len)
-            discard fbase.write_interval(line, target.name, p.start, p.stop)
+            doAssert fbase.write_interval(line, target.name, p.start, p.stop) >= 0
     if quantize.len != 0:
       if tid == -2 and quantize[0] == 0:
         var lookup = make_lookup(quantize)
-        discard fquantize.write_interval(starget & "0\t" & intToStr(int(target.length)) & "\t" & lookup[0], target.name, 0, int(target.length))
+        doAssert fquantize.write_interval(starget & "0\t" & intToStr(int(target.length)) & "\t" & lookup[0], target.name, 0, int(target.length)) >= 0
       else:
         if tid == -2: continue
         for p in gen_quantized(quantize, arr):
-            discard fquantize.write_interval(starget & intToStr(p.start) & "\t" & intToStr(p.stop) & "\t" & p.value, target.name, p.start, p.stop)
+            doAssert fquantize.write_interval(starget & intToStr(p.start) & "\t" & intToStr(p.stop) & "\t" & p.value, target.name, p.start, p.stop) >= 0
 
   write_summary("total", global_stat, fh_summary)
   if region != "":
@@ -781,7 +782,7 @@ when(isMainModule):
   when not defined(release) and not defined(lto):
     stderr.write_line "[mosdepth] WARNING: built in debug mode; will be slow"
 
-  let version = "mosdepth 0.3.5"
+  let version = "mosdepth 0.3.6"
   let env_fasta = getEnv("REF_PATH")
   var doc = format("""
   $version
@@ -816,17 +817,19 @@ Common Options:
 
 Other options:
 
-  -F --flag <FLAG>              exclude reads with any of the bits in FLAG set [default: 1796]
-  -i --include-flag <FLAG>      only include reads with any of the bits in FLAG set. default is unset. [default: 0]
-  -x --fast-mode                dont look at internal cigar operations or correct mate overlaps (recommended for most use-cases).
-  -q --quantize <segments>      write quantized output see docs for description.
-  -Q --mapq <mapq>              mapping quality threshold. reads with a quality less than this value are ignored [default: 0]
-  -T --thresholds <thresholds>  for each interval in --by, write number of bases covered by at
-                                least threshold bases. Specify multiple integer values separated
-                                by ','.
-  -m --use-median               output median of each region (in --by) instead of mean.
-  -R --read-groups <string>     only calculate depth for these comma-separated read groups IDs.
-  -h --help                     show help
+  -F --flag <FLAG>                  exclude reads with any of the bits in FLAG set [default: 1796]
+  -i --include-flag <FLAG>          only include reads with any of the bits in FLAG set. default is unset. [default: 0]
+  -x --fast-mode                    dont look at internal cigar operations or correct mate overlaps (recommended for most use-cases).
+  -q --quantize <segments>          write quantized output see docs for description.
+  -Q --mapq <mapq>                  mapping quality threshold. reads with a quality less than this value are ignored [default: 0]
+  -l --min-frag-len <min-frag-len>  minimum insert size. reads with a smaller insert size than this are ignored [default: -1]
+  -u --max-frag-len <max-frag-len>  maximum insert size. reads with a larger insert size than this are ignored. [default: -1]
+  -T --thresholds <thresholds>      for each interval in --by, write number of bases covered by at
+                                    least threshold bases. Specify multiple integer values separated
+                                    by ','.
+  -m --use-median                   output median of each region (in --by) instead of mean.
+  -R --read-groups <string>         only calculate depth for these comma-separated read groups IDs.
+  -h --help                         show help
   """
 
   var args: Table[string, Value]
@@ -837,6 +840,14 @@ Other options:
     quit "error parsing arguments"
 
   let mapq = S.parse_int($args["--mapq"])
+  let min_len = S.parse_int($args["--min-frag-len"])
+  var max_len = S.parse_int($args["--max-frag-len"])
+  if max_len < 0:
+    max_len = int.high
+  if max_len < min_len:
+    stderr.write_line("[mosdepth] error --max-frag-len was lower than --min-frag-len.")
+    quit(2)
+  
   var
     region: string
     thresholds: seq[int] = threshold_args($args["--thresholds"])
@@ -870,7 +881,7 @@ Other options:
     stderr.write_line("[mosdepth] error alignment file must be indexed")
     quit(2)
 
-  var opts = SamField.SAM_FLAG.int or SamField.SAM_RNAME.int or SamField.SAM_POS.int or SamField.SAM_MAPQ.int or SamField.SAM_CIGAR.int
+  var opts = SamField.SAM_FLAG.int or SamField.SAM_RNAME.int or SamField.SAM_POS.int or SamField.SAM_MAPQ.int or SamField.SAM_CIGAR.int or SamField.SAM_TLEN.int
   if not fast_mode:
       opts = opts or SamField.SAM_QNAME.int or SamField.SAM_RNEXT.int or SamField.SAM_PNEXT.int #or SamField.SAM_TLEN.int
 
@@ -881,4 +892,4 @@ Other options:
   discard bam.set_option(FormatOption.CRAM_OPT_DECODE_MD, 0)
   check_chrom(chrom, bam.hdr.targets)
 
-  main(bam, chrom, mapq, eflag, iflag, region, thresholds, fast_mode, args, use_median=use_median, use_d4=use_d4)
+  main(bam, chrom, mapq, min_len, max_len, eflag, iflag, region, thresholds, fast_mode, args, use_median=use_median, use_d4=use_d4)


=====================================
mosdepth.nimble
=====================================
@@ -1,13 +1,13 @@
 # Package
 
-version       = "0.3.5"
+version       = "0.3.6"
 author        = "Brent Pedersen"
 description   = "fast depth"
 license       = "MIT"
 
 # Dependencies
 
-requires "hts >= 0.3.22", "docopt >= 0.6.8", "nim >= 1.0.0", "https://github.com/brentp/d4-nim >= 0.0.3"
+requires "hts >= 0.3.22", "docopt == 0.7.0", "nim >= 1.0.0", "https://github.com/brentp/d4-nim >= 0.0.3"
 
 bin = @["mosdepth"]
 skipDirs = @["tests"]



View it on GitLab: https://salsa.debian.org/med-team/mosdepth/-/compare/fed4dc3b14b1f5add8d5d27f0a719a4ab9ff4189...8de96abd4be466d38d4c5d6712ac9596c1a24def

-- 
View it on GitLab: https://salsa.debian.org/med-team/mosdepth/-/compare/fed4dc3b14b1f5add8d5d27f0a719a4ab9ff4189...8de96abd4be466d38d4c5d6712ac9596c1a24def
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20231205/368faa13/attachment-0001.htm>


More information about the debian-med-commit mailing list