[med-svn] [Git][med-team/mosdepth][upstream] New upstream version 0.3.11+ds

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Thu Sep 25 22:10:43 BST 2025



Nilesh Patra pushed to branch upstream at Debian Med / mosdepth


Commits:
9747000c by Nilesh Patra at 2025-09-26T02:36:32+05:30
New upstream version 0.3.11+ds
- - - - -


7 changed files:

- − .github/FUNDING.yml
- − .github/workflows/build.yml
- CHANGES.md
- README.md
- functional-tests.sh
- mosdepth.nim
- mosdepth.nimble


Changes:

=====================================
.github/FUNDING.yml deleted
=====================================
@@ -1,13 +0,0 @@
-# These are supported funding model platforms
-
-github:  [brentp]
-patreon: # Replace with a single Patreon username
-open_collective: # Replace with a single Open Collective username
-ko_fi: # Replace with a single Ko-fi username
-tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
-community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
-liberapay: # Replace with a single Liberapay username
-issuehunt: # Replace with a single IssueHunt username
-otechie: # Replace with a single Otechie username
-lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
-custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']


=====================================
.github/workflows/build.yml deleted
=====================================
@@ -1,126 +0,0 @@
-# copied from Daniel Cook's Seq collection
-name: Build
-
-on: 
-  push:
-    branches:
-      - master
-  pull_request:
-    branches:
-      - master
-
-jobs:
-  build:
-
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-20.04, macos-13]
-        version:
-        - 1.6.18
-        - 2.0.2
-
-
-    steps:
-    - uses: actions/checkout at v4
-
-    # Caching
-    - name: Cache choosenim
-      id: cache-choosenim
-      uses: actions/cache at v4
-      with:
-        path: ~/.choosenim
-        key: ${{ runner.os }}-choosenim-stable
-
-    - name: Cache nimble
-      id: cache-nimble
-      uses: actions/cache at v4
-      with:
-        path: ~/.nimble
-        key: ${{ runner.os }}-nimble-stable
-
-    - name: Cache htslib
-      id: cache-htslib
-      uses: actions/cache at v4
-      with:
-        path: $HOME/htslib
-        key: ${{ runner.os }}-htslib-1.10
-
-    # Install Dependencies
-    - name: Install dependencies (Linux)
-      if: runner.os == 'Linux'
-      run: |
-        sudo apt-get update
-        sudo apt-get -qy install bwa make build-essential cmake libncurses-dev ncurses-dev libbz2-dev lzma-dev liblzma-dev \
-             curl libssl-dev libtool autoconf automake libcurl4-openssl-dev zlib1g-dev
-
-    # Setup htslib
-    - name: Install htslib (linux)
-      if: runner.os == 'Linux'
-      run: |
-        cd
-        git clone -b 1.18 --recursive https://github.com/samtools/htslib.git
-        cd htslib && autoheader && autoconf && ./configure --enable-libcurl
-        sudo make -j 4 install
-        sudo ldconfig
-        #echo "::set-env name=LD_LIBRARY_PATH::${LD_LIBRARY_PATH}:${HOME}/htslib"
-        #ls -lh $HOME/htslib/*.so
-
-    - name: Install hstlib (macos)
-      if: runner.os == 'macOS'
-      run: |
-        brew install htslib
-
-    - name: Set DYLD_LIBRARY_PATH
-      if: runner.os == 'macOS'
-      run: |
-          echo "DYLD_LIBRARY_PATH=$(brew --prefix htslib)/lib" >> $GITHUB_ENV
-
-    - name: Install d4 
-      run: |
-        #export HTSLIB=system
-        git clone https://github.com/38/d4-format
-        echo 'location' >> ~/.curlrc # https://github.com/38/d4-format/pull/77#issuecomment-2044438359
-        cd d4-format
-        cargo build --release --all-features --package=d4binding
-        sudo cp target/release/libd4binding.* /usr/local/lib
-        sudo cp d4binding/include/d4.h /usr/local/include/
-        sudo ldconfig || true
-
-
-    - uses: iffy/install-nim at v5
-      with:
-        version: ${{ matrix.version }}
-
-    - name: Rust Toolchain
-      uses: dtolnay/rust-toolchain at nightly
-      with:
-        toolchain: stable
-
-    # Build and Test
-    - name: Build test executable
-      run: nimble build -Y mosdepth.nimble
-
-    - name: "Copy binary"
-      run: chmod +x mosdepth && mkdir bin && cp mosdepth bin/mosdepth_debug_${{ matrix.os }}
-
-    - name: "Build and Copy release binary"
-      run: nim c --mm:refc -d:danger -d:release -o:bin/mosdepth_${{ matrix.os }} mosdepth
-    
-    - name: Functional Tests
-      env:
-        TERM: "xterm"
-      run: |
-        bash ./functional-tests.sh
-
-    - name: Unit Tests
-      run: |
-        nim c -r tests/all.nim
-
-    - name: Upload Artifact
-      if: success()
-      uses: actions/upload-artifact at v4
-      with:
-        name: mosdepth_${{ matrix.os }}_nim${{ matrix.version }}_executable
-        path: bin/


=====================================
CHANGES.md
=====================================
@@ -1,3 +1,7 @@
+v0.3.11
+=======
++ add --fragment-mode (#246 from @LudvigOlsen). calculates coverage over a full fragment, including insert.
+
 v0.3.10
 =======
 + write sfi index in d4 files (#243)


=====================================
README.md
=====================================
@@ -23,7 +23,7 @@ when appropriate, the output files are bgzipped and indexed for ease of use.
 ## usage
 
 ```
-mosdepth 0.3.9
+mosdepth 0.3.11
 
   Usage: mosdepth [options] <prefix> <BAM-or-CRAM>
 
@@ -46,13 +46,13 @@ Common Options:
   -n --no-per-base           dont output per-base depth. skipping this output will speed execution
                              substantially. prefer quantized or thresholded values if possible.
   -f --fasta <fasta>         fasta file for use with CRAM files [default: ].
-  --d4                       output per-base depth in d4 format.
 
 Other options:
 
   -F --flag <FLAG>                  exclude reads with any of the bits in FLAG set [default: 1796]
   -i --include-flag <FLAG>          only include reads with any of the bits in FLAG set. default is unset. [default: 0]
   -x --fast-mode                    dont look at internal cigar operations or correct mate overlaps (recommended for most use-cases).
+  -a --fragment-mode                count the coverage of the full fragment including the full insert (proper pairs only).
   -q --quantize <segments>          write quantized output see docs for description.
   -Q --mapq <mapq>                  mapping quality threshold. reads with a quality less than this value are ignored [default: 0]
   -l --min-frag-len <min-frag-len>  minimum insert size. reads with a smaller insert size than this are ignored [default: -1]


=====================================
functional-tests.sh
=====================================
@@ -78,6 +78,20 @@ assert_in_stderr "--max-frag-len was lower than --min-frag-len."
 assert_exit_code 2
 
 
+# fragment-mode
+run fragment_mode $exe t --fragment-mode tests/full-fragment-pairs.bam
+assert_equal "$(zcat < t.per-base.bed.gz)" "chr22:20000000-23000000	0	17318	0
+chr22:20000000-23000000	17318	17320	1
+chr22:20000000-23000000	17320	17420	2
+chr22:20000000-23000000	17420	17756	1
+chr22:20000000-23000000	17756	52130	0
+chr22:20000000-23000000	52130	52135	1
+chr22:20000000-23000000	52135	52235	2
+chr22:20000000-23000000	52235	52546	1
+chr22:20000000-23000000	52546	3000001	0"
+assert_exit_code 0
+
+
 unset MOSDEPTH_Q0
 unset MOSDEPTH_Q1
 unset MOSDEPTH_Q2


=====================================
mosdepth.nim
=====================================
@@ -48,7 +48,7 @@ proc to_coverage(c: var coverage_t) =
   # to_coverage converts from an array of start/end inc/decs to actual coverage.
   c.cumsum()
 
-iterator gen_depths(arr: coverage_t, offset: int=0, istop: int=0): depth_t =
+iterator gen_depths(arr: coverage_t, offset: int = 0, istop: int = 0): depth_t =
   # given `arr` with values in each index indicating the number of reads
   # starting or ending at that location, generate depths.
   # offset is only used for a region like chr6:200-30000, in which case, offset will be 200
@@ -83,12 +83,12 @@ iterator gen_depths(arr: coverage_t, offset: int=0, istop: int=0): depth_t =
 
   # this is horrible, but it works. we don't know
   # if we've already printed the record on not.
-  elif  last_i != i:
-      yield (last_i - 1, i, last_depth)
+  elif last_i != i:
+    yield (last_i - 1, i, last_depth)
   else:
-      yield (last_i, i, last_depth)
+    yield (last_i, i, last_depth)
 
-proc linear_search*(q:int, vals:seq[int], idx: ptr int) {.inline.} =
+proc linear_search*(q: int, vals: seq[int], idx: ptr int) {.inline.} =
   if q < vals[0] or q > vals[vals.high]:
     idx[] = -1
     return
@@ -134,7 +134,7 @@ iterator gen_quantized(quants: seq[int], arr: coverage_t): depth_s {.inline.} =
       yield (last_pos, len(arr)-1, lookup[last_quantized])
 
 proc pair_sort(a, b: pair): int =
-   return a.pos - b.pos
+  return a.pos - b.pos
 
 iterator gen_start_ends(c: Cigar, ipos: int): pair {.inline.} =
   # generate start, end pairs given a cigar string and a position offset.
@@ -162,9 +162,10 @@ iterator gen_start_ends(c: Cigar, ipos: int): pair {.inline.} =
 
 proc inc_coverage(c: Cigar, ipos: int = 0, arr: var seq[int32]) {.inline.} =
   for p in gen_start_ends(c, ipos):
-      arr[p.pos] += p.value
+    arr[p.pos] += p.value
 
-iterator regions(bam: hts.Bam, region: region_t, tid: int, targets: seq[hts.Target]): Record {.inline.} =
+iterator regions(bam: hts.Bam, region: region_t, tid: int, targets: seq[
+    hts.Target]): Record {.inline.} =
   if region == nil:
     for r in bam:
       yield r
@@ -179,31 +180,31 @@ iterator regions(bam: hts.Bam, region: region_t, tid: int, targets: seq[hts.Targ
       stderr.write_line("[mosdepth]", region.chrom, " not found")
 
 proc bed_line_to_region(line: string): region_t =
-   var
-     cse = line.split('\t', 5)
-
-   if len(cse) < 3:
-     stderr.write_line("[mosdepth] skipping bad bed line:", line.strip())
-     return nil
-   var
-     s = S.parse_int(cse[1])
-     e = S.parse_int(cse[2])
-     reg = region_t(chrom: cse[0], start: uint32(s), stop: uint32(e))
-   doAssert s <= e, "[slivar] ERROR: start > end in bed line:" & line
-   if len(cse) > 3:
-     reg.name = cse[3].strip()
-   return reg
+  var
+    cse = line.split('\t', 5)
+
+  if len(cse) < 3:
+    stderr.write_line("[mosdepth] skipping bad bed line:", line.strip())
+    return nil
+  var
+    s = S.parse_int(cse[1])
+    e = S.parse_int(cse[2])
+    reg = region_t(chrom: cse[0], start: uint32(s), stop: uint32(e))
+  doAssert s <= e, "[slivar] ERROR: start > end in bed line:" & line
+  if len(cse) > 3:
+    reg.name = cse[3].strip()
+  return reg
 
 proc region_line_to_region*(region: string): region_t =
   if region == "" or region == "nil":
     return nil
   if ':' notin region:
-    return region_t(chrom:region)
+    return region_t(chrom: region)
   result = region_t()
 
   var i = 0
   # rsplit yields strings in reverse order
-  for w in region.rsplit({':', '-'}, maxsplit=2):
+  for w in region.rsplit({':', '-'}, maxsplit = 2):
     if i == 1:
       result.start = uint32(S.parse_int(w)) - 1
     elif i == 0:
@@ -212,16 +213,16 @@ proc region_line_to_region*(region: string): region_t =
       result.chrom = w
     inc(i)
 
-proc get_tid(tgts: seq[hts.Target], chrom: string, last_tid:var int): int =
+proc get_tid(tgts: seq[hts.Target], chrom: string, last_tid: var int): int =
   if tgts[last_tid + 1].name == chrom:
-      last_tid = tgts[last_tid + 1].tid
-      return last_tid
+    last_tid = tgts[last_tid + 1].tid
+    return last_tid
   for t in tgts:
     if t.name == chrom:
       last_tid = t.tid
       return last_tid
 
-proc init(arr: var coverage_t, tlen:int) =
+proc init(arr: var coverage_t, tlen: int) =
   ## try to re-use the array.
   if len(arr) != int(tlen):
     # must create a new array in some cases.
@@ -233,7 +234,11 @@ proc init(arr: var coverage_t, tlen:int) =
       arr.set_len(int(tlen))
   zeroMem(arr[0].addr, len(arr) * sizeof(arr[0]))
 
-proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, targets: seq[Target], mapq:int= -1, min_len:int= -1, max_len:int=int.high, eflag: uint16=1796, iflag:uint16=0, read_groups:seq[string]=(@[]), fast_mode:bool=false, last_tid: var int = -1): int =
+proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t,
+    targets: seq[Target], mapq: int = -1, min_len: int = -1,
+    max_len: int = int.high, eflag: uint16 = 1796, iflag: uint16 = 0,
+    read_groups: seq[string] = (@[]), fast_mode: bool = false,
+    fragment_mode: bool = false, last_tid: var int = -1): int =
   # depth updates arr in-place and yields the tid for each chrom.
   # returns -1 if the chrom is not found in the bam header
   # returns -2 if the chrom was found in the header, but there was no data for it
@@ -266,16 +271,18 @@ proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, targets:
       if t.isNone or not read_groups.contains(t.get):
         continue
     if tgt.tid != rec.b.core.tid:
-        raise newException(OSError, "expected only a single chromosome per query")
+      raise newException(OSError, "expected only a single chromosome per query")
 
     # rec:   --------------
     # mate:             ------------
     # handle overlapping mate pairs.
-    if (not fast_mode) and rec.flag.proper_pair and (not rec.flag.supplementary):
-      if rec.b.core.tid == rec.b.core.mtid and rec.stop > rec.matepos and 
+    if (not fast_mode) and (not fragment_mode) and rec.flag.proper_pair and (
+        not rec.flag.supplementary):
+      if rec.b.core.tid == rec.b.core.mtid and rec.stop > rec.matepos and
         # First case is partial overlap, second case is complete overlap
-        # For complete overlap we must check if the mate was already seen or not yet
-        ((rec.start < rec.matepos) or (rec.start == rec.mate_pos and not seen.hasKey(rec.qname))):
+          # For complete overlap we must check if the mate was already seen or not yet
+        ((rec.start < rec.matepos) or (rec.start == rec.mate_pos and
+            not seen.hasKey(rec.qname))):
         var rc = rec.copy()
         seen[rc.qname] = rc
       else:
@@ -302,7 +309,7 @@ proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, targets:
 
             var ses = sequtils.to_seq(gen_start_ends(rec.cigar, rec.start.int))
             for p in gen_start_ends(mate.cigar, mate.start.int):
-                ses.add(p)
+              ses.add(p)
             alg.sort(ses, pair_sort)
             var pair_depth = 0
             var last_pos = 0
@@ -316,10 +323,22 @@ proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, targets:
                 inc(arr[p.pos])
               pair_depth += p.value
               last_pos = p.pos
-            if pair_depth != 0: echo $rec.qname & ":" & $rec & " " & $mate.qname & ":" & $mate & " " & $pair_depth
+            if pair_depth != 0: echo $rec.qname & ":" & $rec & " " &
+                $mate.qname & ":" & $mate & " " & $pair_depth
+    
     if fast_mode:
       arr[rec.start] += 1
       arr[rec.stop] -= 1
+
+    elif fragment_mode:
+      # only count read1 from proper pairs
+      if rec.flag.read2 or (not rec.flag.proper_pair) or rec.flag.supplementary:
+        continue
+      
+      var fragment_start = min(rec.start, rec.matepos)
+      arr[fragment_start] += 1
+      arr[fragment_start + abs(rec.isize)] -= 1
+      
     else:
       inc_coverage(rec.cigar, rec.start.int, arr)
 
@@ -329,7 +348,7 @@ proc coverage(bam: hts.Bam, arr: var coverage_t, region: var region_t, targets:
 
 proc bed_to_table(bed: string): TableRef[string, seq[region_t]] =
   var bed_regions = newTable[string, seq[region_t]]()
-  var kstr = kstring_t(l:0, m: 0, s: nil)
+  var kstr = kstring_t(l: 0, m: 0, s: nil)
   var hf = hts_open(cstring(bed), "r")
   while hts_getline(hf, cint(10), addr kstr) > 0:
     if kstr.s[0] == 't' and ($kstr.s).startswith("track "):
@@ -342,28 +361,30 @@ proc bed_to_table(bed: string): TableRef[string, seq[region_t]] =
 
   # since it is read into mem, can also well sort.
   for chrom, ivs in bed_regions.mpairs:
-      sort(ivs, proc (a, b: region_t): int = int(a.start) - int(b.start))
+    sort(ivs, proc (a, b: region_t): int = int(a.start) - int(b.start))
 
   hts.free(kstr.s)
   return bed_regions
 
 iterator window_gen(window: uint32, t: hts.Target): region_t =
-  var start:uint32 = 0
+  var start: uint32 = 0
   while start + window < t.length:
     yield region_t(chrom: t.name, start: start, stop: start + window)
     start += window
   if start != t.length:
     yield region_t(chrom: t.name, start: start, stop: t.length)
 
-iterator region_gen(window: uint32, target: hts.Target, bed_regions: TableRef[string, seq[region_t]]): region_t =
-    if bed_regions == nil:
-      for r in window_gen(window, target): yield r
-    else:
-      if bed_regions.contains(target.name):
-        for r in bed_regions[target.name]: yield r
-        bed_regions.del(target.name)
+iterator region_gen(window: uint32, target: hts.Target, bed_regions: TableRef[
+    string, seq[region_t]]): region_t =
+  if bed_regions == nil:
+    for r in window_gen(window, target): yield r
+  else:
+    if bed_regions.contains(target.name):
+      for r in bed_regions[target.name]: yield r
+      bed_regions.del(target.name)
 
-proc imean(vals: coverage_t, start:uint32, stop:uint32, ms:var CountStat[uint32]): float64 =
+proc imean(vals: coverage_t, start: uint32, stop: uint32, ms: var CountStat[
+    uint32]): float64 =
   if start > uint32(len(vals)):
     return 0
 
@@ -380,11 +401,13 @@ proc imean(vals: coverage_t, start:uint32, stop:uint32, ms:var CountStat[uint32]
 
 const MAX_COVERAGE = int32(400000)
 
-proc inc(d: var seq[int64], coverage: var coverage_t, start:uint32, stop:uint32) =
-  var v:int32
+proc inc(d: var seq[int64], coverage: var coverage_t, start: uint32,
+    stop: uint32) =
+  var v: int32
   var L = int32(d.high)
   if int(start) >= len(coverage):
-    stderr.write_line("[mosdepth] warning requested interval outside of chromosome range:", start, "..", stop)
+    stderr.write_line("[mosdepth] warning requested interval outside of chromosome range:",
+        start, "..", stop)
     return
   var istop = min(stop, uint32(coverage.len))
 
@@ -400,7 +423,7 @@ proc inc(d: var seq[int64], coverage: var coverage_t, start:uint32, stop:uint32)
     if v < 0: continue
     d[v] += 1
 
-proc write_distribution(chrom: string, d: var seq[int64], fh:File) =
+proc write_distribution(chrom: string, d: var seq[int64], fh: File) =
   var sum: int64
   for v in d:
     sum += int64(v)
@@ -416,11 +439,12 @@ proc write_distribution(chrom: string, d: var seq[int64], fh:File) =
     if irev > 300 and v == 0: continue
     cum += float64(v) / float64(sum)
     if cum < 8e-5: continue
-    fh.write_line(chrom, "\t", irev, "\t", su.format_float(cum, ffDecimal, precision=precision))
+    fh.write_line(chrom, "\t", irev, "\t", su.format_float(cum, ffDecimal,
+        precision = precision))
   # reverse it back because we use to update the full genome
   reverse(d)
 
-proc write_summary(region: string, stat: depth_stat, fh:File) =
+proc write_summary(region: string, stat: depth_stat, fh: File) =
   var mean_depth: float64
   if stat.cum_length > 0:
     mean_depth = float64(stat.cum_depth) / float64(stat.cum_length)
@@ -438,7 +462,7 @@ proc write_summary(region: string, stat: depth_stat, fh:File) =
   fh.write_line [region,
                  $stat.cum_length,
                  $stat.cum_depth,
-                 $mean_depth.format_float(ffDecimal, precision=precision),
+                 $mean_depth.format_float(ffDecimal, precision = precision),
                  $stat_min,
                  $stat.max_depth].join("\t")
 
@@ -461,7 +485,7 @@ proc sum_into(afrom: seq[int64], ato: var seq[int64]) =
   for i, d in afrom:
     ato[i] += d
 
-proc get_quantize_args*(qa: string) : seq[int] =
+proc get_quantize_args*(qa: string): seq[int] =
   if qa == "nil":
     return
   var a = qa
@@ -474,7 +498,7 @@ proc get_quantize_args*(qa: string) : seq[int] =
   if a[a.high] == ':':
     a = a & intToStr(high(int))
   try:
-    var qs = map(a.split(':'), proc (s:string): int = return parse_int(s))
+    var qs = map(a.split(':'), proc (s: string): int = return parse_int(s))
     sort(qs, system.cmp[int])
     return qs
   except:
@@ -482,7 +506,8 @@ proc get_quantize_args*(qa: string) : seq[int] =
     quit(2)
 
 
-proc write_thresholds(fh:BGZI, tid:int, arr:var coverage_t, thresholds:seq[int], region: region_t) =
+proc write_thresholds(fh: BGZI, tid: int, arr: var coverage_t, thresholds: seq[
+    int], region: region_t) =
   # write the number of bases in each region that are >= each threshold.
   if thresholds.len == 0: return
   var
@@ -505,7 +530,7 @@ proc write_thresholds(fh:BGZI, tid:int, arr:var coverage_t, thresholds:seq[int],
 
   var counts = new_seq[int](len(thresholds))
   when NimMajor < 2:
-      shallow(arr)
+    shallow(arr)
 
   # iterate over the region and count bases >= request cutoffs.
   for v in arr[start..<stop]:
@@ -518,7 +543,7 @@ proc write_thresholds(fh:BGZI, tid:int, arr:var coverage_t, thresholds:seq[int],
     line.add("\t" & intToStr(count))
   doAssert fh.write_interval(line, region.chrom, start, stop) >= 0
 
-proc write_header(fh:BGZI, thresholds: seq[int]) =
+proc write_header(fh: BGZI, thresholds: seq[int]) =
   doAssert fh.bgz.write("#chrom	start	end	region") >= 0
   for threshold in thresholds:
     doAssert fh.bgz.write("\t" & intToStr(threshold) & "X") >= 0
@@ -538,36 +563,36 @@ proc get_min_levels(targets: seq[Target]): int =
     result += 1
     s = s shl 3
 
-proc isdigit(s:string): bool =
+proc isdigit(s: string): bool =
   for c in s:
     if not c.isdigit: return false
   return true
 
-proc to_tuples(targets:seq[Target]): seq[tuple[name:string, length:int]] =
+proc to_tuples(targets: seq[Target]): seq[tuple[name: string, length: int]] =
   result.setLen(targets.len)
   for i, t in targets:
     result[i] = (t.name, t.length.int)
 
 proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int, eflag: uint16, iflag: uint16, region: string, thresholds: seq[int],
-          fast_mode:bool, args: Table[string, docopt.Value], use_median:bool=false, use_d4:bool=false) =
+          fast_mode: bool, args: Table[string, docopt.Value], use_median: bool = false, fragment_mode: bool = false, use_d4: bool = false) =
   # windows are either from regions, or fixed-length windows.
   # we assume the input is sorted by chrom.
   var
     targets = bam.hdr.targets
     sub_targets = get_targets(targets, chrom)
     read_groups: seq[string]
-    rchrom : region_t
+    rchrom: region_t
     arr: coverage_t
     prefix: string = $(args["<prefix>"])
     skip_per_base = args["--no-per-base"]
     window: uint32 = 0
     bed_regions: TableRef[string, seq[region_t]] # = Table[string, seq[region_t]]
-    #fbase: BGZ
+                                                 #fbase: BGZ
     fquantize: BGZI
     fthresholds: BGZI
     fregion: BGZI
-    fh_global_dist:File
-    fh_region_dist:File
+    fh_global_dist: File
+    fh_region_dist: File
     fh_summary: File
     quantize = get_quantize_args($args["--quantize"])
 
@@ -578,7 +603,7 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int,
     global_stat: depth_stat
 
   when defined(d4):
-    var fd4:D4
+    var fd4: D4
   var fbase: BGZI
 
   # use clear to set min depth to uint32.high
@@ -603,17 +628,20 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int,
     # not call set_threads().
     if use_d4:
       when defined(d4):
-        doAssert fd4.open(prefix & ".per-base.d4", mode="w"), &"[mosdepth] error opening {prefix}.per-base.d4"
+        doAssert fd4.open(prefix & ".per-base.d4", mode = "w"), &"[mosdepth] error opening {prefix}.per-base.d4"
         fd4.set_chromosomes(targets.to_tuples)
 
     else:
-      fbase = wopen_bgzi(prefix & ".per-base.bed.gz", 1, 2, 3, true, compression_level=1, levels=levels)
+      fbase = wopen_bgzi(prefix & ".per-base.bed.gz", 1, 2, 3, true,
+          compression_level = 1, levels = levels)
     #open(fbase, prefix & ".per-base.bed.gz", "w1")
   if quantize.len != 0:
-    fquantize = wopen_bgzi(prefix & ".quantized.bed.gz", 1, 2, 3, true, compression_level=1, levels=levels)
+    fquantize = wopen_bgzi(prefix & ".quantized.bed.gz", 1, 2, 3, true,
+        compression_level = 1, levels = levels)
 
   if thresholds.len != 0:
-    fthresholds = wopen_bgzi(prefix & ".thresholds.bed.gz", 1, 2, 3, true, compression_level=1, levels=levels)
+    fthresholds = wopen_bgzi(prefix & ".thresholds.bed.gz", 1, 2, 3, true,
+        compression_level = 1, levels = levels)
     fthresholds.write_header(thresholds)
 
   if not open(fh_global_dist, prefix & ".mosdepth.global.dist.txt", fmWrite):
@@ -622,35 +650,44 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int,
   if not open(fh_summary, prefix & ".mosdepth.summary.txt", fmWrite):
     stderr.write_line("[mosdepth] could not open file:", prefix & ".mosdepth.summary.txt")
 
-  if region != "" and not open(fh_region_dist, prefix & ".mosdepth.region.dist.txt", fmWrite):
+  if region != "" and not open(fh_region_dist, prefix &
+      ".mosdepth.region.dist.txt", fmWrite):
     stderr.write_line("[mosdepth] could not open file:", prefix & ".mosdepth.dist.txt")
 
   if region != "":
-    fregion = wopen_bgzi(prefix & ".regions.bed.gz", 1, 2, 3, true, levels=levels)
+    fregion = wopen_bgzi(prefix & ".regions.bed.gz", 1, 2, 3, true,
+        levels = levels)
     if region.isdigit():
       window = uint32(S.parse_int(region))
     else:
       bed_regions = bed_to_table(region)
   when NimMajor < 2:
-      shallow(arr)
+    shallow(arr)
 
-  var cs = initCountStat[uint32](size=if use_median: 65536 else: 0)
+  var cs = initCountStat[uint32](size = if use_median: 65536 else: 0)
 
   var ii = 0
   var last_tid = -1
 
   for target in sub_targets:
     if ii > 0 and ii mod 100_000 == 0:
-        stderr.write_line("[mosdepth] on contig:", ii, ". percent of contigs completed:", su.format_float(100 * ii/sub_targets.len, ffDecimal, precision=2))
+      stderr.write_line("[mosdepth] on contig:", ii,
+          ". percent of contigs completed:", su.format_float(100 *
+          ii/sub_targets.len, ffDecimal, precision = 2))
     ii += 1
-    zeroMem(chrom_global_distribution[0].addr, len(chrom_global_distribution) * sizeof(chrom_global_distribution[0]))
+    zeroMem(chrom_global_distribution[0].addr, len(chrom_global_distribution) *
+        sizeof(chrom_global_distribution[0]))
     if region != "":
-      zeroMem(chrom_region_distribution[0].addr, len(chrom_region_distribution) * sizeof(chrom_region_distribution[0]))
+      zeroMem(chrom_region_distribution[0].addr, len(
+          chrom_region_distribution) * sizeof(chrom_region_distribution[0]))
     # if we can skip per base and there's no regions from this chrom we can avoid coverage calc.
-    if skip_per_base and thresholds.len == 0 and quantize.len == 0 and bed_regions != nil and not bed_regions.contains(target.name):
+    if skip_per_base and thresholds.len == 0 and quantize.len == 0 and
+        bed_regions != nil and not bed_regions.contains(target.name):
       continue
     rchrom = region_t(chrom: target.name)
-    var tid = coverage(bam, arr, rchrom, targets, mapq, min_len, max_len, eflag, iflag, read_groups=read_groups, fast_mode=fast_mode, last_tid=last_tid)
+    var tid = coverage(bam, arr, rchrom, targets, mapq, min_len, max_len, eflag,
+        iflag, read_groups = read_groups, fast_mode = fast_mode, fragment_mode = fragment_mode,
+        last_tid = last_tid)
     if tid == -1: continue # -1 means that chrom is not even in the bam
     if tid != -2: # -2 means there were no reads in the bam
       arr.to_coverage()
@@ -662,18 +699,23 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int,
       for r in region_gen(window, target, bed_regions):
         if tid != -2:
           me = imean(arr, r.start, r.stop, cs)
-          chrom_region_stat = chrom_region_stat + newDepthStat(arr[r.start..<min(arr.len.uint32, r.stop)])
-        var m = su.format_float(me, ffDecimal, precision=precision)
+          chrom_region_stat = chrom_region_stat + newDepthStat(arr[
+              r.start..<min(arr.len.uint32, r.stop)])
+        var m = su.format_float(me, ffDecimal, precision = precision)
 
         if r.name == "":
-          line.add(starget & intToStr(int(r.start)) & "\t" & intToStr(int(r.stop)) & "\t" & m)
+          line.add(starget & intToStr(int(r.start)) & "\t" & intToStr(int(
+              r.stop)) & "\t" & m)
         else:
-          line.add(starget & intToStr(int(r.start)) & "\t" & intToStr(int(r.stop)) & "\t" & r.name & "\t" & m)
-        doAssert fregion.write_interval(line, target.name, int(r.start), int(r.stop)) >= 0
+          line.add(starget & intToStr(int(r.start)) & "\t" & intToStr(int(
+              r.stop)) & "\t" & r.name & "\t" & m)
+        doAssert fregion.write_interval(line, target.name, int(r.start), int(
+            r.stop)) >= 0
         line = line[0..<0]
         if tid != -2:
           if region.isdigit: #stores the aggregated coverage for each region when working with even windows across the genome
-            chrom_region_distribution[min(me.toInt,int64(len(chrom_region_distribution))-1)] += 1
+            chrom_region_distribution[min(me.toInt, int64(len(
+                chrom_region_distribution))-1)] += 1
           else: # stores the per-base coverage in each region specified in the bed file
             chrom_region_distribution.inc(arr, r.start, r.stop)
 
@@ -704,9 +746,11 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int,
       if tid == -2:
         when defined(d4):
           if use_d4:
-            fd4.write(target.name, @[Interval(left: 0'u32, right: target.length.uint32, value: 0'i32)])
+            fd4.write(target.name, @[Interval(left: 0'u32,
+                right: target.length.uint32, value: 0'i32)])
         else:
-          doAssert fbase.write_interval(starget & "0\t" & intToStr(int(target.length)) & "\t0", target.name, 0, int(target.length)) >= 0
+          doAssert fbase.write_interval(starget & "0\t" & intToStr(int(
+              target.length)) & "\t0", target.name, 0, int(target.length)) >= 0
       else:
         var write_fbase = true
         when defined(d4):
@@ -729,11 +773,15 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int,
     if quantize.len != 0:
       if tid == -2 and quantize[0] == 0:
         var lookup = make_lookup(quantize)
-        doAssert fquantize.write_interval(starget & "0\t" & intToStr(int(target.length)) & "\t" & lookup[0], target.name, 0, int(target.length)) >= 0
+        doAssert fquantize.write_interval(starget & "0\t" & intToStr(int(
+            target.length)) & "\t" & lookup[0], target.name, 0, int(
+            target.length)) >= 0
       else:
         if tid == -2: continue
         for p in gen_quantized(quantize, arr):
-            doAssert fquantize.write_interval(starget & intToStr(p.start) & "\t" & intToStr(p.stop) & "\t" & p.value, target.name, p.start, p.stop) >= 0
+          doAssert fquantize.write_interval(starget & intToStr(p.start) & "\t" &
+              intToStr(p.stop) & "\t" & p.value, target.name, p.start,
+              p.stop) >= 0
 
   write_summary("total", global_stat, fh_summary)
   if region != "":
@@ -746,27 +794,28 @@ proc main(bam: hts.Bam, chrom: region_t, mapq: int, min_len: int, max_len: int,
 
   if bed_regions != nil and chrom == nil:
     for chrom, regions in bed_regions:
-      stderr.write_line("[mosdepth] warning chromosome:", chrom, " from bed with " , len(regions), " regions not found")
+      stderr.write_line("[mosdepth] warning chromosome:", chrom,
+          " from bed with ", len(regions), " regions not found")
 
   if fregion != nil and close(fregion) != 0:
-      stderr.write_line("[mosdepth] error writing region file\n")
-      quit(1)
+    stderr.write_line("[mosdepth] error writing region file\n")
+    quit(1)
 
   if fquantize != nil and close(fquantize) != 0:
-      stderr.write_line("[mosdepth] error writing quantize file\n")
-      quit(1)
+    stderr.write_line("[mosdepth] error writing quantize file\n")
+    quit(1)
 
   if fthresholds != nil and close(fthresholds) != 0:
-      stderr.write_line("[mosdepth] error writing thresholds file\n")
-      quit(1)
+    stderr.write_line("[mosdepth] error writing thresholds file\n")
+    quit(1)
   when defined(d4):
     if use_d4:
       fd4.close
       doAssert index_build_sfi(prefix & ".per-base.d4")
 
   if fbase != nil and close(fbase) != 0:
-      stderr.write_line("[mosdepth] error writing per-base file\n")
-      quit(1)
+    stderr.write_line("[mosdepth] error writing per-base file\n")
+    quit(1)
   close(fh_global_dist)
 
 proc check_chrom(r: region_t, targets: seq[Target]) =
@@ -780,11 +829,11 @@ proc check_chrom(r: region_t, targets: seq[Target]) =
 proc threshold_args*(ts: string): seq[int] =
   if ts == "nil":
     return
-  result = map(ts.split(','), proc (s:string): int = return parse_int(s))
+  result = map(ts.split(','), proc (s: string): int = return parse_int(s))
   sort(result)
 
 
-proc check_cram_has_ref(cram_path: string, fasta:string) =
+proc check_cram_has_ref(cram_path: string, fasta: string) =
   if fasta != "" and fileExists(fasta):
     return
   if cram_path.ends_with(".cram"):
@@ -795,7 +844,7 @@ when(isMainModule):
   when not defined(release) and not defined(lto):
     stderr.write_line "[mosdepth] WARNING: built in debug mode; will be slow"
 
-  let version = "mosdepth 0.3.10"
+  let version = "mosdepth 0.3.11"
   let env_fasta = getEnv("REF_PATH")
   var doc = format("""
   $version
@@ -833,6 +882,7 @@ Other options:
   -F --flag <FLAG>                  exclude reads with any of the bits in FLAG set [default: 1796]
   -i --include-flag <FLAG>          only include reads with any of the bits in FLAG set. default is unset. [default: 0]
   -x --fast-mode                    dont look at internal cigar operations or correct mate overlaps (recommended for most use-cases).
+  -a --fragment-mode                count the coverage of the full fragment including the full insert (proper pairs only).
   -q --quantize <segments>          write quantized output see docs for description.
   -Q --mapq <mapq>                  mapping quality threshold. reads with a quality less than this value are ignored [default: 0]
   -l --min-frag-len <min-frag-len>  minimum insert size. reads with a smaller insert size than this are ignored [default: -1]
@@ -847,7 +897,7 @@ Other options:
 
   var args: Table[string, Value]
   try:
-    args = docopt(doc, version = version, quit=false)
+    args = docopt(doc, version = version, quit = false)
   except DocoptExit:
     echo (ref DocoptExit)(get_current_exception()).usage
     quit "error parsing arguments"
@@ -860,16 +910,21 @@ Other options:
   if max_len < min_len:
     stderr.write_line("[mosdepth] error --max-frag-len was lower than --min-frag-len.")
     quit(2)
-  
+
   var
     region: string
     thresholds: seq[int] = threshold_args($args["--thresholds"])
-    fast_mode:bool = args["--fast-mode"]
-    use_median:bool = args["--use-median"]
+    fast_mode: bool = args["--fast-mode"]
+    fragment_mode: bool = args["--fragment-mode"]
+    use_median: bool = args["--use-median"]
   when defined(d4):
-    var use_d4:bool = args["--d4"] and not args["--no-per-base"]
+    var use_d4: bool = args["--d4"] and not args["--no-per-base"]
   else:
-    var use_d4:bool = false
+    var use_d4: bool = false
+
+  if fragment_mode and fast_mode:
+    stderr.write_line("[mosdepth] error only one of --fast-mode and --fragment-mode can be specified")
+    quit(2)
 
   if $args["--by"] != "nil":
     region = $args["--by"]
@@ -887,16 +942,19 @@ Other options:
     iflag: uint16 = uint16(S.parse_int($args["--include-flag"]))
     threads = S.parse_int($args["--threads"])
     chrom = region_line_to_region($args["--chrom"])
-    bam:Bam
+    bam: Bam
   check_cram_has_ref($args["<BAM-or-CRAM>"], $args["--fasta"])
-  open(bam, $args["<BAM-or-CRAM>"], threads=threads, index=true, fai=fasta)
+  open(bam, $args["<BAM-or-CRAM>"], threads = threads, index = true, fai = fasta)
   if bam.idx == nil:
     stderr.write_line("[mosdepth] error alignment file must be indexed")
     quit(2)
 
-  var opts = SamField.SAM_FLAG.int or SamField.SAM_RNAME.int or SamField.SAM_POS.int or SamField.SAM_MAPQ.int or SamField.SAM_CIGAR.int or SamField.SAM_TLEN.int
+  var opts = SamField.SAM_FLAG.int or SamField.SAM_RNAME.int or
+      SamField.SAM_POS.int or SamField.SAM_MAPQ.int or SamField.SAM_CIGAR.int or
+      SamField.SAM_TLEN.int
   if not fast_mode:
-      opts = opts or SamField.SAM_QNAME.int or SamField.SAM_RNEXT.int or SamField.SAM_PNEXT.int #or SamField.SAM_TLEN.int
+    opts = opts or SamField.SAM_QNAME.int or SamField.SAM_RNEXT.int or
+        SamField.SAM_PNEXT.int #or SamField.SAM_TLEN.int
 
   if $args["--read-groups"] != "nil":
     opts = opts or SamField.SAM_RGAUX.int
@@ -905,4 +963,6 @@ Other options:
   discard bam.set_option(FormatOption.CRAM_OPT_DECODE_MD, 0)
   check_chrom(chrom, bam.hdr.targets)
 
-  main(bam, chrom, mapq, min_len, max_len, eflag, iflag, region, thresholds, fast_mode, args, use_median=use_median, use_d4=use_d4)
+  main(bam, chrom, mapq, min_len, max_len, eflag, iflag, region, thresholds,
+      fast_mode, args, use_median = use_median, fragment_mode = fragment_mode, use_d4 = use_d4)
+      


=====================================
mosdepth.nimble
=====================================
@@ -1,6 +1,6 @@
 # Package
 
-version       = "0.3.10"
+version       = "0.3.11"
 author        = "Brent Pedersen"
 description   = "fast depth"
 license       = "MIT"



View it on GitLab: https://salsa.debian.org/med-team/mosdepth/-/commit/9747000cdf85fc5971796df05d489f02d918ec49

-- 
View it on GitLab: https://salsa.debian.org/med-team/mosdepth/-/commit/9747000cdf85fc5971796df05d489f02d918ec49
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250925/3283941a/attachment-0001.htm>


More information about the debian-med-commit mailing list