[med-svn] [Git][med-team/metabat][master] 5 commits: New upstream version 2.17

Andreas Tille (@tille) gitlab at salsa.debian.org
Wed Nov 8 15:06:07 GMT 2023



Andreas Tille pushed to branch master at Debian Med / metabat


Commits:
ea192bc4 by Andreas Tille at 2023-11-08T15:13:57+01:00
New upstream version 2.17
- - - - -
0055d9f4 by Andreas Tille at 2023-11-08T15:13:57+01:00
routine-update: New upstream version

- - - - -
f9dd1de5 by Andreas Tille at 2023-11-08T15:13:58+01:00
Update upstream source from tag 'upstream/2.17'

Update to upstream version '2.17'
with Debian dir 1e63afc261816c0906e5de42a3cab26e36f5b9b9
- - - - -
f8063f7b by Andreas Tille at 2023-11-08T15:13:58+01:00
routine-update: Standards-Version: 4.6.2

- - - - -
ffd476cb by Andreas Tille at 2023-11-08T15:55:17+01:00
Update patch

- - - - -


24 changed files:

- .dockerignore
- .gitlab-ci.yml
- CMakeLists.txt
- Dockerfile
- Legal.txt
- README.md
- cmake/htslib.cmake
- debian/changelog
- debian/control
- debian/patches/use_debian_packaged_libs.patch
- + filter_sam_by_min_len.py
- license.txt
- + merge_abundances.py
- merge_depths.pl → merge_depths-DEPRECATED.pl
- runMetaBat.sh
- src/BamUtils.h
- src/IOThreadBuffer.h
- src/contigOverlaps.cpp
- src/jgi_summarize_bam_contig_depths.cpp
- src/jgi_summarize_bam_contig_depths.h
- src/metabat2.cpp
- src/metabat2.h
- test/CMakeLists.txt
- test/contigs.fa


Changes:

=====================================
.dockerignore
=====================================
@@ -9,7 +9,7 @@ jgi_summarize_bam_contig_depths
 contigOverlaps
 *.o
 .sconsign.dblite
-build/
+build*/
 samtools*
 /include/
 /lib/


=====================================
.gitlab-ci.yml
=====================================
@@ -7,6 +7,8 @@ services:
 
 before_script:
   - set -e
+  - apk update && apk add git
+  - git --version
   - export METABAT_VERSION=$(git describe --tags)
   - docker info
   - docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY
@@ -22,7 +24,7 @@ build-master:
     - docker build -t "$CI_REGISTRY_IMAGE" -t "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG" -t "$DOCKERHUB_USER/metabat:$CI_COMMIT_SHA" -t "$DOCKERHUB_USER/metabat:latest" -t "$DOCKERHUB_USER/metabat:$METABAT_VERSION" .
     - docker push "$CI_REGISTRY_IMAGE"
     - docker login -u "$DOCKERHUB_USER" -p "$DOCKERHUB_PASSWORD" $DOCKERHUB_REGISTRY
-    - docker push $DOCKERHUB_USER/metabat
+    - docker push -a $DOCKERHUB_USER/metabat
   only:
     - master
 


=====================================
CMakeLists.txt
=====================================
@@ -11,11 +11,11 @@ include(${CMAKE_ROOT}/Modules/ExternalProject.cmake)
 include(cmake/zlib.cmake)
 include(cmake/htslib.cmake)
 
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
-set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD 17)
 set(CMAKE_C_STANDARD_REQUIRED ON)
 set(CMAKE_C_EXTENSIONS OFF)
 
@@ -48,7 +48,8 @@ if (NOT NO_TESTING)
     add_subdirectory(test)
 endif()
 
-install(PROGRAMS runMetaBat.sh merge_depths.pl aggregateBinDepths.pl aggregateContigOverlapsByBin.pl
+install(PROGRAMS runMetaBat.sh filter_sam_by_min_len.py aggregateBinDepths.pl aggregateContigOverlapsByBin.pl
+                 merge_abundances.py merge_depths-DEPRECATED.pl
         DESTINATION bin/
        )
 INSTALL(CODE "execute_process( \


=====================================
Dockerfile
=====================================
@@ -1,4 +1,4 @@
-FROM ubuntu:18.04 AS run-env
+FROM ubuntu:22.04 AS run-env
 
 LABEL Maintainer="Rob Egan<RSEgan at lbl.gov>"
 
@@ -8,7 +8,7 @@ WORKDIR /root
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update  && \
-    apt-get install -y libgomp1 && \
+    apt-get install -y libgomp1 python3 && \
     apt-get autoremove -y && \
     apt-get clean && \
     apt-get autoclean && \


=====================================
Legal.txt
=====================================
@@ -1,11 +1,11 @@
 *********
 
-MetaBAT Copyright (c) 2014, The Regents of the University of California, 
-through Lawrence Berkeley National Laboratory (subject to receipt of any 
-required approvals from the U.S. Dept. of Energy).  All rights reserved.
+MetaBAT v1.0 to v2.0 Copyright (c) 2014, The Regents of the University of
+California, through Lawrence Berkeley National Laboratory (subject to receipt of
+any required approvals from the U.S. Dept. of Energy).  All rights reserved.
 
-If you have questions about your rights to use or distribute this software, 
-please contact Berkeley Lab's technology transfer department at  TTD at lbl.gov 
+If you have questions about your rights to use or distribute this software,
+please contact Berkeley Lab's technology transfer department at IPO at lbl.gov
 referring to "MetaBAT (2014-075)."
 
 NOTICE.  This software was developed under funding from the U.S. Department of 


=====================================
README.md
=====================================
@@ -34,10 +34,12 @@ INSTALLATION (non-Docker):
 
 Requirements:
 
-* boost >= 1.59.0 (dev and libs for boost_graph, system, filesystem and serialization)
-* python >= 2.7
+* boost >= 1.59.0 (dev and libs for boost_graph, system, filesystem and serialization). On some Linux distributions you may see an error like: "CMake Error at /usr/share/cmake/Modules/FindPackageHandleStandardArgs.cmake:230 (message):
+  Could NOT find Boost (missing: program_options filesystem system graph serialization iostreams) (found suitable version "1.66.0", minimum required is "1.55.0")". This means you only have the C++ headers, not the full Boost libraries. Either build [boost](https://www.boost.org/) yourself following the instructions in the section "Easy Build and Install", or, on Ubuntu, install libboost-all-dev instead of libboost-dev.
+* python >= 2.7
 * cmake >= 3.8.2
 * gcc/g++ >= 4.9 or intel >= 18.0.1.163 or llvm >= 8.0
+* autoconf - autoheader, automake for support of HTSlib
 
 (htslib 1.9 is downloaded and installed automatically if not present on the system)
 
@@ -400,13 +402,13 @@ The output will look like the following:
 
 
 ```
-MetaBAT Copyright (c) 2014, The Regents of the University of California, 
-through Lawrence Berkeley National Laboratory (subject to receipt of any 
-required approvals from the U.S. Dept. of Energy).  All rights reserved.
+MetaBAT v1.0 to v2.0 Copyright (c) 2014, The Regents of the University of
+California, through Lawrence Berkeley National Laboratory (subject to receipt of
+any required approvals from the U.S. Dept. of Energy).  All rights reserved.
 
-If you have questions about your rights to use or distribute this software, 
-please contact Berkeley Lab's technology transfer department at  TTD at lbl.gov 
-referring to "MetaBAT (2014-075)."
+If you have questions about your rights to use or distribute this software,
+please contact Berkeley Lab's technology transfer department at IPO at lbl.gov
+referring to "MetaBAT (2014-075).
 
 NOTICE.  This software was developed under funding from the U.S. Department of 
 Energy.  As such, the U.S. Government has been granted for itself and others 


=====================================
cmake/htslib.cmake
=====================================
@@ -14,13 +14,13 @@ endif()
 ExternalProject_Add(htslib
     PREFIX ${htslib_PREFIX}
     GIT_REPOSITORY "https://github.com/samtools/htslib.git"
-    GIT_TAG "1.9"
+    GIT_TAG "1.17"
     UPDATE_COMMAND ""
     BUILD_IN_SOURCE 1
     #CONFIGURE_COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/contrib/htslib-prefix/src/htslib/configure"
     #CONFIGURE_COMMAND "autoheader"
     #CONFIGURE_COMMAND "autoconf"
-    CONFIGURE_COMMAND autoheader && autoconf && ./configure --disable-bz2 --disable-lzma --disable-libcurl
+    CONFIGURE_COMMAND autoheader && autoconf && autoreconf --install && ./configure --disable-bz2 --disable-lzma --disable-libcurl
     BUILD_COMMAND ${MAKE_COMMAND} lib-static
     INSTALL_COMMAND ${MAKE_COMMAND} install prefix=${htslib_INSTALL}
     LOG_DOWNLOAD 1


=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+metabat (2.17-1) UNRELEASED; urgency=medium
+
+  * New upstream version
+  * Standards-Version: 4.6.2 (routine-update)
+
+ -- Andreas Tille <tille at debian.org>  Wed, 08 Nov 2023 15:13:57 +0100
+
 metabat (2.15-4) unstable; urgency=medium
 
   * Fix watch file


=====================================
debian/control
=====================================
@@ -15,7 +15,7 @@ Build-Depends: debhelper-compat (= 13),
                libhts-dev,
                pkg-config,
                python3
-Standards-Version: 4.6.1
+Standards-Version: 4.6.2
 Vcs-Browser: https://salsa.debian.org/med-team/metabat
 Vcs-Git: https://salsa.debian.org/med-team/metabat.git
 Homepage: https://bitbucket.org/berkeleylab/metabat/wiki/Home


=====================================
debian/patches/use_debian_packaged_libs.patch
=====================================
@@ -21,7 +21,7 @@ Last-Update: Thu, 28 May 2020 17:21:42 +0200
 +find_package(PkgConfig REQUIRED)
 +pkg_check_modules(HTSlib REQUIRED htslib)
  
- set(CMAKE_CXX_STANDARD 11)
+ set(CMAKE_CXX_STANDARD 17)
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
 @@ -23,7 +25,13 @@ add_definitions(-D_XOPEN_SOURCE=700)
  


=====================================
filter_sam_by_min_len.py
=====================================
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+import gzip
+import argparse
+
+"""
+This program takes an excessively large assembly (with > 2^31 scaffolds) and its sam[.gz] files
+calculates the minimum_scaffold_length that <= max_num_scaffolds
+and produces a filtered assembly and filtered sam.gz file using the new subset assembly
+"""
+
+include_unmapped = False
+
+def maybe_gz_open(filename:str, mode:str):
+    if filename.endswith(".gz"):
+        return gzip.open(filename, mode + 't')
+    else:
+        return open(filename, mode)
+
+def fold(fasta):
+    # placeholder: sequence lines are returned unwrapped; folding to a fixed width is not implemented
+    return fasta
+
+class Header():
+    """
+    SAM header
+    """
+    def __init__(self):
+        self.hd = list() # list of @HD
+        self.sq = dict() # dict of @SQ (name: dict().  Ordering is important and maintained)
+        self.rg = list() # list of @RG
+        self.pg = list() # list of @PG
+        self.co = list() # list of @CO
+        self.histogram = dict()
+
+    def copy(self):
+        """Return a new header"""
+        header = Header()
+        header.hd = self.hd.copy()
+        header.sq = self.sq.copy()
+        header.rg = self.rg.copy()
+        header.pg = self.pg.copy()
+        header.co = self.co.copy()
+        return header
+
+    def add_line(self, line):
+        line = line.rstrip()
+        if line.startswith("@HD"):
+            self.hd.append(line)
+        elif line.startswith("@SQ"):
+            fields=line.split()
+            sq = dict()
+            for field in fields[1:]:
+                logger.debug(f"field {field} in line {line}")
+                k,v = field.split(":")
+                sq[k] = v
+            assert "SN" in sq
+            assert "LN" in sq
+
+            self.sq[sq["SN"]] = sq
+            length = int(sq["LN"])
+            if length not in self.histogram:
+                self.histogram[length] = 1
+            else:
+                self.histogram[length] += 1
+        elif line.startswith("@RG"):
+            self.rg.append(line)
+        elif line.startswith("@PG"):
+            self.pg.append(line)
+        elif line.startswith("@CO"):
+            self.co.append(line)
+        else:
+            logger.error(f"Unknown header line: {line}")
+            raise ValueError(f"Unknown header line: {line}")
+
+    def dump(self, fh):
+        for line in self.hd:
+            print(line, file=fh)
+        for k,d in self.sq.items():
+            line = f"@SQ"
+            for k,v in d.items():
+                line += f"\t{k}:{v}"
+            print(line, file=fh)
+        for line in self.rg:
+            print(line, file=fh)
+        for line in self.pg:
+            print(line, file=fh)
+        for line in self.co:
+            print(line, file=fh)
+
+    def load_header(self, sam_fh) -> str:
+        """
+        Loads a header from a sam file handle
+        returns the first non-header line
+        """
+        logger.info(f"Reading header from sam file handle {sam_fh}")
+        lines = 0
+        for line in sam_fh:
+            if not line[0] == "@":
+                logger.info(f"Read {lines} header lines")
+                return line
+            self.add_line(line)
+            lines += 1
+        return None
+
+    def filter_min_len(self, min_len:int):
+        """
+        Returns a new Header (with sequences sorted largest to smallest)
+        returning only those scaffolds >= min_len
+        """
+        logger.info(f"Filtering {len(self.sq)} sequences with new header and min_len={min_len}")
+        new_header = self.copy()
+        new_sq = list()
+        for name,sq in new_header.sq.items():
+            if int(sq["LN"]) >= min_len:
+                new_sq.append(sq)
+        logger.info(f"Filtered {len(new_sq)} sequences with min_len={min_len} from {len(new_header.sq)}")
+        new_sq.sort(key=lambda x: int(x["LN"]), reverse=True)
+        logger.info(f"Sorted first is {new_sq[0]} last is {new_sq[-1]}")
+        new_header.sq = dict()
+        for sq in new_sq:
+            new_header.sq[sq["SN"]] = sq
+        return new_header
+
+    def calculate_min_len(self, max_num_scaffolds:int = None) -> int:
+        """Calculates the longest min_len that results in <= max_num_scaffolds"""
+        if max_num_scaffolds is None:
+            max_num_scaffolds = 2147483647
+        logger.info(f"Finding the min_len that leaves at most {max_num_scaffolds}, starting with {len(self.sq)}")
+        min_len = 0
+        bins = sorted(self.histogram.keys())
+        stop = 2 ** 31 -1
+        if len(bins):
+            stop = bins[-1]
+        logger.info(f"bins={self.histogram}")
+        num = len(self.sq)
+        while num > max_num_scaffolds:
+            if min_len > stop:
+                break
+            if min_len in self.histogram:
+                num -= self.histogram[min_len]
+            min_len += 1
+        logger.info(f"Found that min_len={min_len} yields {num} scaffolds out of {len(self.sq)}")
+        return min_len
+
+    def filter_sam(self, sam_file_name:str, new_file_name:str):
+        """
+        Writes a new sam file replacing the header and filtering out lines not covered by the new header
+        """
+        
+        global include_unmapped
+        is_good = True
+        out_fh = maybe_gz_open(new_file_name, 'w')
+        pass_records = 0
+        records = 0
+        logger.info(f"Filtering SAM {sam_file_name} writing to {new_file_name}")
+        with maybe_gz_open(sam_file_name, 'r') as in_fh:
+            old_header = Header()
+            line = old_header.load_header(in_fh)
+            for name,sq in self.sq.items():
+                if name not in old_header.sq:
+                    logger.warning(f"No entry of {name} from {sam_file_name} is in this header")
+                    is_good = False
+            if not is_good:
+                logger.warning(f"Please ensure {sam_file_name} is from the same assembly")
+                raise ValueError(f"SAM header mismatch: {sam_file_name} is not from the same assembly as the first input")
+            self.dump(out_fh)
+            while line is not None and len(line) > 0:
+                fields = line.split()
+                #logger.debug(f"processing {fields}")
+                if len(fields) < 3:
+                    logger.warning(f"what is {line} {fields}")
+                    continue
+                if fields[2] is None or fields[2] == "*":
+                    if include_unmapped:
+                        print(line, file=out_fh)
+                        pass_records += 1
+                elif fields[2] in self.sq:
+                    print(line, file=out_fh)
+                    pass_records += 1
+                records += 1
+                line = in_fh.readline().rstrip()
+        logger.info(f"Filtered {pass_records} of {records}")
+        out_fh.close()
+
+    def filter_assembly(self, assem_file_name:str, new_assem_file_name:str):
+        """
+        Prints the filtered assembly in the same order as this header
+        Skipping any records not in this header
+        """
+        logger.info(f"Filtering assembly {assem_file_name} saving to {new_assem_file_name}")
+        printable = dict()
+        out_fh = maybe_gz_open(new_assem_file_name, 'w')
+        printed = 0
+        with maybe_gz_open(assem_file_name, 'r') as in_fh:
+            # print in the correct order
+            last_n = ""
+            name = ""
+            fasta = ""
+            print_it = False
+            line_num = 0
+            for tgt,sq in self.sq.items():
+                logger.debug(f"Looking for {tgt} at {line_num}")
+                if tgt in printable:
+                    logger.debug(f"Found {tgt} before {line_num}")
+                    print(f"{printable[tgt]}", file=out_fh)
+                    del printable[tgt]
+                    printed += 1
+                    continue
+
+                # read and remember file until tgt is found
+                for l in in_fh:
+                    l = l.rstrip()
+                    line_num += 1
+                    if l[0] == ">":
+                        fields = l.split()
+                        n = fields[0][1:] # name minus '>' prefix and anything after the first whitespace
+                        if tgt == last_n:
+                            assert print_it
+                            print(f"{name}\n{fold(fasta)}", file=out_fh)
+                            logger.debug(f"Hit {tgt} at {line_num}")
+                            printed += 1
+                            name = l
+                            last_n = n
+                            fasta = ""
+                            print_it = n in self.sq
+                            break
+                        elif print_it:
+                            printable[last_n] = f"{name}\n{fold(fasta)}"
+                            logger.debug(f"Recorded {last_n} at {line_num}")
+                        name = l
+                        last_n = n
+                        fasta = ""
+
+                        if n in self.sq:
+                            print_it = True
+                            name = l
+                            fasta = ""
+                            logger.debug(f"Keeping {self.sq[n]}")
+                        else:
+                            print_it = False
+                            name = None
+                            fasta = ""
+                            logger.debug(f"Skipping {n}")
+                    else:
+                        fasta += l
+            if print_it:
+                printable[last_n] = f"{name}\n{fasta}"
+
+        if printed != len(self.sq):
+            logger.warning(f"Should not get here!! name={name} last_n={last_n} printed={printed} seqs={len(self.sq)}")
+                
+        out_fh.close()
+            
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser(add_help=True, prog = "filter_sam_by_min_len.py", description = """
+    Filters SAM[.gz] and assembly.fa[.gz] files where all references have a minimum length
+    Optionally determines the optimal min_len to conform to the BAM standard (<=2^31-1 scaffolds)
+    """, epilog="")
+    argparser.add_argument("--assembly", default=None, type=str, help="Optional. The assembly file to filter")
+    argparser.add_argument("--min-len", default=None, type=int, help="If not set (default), then determine the optimal minimum scaffold length")
+    argparser.add_argument("--max-scaffolds", default=None, type=int, help="If not set (default), then use the maximum allowd by the BAM sepcification")
+    argparser.add_argument("--gzip-output", action='store_true', help="If set then the filtered files will be compressed")
+    argparser.add_argument("sam", nargs='+', type=str, help="one or more sam files to process serially")
+    options, unknown_options = argparser.parse_known_args()
+    logger.info(f"options {options} unknown {unknown_options}")
+
+    if len(options.sam) == 0:
+        logger.warning(f"You must specify at least one SAM file")
+        raise
+    
+    initial_header = Header()
+    sam_fh = maybe_gz_open(options.sam[0], 'r')
+    initial_header.load_header(sam_fh)
+    sam_fh.close()
+
+    min_len = options.min_len
+    if min_len is None:
+        min_len = initial_header.calculate_min_len(options.max_scaffolds)
+    else:
+        min_len = int(min_len)
+
+    suffix = ".gz" if options.gzip_output else ""
+    new_header = initial_header.filter_min_len(min_len)
+    if options.assembly:
+        new_header.filter_assembly(options.assembly, f"{options.assembly}-min{min_len}.fa{suffix}")
+
+    for sam in options.sam:
+        new_header.filter_sam(sam, f"{sam}-min{min_len}.sam{suffix}")
+    
+
+

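As a quick illustration of the cutoff search that filter_sam_by_min_len.py performs (Header.calculate_min_len above), the following standalone Python sketch applies the same idea to a toy length histogram; the histogram and limit are made-up values, not part of this commit:

    #!/usr/bin/env python3
    # Toy sketch of the min_len search in Header.calculate_min_len above:
    # raise min_len until the number of surviving scaffolds fits the limit.

    def calculate_min_len(histogram: dict, max_num_scaffolds: int) -> int:
        num = sum(histogram.values())         # scaffolds currently kept
        stop = max(histogram) if histogram else 2**31 - 1
        min_len = 0
        while num > max_num_scaffolds and min_len <= stop:
            num -= histogram.get(min_len, 0)  # drop scaffolds of exactly this length
            min_len += 1
        return min_len

    # 5 scaffolds of length 100, 3 of length 500, 1 of length 1000; keep at most 4:
    # every scaffold shorter than 101 must go, so min_len becomes 101.
    assert calculate_min_len({100: 5, 500: 3, 1000: 1}, 4) == 101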

=====================================
license.txt
=====================================
@@ -1,8 +1,8 @@
 **********
 
-MetaBAT (2014-075), The Regents of the University of California, through 
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from the U.S. Dept. of Energy).  All rights reserved.
+MetaBAT v1.0 to v2.0 Copyright (c) 2014, The Regents of the University of
+California, through Lawrence Berkeley National Laboratory (subject to receipt of
+any required approvals from the U.S. Dept. of Energy).  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without 
 modification, are permitted provided that the following conditions are met:


=====================================
merge_abundances.py
=====================================
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+"""
+This simple script combines multiple abundance files, each generated by a separate execution of
+jgi_summarize_bam_contig_depths on some subset of the total bam files.
+The script raises an error if any column name appears in more than one abundance file, or if the
+order, names, or lengths of the contig rows differ between the files, since all inputs must
+describe the identical contigs from the same assembly fasta.
+
+Each file must have the same format and be tab separated:
+contigName	contigLen	totalAvgDepth	contigs-1000.fastq.bam	contigs-1000.fastq.bam-var ...
+NODE_1_length_5374_cov_8.558988	5404	14.2158	14.2158	16.817 ...
+
+"""
+
+import sys
+
+def merge_files(outputFile, inputFiles):
+    isTitle = True
+    out = open(outputFile, "w")
+    inputs = []
+    for inputFile in inputFiles:
+        inputs.append(open(inputFile, "r"))
+    isEOF = False 
+    headers = dict()
+    while not isEOF:
+        filen = 0
+        contig = None
+        contigLen = None
+        total = 0.0
+        vals = []
+        for input_fh in inputs:
+            filen += 1
+            inputLine = input_fh.readline()
+            inputLine = inputLine.rstrip('\n')
+            if not inputLine:
+                if filen == 1:
+                    isEOF = True
+                elif not isEOF:
+                    raise(ValueError(f"input #{filen}({inputFiles[filen-1]}) has fewer lines than the first!"))
+                continue
+            if isEOF:
+                raise(ValueError(f"input #{filen}({inputFiles[filen-1]}) has more lines than the first!"))
+            input = inputLine.split('\t')
+            if len(input) < 5 or len(input) % 2 != 1:
+                raise(ValueError(f"Expected at least 5 tab-separated columns in input #{filen}({inputFiles[filen-1]}) and an odd number, got {len(input)} :'{inputLine}'"))
+            if contig is None:
+                contig = input[0]
+                contigLen = input[1]
+                if isTitle:
+                    total = input[2]
+            elif contig != input[0] or contigLen != input[1]:
+                raise(ValueError(f"Invalid contig ({contig}) or contigLen ({contigLen}) in input #{filen}({inputFiles[filen-1]}): '{input}'"))
+            if isTitle and total != input[2]:
+                raise(ValueError(f"Invalid abundance file in input #{filen}({inputFiles[filen-1]}) expected 3rd column to be '{total}' not '{input[2]}'"))
+            col=3
+             
+            while col < len(input)-1:
+                if not isTitle:
+                    total += float(input[col])
+                else:
+                    if input[col] in headers:
+                        raise(ValueError(f"Detected duplicate column {input[col]} in input #{filen}({inputFiles[filen-1]})"))
+                    headers[input[col]] = 1
+                vals.append(input[col])
+                vals.append(input[col+1]) 
+                col+=2
+        if contig is not None:
+            cols = [contig, contigLen, "totalAvgDepth" if isTitle else total ]
+            cols.extend(vals)     
+            out.write("\t".join(map(str,cols)))
+            out.write("\n")
+        isTitle = False
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print("Usage: merge_abundances.py output.txt input.txt ...")
+        exit(1) 
+    merge_files(sys.argv[1], sys.argv[2:])

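For a concrete picture of what merge_abundances.py expects and produces, here is a small self-contained Python example; the file names, sample names, and depth values are invented, and it assumes merge_abundances.py is importable from the working directory:

    #!/usr/bin/env python3
    # Build two toy abundance tables over the same contig, then merge them.
    header_a = "contigName\tcontigLen\ttotalAvgDepth\ts1.bam\ts1.bam-var"
    header_b = "contigName\tcontigLen\ttotalAvgDepth\ts2.bam\ts2.bam-var"
    with open("a.txt", "w") as fh:
        fh.write(header_a + "\n" + "c1\t5404\t14.0\t14.0\t16.8\n")
    with open("b.txt", "w") as fh:
        fh.write(header_b + "\n" + "c1\t5404\t3.5\t3.5\t2.0\n")

    from merge_abundances import merge_files
    merge_files("merged.txt", ["a.txt", "b.txt"])
    print(open("merged.txt").read())
    # Output (tab-separated; aligned here for readability):
    # contigName  contigLen  totalAvgDepth  s1.bam  s1.bam-var  s2.bam  s2.bam-var
    # c1          5404       17.5           14.0    16.8        3.5     2.0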

=====================================
merge_depths.pl → merge_depths-DEPRECATED.pl
=====================================


=====================================
runMetaBat.sh
=====================================
@@ -106,9 +106,9 @@ if [ ! -f "${depth}" ] && ln -s "$(uname -n) $$" $lock
 then
     if [ ! -f "${depth}" ]
     then
-        sumopts="--outputDepth ${depth} --percentIdentity ${PCTID} --minContigLength 1000 --minContigDepth ${MINDEPTH} ${badmapopts} --referenceFasta ${assembly}"
+        sumopts="--outputDepth ${depth}.tmp --percentIdentity ${PCTID} --minContigLength 1000 --minContigDepth ${MINDEPTH} ${badmapopts} --referenceFasta ${assembly}"
         echo "Executing: '$SUM $sumopts $@' at `date`"
-        $SUM $sumopts $@
+        $SUM $sumopts $@ && mv ${depth}.tmp ${depth} || false
         echo "Finished $SUM at `date`"
     fi
 

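The runMetaBat.sh change above is the standard write-to-temp-then-rename pattern: the depth table is written to ${depth}.tmp and only moved to ${depth} if jgi_summarize_bam_contig_depths exits successfully, so an interrupted run can never leave behind a truncated depth file that a rerun would treat as complete. A minimal sketch of the same idea in Python (names illustrative):

    #!/usr/bin/env python3
    # Write-then-rename: the final path only appears once the content is complete.
    import os

    def write_atomically(path: str, text: str) -> None:
        tmp = path + ".tmp"
        with open(tmp, "w") as fh:
            fh.write(text)       # may crash or be killed here...
        os.replace(tmp, path)    # ...but then 'path' is simply never created

    write_atomically("assembly.fa.depth.txt", "contigName\tcontigLen\ttotalAvgDepth\n")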

=====================================
src/BamUtils.h
=====================================
@@ -9,10 +9,15 @@
 #include <vector>
 #include <stdint.h>
 
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include <boost/shared_ptr.hpp>
-#include <boost/functional/hash.hpp>
+#include <unordered_map>
+#include <unordered_set>
+#include <memory>
+#include <functional>
+
+using std::unordered_map;
+using std::unordered_set;
+using std::shared_ptr;
+using std::hash;
 
 class BamFile;
 
@@ -65,9 +70,16 @@ class BamFile;
 
 using namespace std;
 
-typedef boost::shared_ptr<samfile_t> SamFileT;
-typedef boost::shared_ptr<bam_header_t> BamHeaderT;
-typedef boost::shared_ptr<bam_index_t> BamIndexT;
+typedef shared_ptr<samfile_t> SamFileT;
+typedef shared_ptr<bam_header_t> BamHeaderT;
+typedef shared_ptr<bam_index_t> BamIndexT;
+
+template <typename T, typename... Rest>
+void hash_combine(std::size_t& seed, const T& v, const Rest&... rest)
+{
+    seed ^= std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    (hash_combine(seed, rest), ...);
+};
 
 class BamCache {
 public:
@@ -75,13 +87,28 @@ public:
 	BamCache(int maxSize = 32) {
 		cached.reserve(maxSize);
 	}
+	BamCache(const BamCache &copy) = delete;
+	BamCache(BamCache &&move) {
+		this->swap(move);
+		move.cached.clear();
+	}
+	BamCache &operator=(const BamCache &copy) = delete;
+	BamCache &operator=(BamCache &&move) {
+		this->swap(move);
+		move.cached.clear();
+		return *this;
+	}
 	~BamCache() {
 		clear();
 	}
+	void swap(BamCache &other) {
+		cached.swap(other.cached);
+	}
 	void clear() {
 		for(BamVector::iterator it = cached.begin(); it != cached.end(); it++) {
 			//cerr << "BamCache::clear() destroy: " << bam1_qname(*it) << "," << (*it)->core.flag << " " << (long) *it << endl;
-			bam_destroy1(*it);
+			if (*it) bam_destroy1(*it);
+			*it = NULL;
 		}
 		cached.clear();
 	}
@@ -94,8 +121,10 @@ public:
 			bam = bam_init1();
 			//cerr << "BamCache::getBam() init : " << (long) bam  << endl;
 		}
-		if (copy != NULL)
-			bam_copy1(bam, copy);
+		if (copy != NULL) {
+			bam1_t *ret = bam_copy1(bam, copy);
+			assert(ret == bam);
+		}
 		return bam;
 	}
 	void putBam(bam1_t *bam) {
@@ -148,17 +177,39 @@ public:
 			throw e;
 		}
 	}
+	BamFile(const BamFile &copy) = delete;
+	BamFile(BamFile &&mv) {
+		this->close();
+		this->swap(mv);
+	}
+	BamFile &operator=(const BamFile &copy) = delete;
+	BamFile &operator=(BamFile &&mv) {
+		this->swap(mv);
+		mv.close();
+		return *this;
+	}
 	~BamFile() {
 		close();
 	}
+	void swap(BamFile &other) {
+		std::swap(samfile, other.samfile);
+		std::swap(header, other.header);
+		std::swap(index, other.index);
+		std::swap(filePath, other.filePath);
+		std::swap(bamName, other.bamName);
+		std::swap(start, other.start);
+	}
 
 	void close() {
-#pragma omp critical (BamFileClose)
+#pragma omp critical (BamFile)
 		{
 			index.reset();
 			header.reset();
 			samfile.reset();
+			bamCache.clear();
 		}
+		filePath.clear();
+		bamName.clear();
 		start = 0;
 	}
 #ifdef LEGACY_SAMTOOLS
@@ -195,7 +246,7 @@ private:
 	int64_t start;
 
 	void open(bool loadIdx) {
-		cerr << omp_get_thread_num() << ": Opening bam: " << filePath << "\n";
+		//cerr << omp_get_thread_num() << ": Opening bam: " << filePath << "\n";
 		samfile = getSamFileT( samopen(filePath.c_str(), "rb", 0) );
 		if (samfile == NULL) {
 			cerr << omp_get_thread_num() << "ERROR: Could not open bam: " << filePath << "\n";
@@ -238,11 +289,21 @@ class NameBamMap {
 	// upon erase, a hash of the name is remembered so further insertions of the same name will not occur
 	// bam1_t are duplicated and destroyed on insert & erase respectively
 public:
-	typedef boost::unordered_map< string, bam1_t * > NameBamMapType;
-	typedef boost::unordered_set< std::size_t > NameHashSetType;
+	typedef unordered_map< string, bam1_t * > NameBamMapType;
+	typedef unordered_set< std::size_t > NameHashSetType;
 	typedef NameBamMapType::iterator iterator;
 	typedef NameBamMapType::value_type value_type;
 	NameBamMap(BamNameTrackingChooser *_trackName = NULL) : bamCache(32*1024), trackName(_trackName) {}
+	NameBamMap(const NameBamMap &copy) = delete;
+	NameBamMap(NameBamMap &&mv)
+		: map(std::move(mv.map)), string_hash(mv.string_hash),
+			bamCache(std::move(mv.bamCache)), trackName(mv.trackName) {
+		mv.map.clear();
+		mv.bamCache.clear();
+		mv.trackName = nullptr;
+	}
+	NameBamMap &operator=(const NameBamMap &copy) = delete;
+	NameBamMap &operator=(NameBamMap &&move) = default;
 	~NameBamMap() {
 		if (trackName != NULL)
 			delete trackName;
@@ -268,7 +329,8 @@ public:
 		} else  {
 			if (existing != map.end()) {
 				//cerr << "Replacing " << bam1_qname(existing->second) << "," << existing->second->core.flag << " with " << bam1_qname(bam) << "," << bam->core.flag << endl;
-				bam_copy1(existing->second, bam);
+				bam1_t *ret = bam_copy1(existing->second, bam);
+				assert(ret == existing->second);
 				return existing;
 			} else {
 				if (trackName == NULL || !trackName->unsupportedRead(bam)) {
@@ -332,7 +394,7 @@ public:
 	}
 private:
 	NameBamMapType map;
-	boost::hash<std::string> string_hash;
+	std::hash<std::string> string_hash;
 	BamCache bamCache;
 	BamNameTrackingChooser *trackName;
 };
@@ -341,6 +403,27 @@ class BamUtils {
 public:
 	typedef vector< string > StringVector;
 
+	static bool validateHeader(const BamHeaderT &ref, BamHeaderT &test, string ref_name = string(), string test_name = string()) {
+		if (ref->n_targets != test->n_targets) {
+			cerr << "ERROR: validateHeader - Header count mismatch (" << ref->n_targets << " vs " << test->n_targets << ") between bam files " << ref_name << " and " << test_name << endl;
+			return false;
+		}
+		for(int32_t i = 0; i < ref->n_targets; i++) {
+			if (strcmp( ref->target_name[i], test->target_name[i]) != 0) {
+				cerr << "ERROR: validateHeader - sequence name mismatch at " << i << " (" << ref->target_name[i] << " vs " << test->target_name[i] << ") between bam files " << ref_name << " and " << test_name << endl;
+				return false;
+			}
+			if (ref->target_len[i] != test->target_len[i]) {
+				cerr << "ERROR: validateHeader - sequence size mismatch at " << i << ", " << ref->target_name[i] << " (" << ref->target_len[i] << " vs " << test->target_len[i] << ") between bam files " << ref_name << " and " << test_name << endl;
+				return false;
+			}
+		}
+		// they are the same, consolidate
+#pragma omp critical (BamFile)
+		test = ref;
+		return true;
+	}
+
 	// collapse two identical headers into one memory structure
 	static bool validateHeader(const BamFile &src, BamFile &test, bool nullOK = false) {
 		// assure that src and test have exactly the same header
@@ -352,32 +435,24 @@ public:
 		}
 		assert(src.header != NULL);
 
-		if (src.header->n_targets != test.header->n_targets) {
-			cerr << "ERROR: validateHeader - Header count mismatch (" << src.header->n_targets << " vs " << test.header->n_targets << ") between bam files " << src.getBamName().c_str() << " and " << test.getBamName().c_str() << endl;
-			return false;
-		}
-		for(int32_t i = 0; i < src.header->n_targets; i++) {
-			if (strcmp( src.header->target_name[i], test.header->target_name[i]) != 0) {
-				cerr << "ERROR: validateHeader - sequence name mismatch at " << i << " (" << src.header->target_name[i] << " vs " << test.header->target_name[i] << ") between bam files" << src.getBamName().c_str() << " and " << test.getBamName().c_str() << endl;
-				return false;
-			}
-			if (src.header->target_len[i] != test.header->target_len[i]) {
-				cerr << "ERROR: validateHeader - sequence size mismatch at " << i << ", " << src.header->target_name[i] << " (" << src.header->target_len[i] << " vs " << test.header->target_len[i] << ") between bam files" << src.getBamName().c_str() << " and " << test.getBamName().c_str() << endl;
-				return false;
-			}
-		}
-
-		test.header = src.header;
+		bool is_valid = validateHeader(src.header, test.header, src.getBamName(), test.getBamName());
+		
 #ifdef LEGACY_SAMTOOLS
 		test.samfile->header = test.header.get();
 #endif
-		return true;
+		return is_valid;
 	}
 
 	// open the bam, header and index
-	static BamFile openBam(const string bamPath, bool loadIdx = true) {
+	static BamFile openBam(const string bamPath, bool loadIdx = true, const BamHeaderT referenceHeader = {}) {
 		BamFile bam(bamPath, loadIdx);
 		assert(bam.header.get() != NULL);
+		if (referenceHeader) {
+			if (!validateHeader(referenceHeader, bam.header, "First", bamPath)) {
+				cerr << "ERROR new bam file has a header differing from the reference: " << bamPath << endl;
+				exit(1);
+			}
+		}
 		return bam;
 	}
 
@@ -432,6 +507,16 @@ public:
 		return bams[0].header;
 	}
 
+	static void preprocessContigNames(BamHeaderT header) {
+		// exercise the name2tid lookups that get populated while parsing the alignments
+		// this should reduce the overall memory footprint
+		for(int tid = 0; tid < header->n_targets; tid++) {
+			const char *name = sam_hdr_tid2name(header.get(), tid);
+			int tid2 = sam_hdr_name2tid(header.get(), name);
+			assert(tid == tid2);
+		}
+	}
+
 	static void clearPair(bam1_t *b) {
 		b->core.flag &= ~(BAM_FPAIRED|BAM_FPROPER_PAIR|BAM_FREAD2);
 		b->core.flag &= BAM_FREAD1;
@@ -579,7 +664,8 @@ private:
 		_findPairInFileData *data = (_findPairInFileData*) _data;
 		const string orphanName = getBaseName(orphan);
 		if (data->name.compare( orphanName ) == 0 && data->bam->core.flag != orphan->core.flag) {
-			bam_copy1(data->mate, orphan);
+			bam1_t *ret = bam_copy1(data->mate, orphan);
+			assert(ret == data->mate);
 			data->success = true;
 			return 1;
 		} else {
@@ -755,11 +841,11 @@ public:
                 std::size_t seed = 0;
                 char *name = bam1_qname(bam);
                 while (*name != '\0') {
-                        boost::hash_combine(seed, *name);
+                        hash_combine(seed, *name);
                         name++;
                 }
                 if ((bam->core.flag & (BAM_FPAIRED|BAM_FREAD2)) != 0)
-                        boost::hash_combine(seed, 2);
+                        hash_combine(seed, 2);
                 return seed;
         }
 

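The hash_combine template introduced above reproduces boost::hash_combine's mixing step with std::hash: each value's hash is xor-folded into the running seed together with the golden-ratio constant 0x9e3779b9 and two shifted copies of the seed, spreading each input across the seed's bits. Purely as an illustration of that formula (Python integers are unbounded, so a mask emulates 64-bit size_t wraparound):

    # Illustrative Python transcription of the hash_combine mixing step above.
    MASK64 = (1 << 64) - 1

    def hash_combine(seed: int, value) -> int:
        seed ^= (hash(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2)) & MASK64
        return seed & MASK64

    seed = 0
    for ch in b"read_1":          # mirrors the per-character qname hashing in BamUtils.h
        seed = hash_combine(seed, ch)
    print(hex(seed))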

=====================================
src/IOThreadBuffer.h
=====================================
@@ -14,13 +14,13 @@
 #include <string>
 #include <sstream>
 
-#include <boost/shared_ptr.hpp>
+#include <memory>
 
 #include "OpenMP.h"
 
 class IOThreadBuffer {
 public:
-        typedef std::vector< boost::shared_ptr< std::stringstream > > ThreadBuffers;
+        typedef std::vector< std::shared_ptr< std::stringstream > > ThreadBuffers;
 	static const int FLUSH_TRIGGER = 1024*1024;
 private:
         static ThreadBuffers &_get(std::ostream &os) {


=====================================
src/contigOverlaps.cpp
=====================================
@@ -14,9 +14,9 @@
 #include <algorithm>
 #include <map>
 
-#include <boost/unordered_map.hpp>
-#include <boost/thread/thread.hpp>
-#include <boost/thread/mutex.hpp>
+#include <unordered_map>
+#include <thread>
+#include <mutex>
 #include <boost/ptr_container/ptr_vector.hpp>
 
 #include "metabat_version.h"
@@ -45,7 +45,7 @@ void abortMe(string msg) {
 	exit(1);
 }
 
-typedef boost::unordered_map< long, int > ReadhashContigMap;
+typedef std::unordered_map< long, int > ReadhashContigMap;
 typedef std::vector< int > ContigCountVector;
 typedef std::map< int, int > ContigCountMap;
 typedef std::vector< ContigCountMap > Contig2ContigCountMap;
@@ -183,20 +183,20 @@ int main(int argc, char *argv[]) {
 	cerr << "Allocating contig2contigCountMapMatrix" << std::endl;
 
 	Contig2ContigCountMap **contig2contigCountMapMatrix = new Contig2ContigCountMapPtr[numAssemblies];
-	boost::ptr_vector< boost::ptr_vector< boost::ptr_vector < boost::mutex > > > contig2contigCountMutexes;
+	boost::ptr_vector< boost::ptr_vector< boost::ptr_vector < std::mutex > > > contig2contigCountMutexes;
 	contig2contigCountMutexes.resize( numAssemblies );
 	for(int i = 0; i < numAssemblies; i++) {
 		contig2contigCountMapMatrix[i] = new Contig2ContigCountMap[numAssemblies];
-		contig2contigCountMutexes.push_back( new boost::ptr_vector< boost::ptr_vector < boost::mutex > >() );
+		contig2contigCountMutexes.push_back( new boost::ptr_vector< boost::ptr_vector < std::mutex > >() );
 		for(int j = 0; j < numAssemblies; j++) {
 
-			contig2contigCountMutexes[i].push_back( new boost::ptr_vector < boost::mutex >() );
+			contig2contigCountMutexes[i].push_back( new boost::ptr_vector < std::mutex >() );
 			if (i == j) continue;
 
 			contig2contigCountMapMatrix[i][j].resize( headers[i]->n_targets );
 
 			for(int k = 0; k < (int) contigCountArray[i].size(); k++) {
-				contig2contigCountMutexes[i][j].push_back( new boost::mutex() );
+				contig2contigCountMutexes[i][j].push_back( new std::mutex() );
 			}
 		}
 	}
@@ -218,7 +218,7 @@ int main(int argc, char *argv[]) {
 			int bytesRead;
 			bam1_t *b = bam_init1();;
 
-			boost::hash<string> hasher;
+			std::hash<string> hasher;
 #ifdef LEGACY_SAMTOOLS
 			while ((bytesRead = samread(myBam, b)) > 0) {
 #else
@@ -271,7 +271,7 @@ int main(int argc, char *argv[]) {
 					int contigi = iti->second;
 
 					// protect CountMap from multiple threaded inserts;
-					boost::mutex::scoped_lock lock( contig2contigCountMutexes[i][j][contigi] );
+					std::scoped_lock<std::mutex> lock( contig2contigCountMutexes[i][j][contigi] );
 					if (itj != readContigj.end()) {
 						// found matching read contigi to contigj
 						int contigj = itj->second;


=====================================
src/jgi_summarize_bam_contig_depths.cpp
=====================================
@@ -2,6 +2,9 @@
 // g++ -g -O3 -Wall -I$BOOST_DIR/include -I$SAMTOOLS_DIR/include/bam -L$SAMTOOLS_DIR/lib -o jgi_summarize_bam_contig_depths jgi_summarize_bam_contig_depths.cpp -lpthread -lz -lbam -fopenmp
 
 #include <cassert>
+#include <mutex>
+#include <condition_variable>
+
 #include "jgi_summarize_bam_contig_depths.h"
 #include "KseqReader.h"
 #include "RunningStats.h"
@@ -31,6 +34,7 @@ static struct option long_options[] = {
 		{"outputGC", 1, 0, 0},
 		{"gcWindow", 1, 0, 0},
 		{"outputKmers", 1, 0, 0},
+		{"checkpoint", 0, 0, 0}
 };
 
 void usage() {
@@ -38,6 +42,7 @@ void usage() {
 			"Usage: jgi_summarize_bam_contig_depths <options> sortedBam1 [ sortedBam2 ...]\n"
 			"where options include:\n"
 			"\t--outputDepth       arg  The file to put the contig by bam depth matrix (default: STDOUT)\n"
+			"\t--checkpoint             Write checkpoints after each bam is processed (default: off)\n"
 			"\t--percentIdentity   arg  The minimum end-to-end %% identity of qualifying reads (default: 97)\n"
 			"\t--pairedContigs     arg  The file to output the sparse matrix of contigs which paired reads span (default: none)\n"
 			"\t--unmappedFastq     arg  The prefix to output unmapped reads from each bam file suffixed by 'bamfile.bam.fastq.gz'\n"
@@ -66,59 +71,127 @@ void abortMe(string msg) {
 	exit(1);
 }
 
-void printDepthTable(ostream &of,const BamFileVector& bams,
-		bool intraDepthVariance, const CountTypeMatrix& bamContigDepths,
-		const vector<int>& averageReadSize, float percentIdentity,
-		const VarianceTypeMatrix& bamContigVariances,
-		bam_header_t* header,
-		BoolVector &contigLengthPass, BoolVector &contigDepthPass, float minContigDepth, bool includeEdgeBases = false, int maxEdgeBases = 0) {
+bool file_exists(const string fname) {
+  ifstream ifs(fname, std::ios_base::binary);
+  return ifs.is_open();
+}
+
+string get_basename(const string fname) {
+	string basename(fname);
+	auto pos = basename.find_last_of('/');
+	if (pos != string::npos) basename = basename.substr(pos+1);
+	pos = basename.find_last_of('.');
+	if (pos != string::npos) basename = basename.substr(0, pos);
+	return basename;
+}
 
+void printDepthTableHeader(ostream &of, const vector<string>& names, bool intraDepthVariance) {
 	of << "contigName\tcontigLen\ttotalAvgDepth";
-	for (int bamIdx = 0; bamIdx < (int) bams.size(); bamIdx++) {
-		of << "\t" << bams[bamIdx].getBamName();
+	for (int bamIdx = 0; bamIdx < (int) names.size(); bamIdx++) {
+		string shortName = get_basename(names[bamIdx]);
+		of << "\t" << shortName;
 		if (intraDepthVariance) {
-			of << "\t" << bams[bamIdx].getBamName() << "-var";
+			of << "\t" << shortName << "-var";
 		}
 	}
 	of << "\n";
+}
 
-	std::vector<float> averageDepths(bams.size(), 0.0);
-	for (int32_t contigIdx = 0; contigIdx < header->n_targets; contigIdx++) {
-		CountType sum = 0;
-		if (!contigLengthPass[contigIdx])
-			continue;
+class ContigDepth {
+public:
+  int numSamples, contigLen;
+  bool includeEdgeBases;
+  int maxEdgeBases;
+  double mean, variance;
+  double weightedSum = 0.0;
+  uint64_t totalCorrectedLength = 0;
+  vector<double> means;
+  vector<double> variances;
+  ContigDepth(int numSamples, int contigLen, bool includeEdgeBases,
+              int maxEdgeBases, bool intraDepthVariance)
+      : numSamples(numSamples), contigLen(contigLen),
+        includeEdgeBases(includeEdgeBases),
+        maxEdgeBases(maxEdgeBases), mean{}, variance{}, weightedSum{},
+        totalCorrectedLength{}, means{}, variances{} {
+    means.reserve(numSamples);
+    if (intraDepthVariance)
+      variances.resize(numSamples);
+  }
+  void addDepth(int averageReadSize, CountType contigDepth) {
+    if (!variances.empty())
+      throw;
+    auto correctedLen = getCorrectedLen(averageReadSize);
+    totalCorrectedLength += correctedLen;
+    double mean = contigDepth / correctedLen;
+    weightedSum += mean * correctedLen;
+    means.push_back(mean);
+  }
+  void addDepth(int averageReadSize, VarianceType variance) {
+    if (variances.empty())
+      throw;
+    auto correctedLen = getCorrectedLen(averageReadSize);
+    totalCorrectedLength += correctedLen;
+    variances[means.size()] = variance.variance;
+    means.push_back(variance.mean);
+    weightedSum += variance.mean * correctedLen;
+  }
+  int getCorrectedLen(int averageReadSize) {
+    int correctedLen =
+        (includeEdgeBases |
+         (2 * std::min(maxEdgeBases, averageReadSize) >= contigLen))
+            ? contigLen
+            : contigLen - (2 * std::min(maxEdgeBases, averageReadSize));
+    assert(correctedLen > 0);
+    return correctedLen;
+  }
+  double getTotalCorrectedDepth() const {
+    double totalCorrectedDepth = 0.0;
+    if (totalCorrectedLength > 0.0) {
+      totalCorrectedDepth = weightedSum / totalCorrectedLength;
+    }
+    totalCorrectedDepth *= numSamples; // scale back up because we did not
+                                       // divide by avgCorrectedLength
+	return totalCorrectedDepth;
+  }
+  bool printDepth(ostream &of, string name, float minContigDepth) {
+    auto totalAverageDepth = getTotalCorrectedDepth();
+    std::string nameonly = name;
+    int pos = nameonly.find_first_of(" \t");
+    nameonly = nameonly.substr(0, pos);
+    of << nameonly << "\t" << contigLen << "\t" << (float) totalAverageDepth;
+    for (int i = 0; i < numSamples; i++) {
+      if (variances.size()) {
+        of << "\t" << (float) means[i] << "\t" << (float) variances[i];
+      } else {
+        of << "\t" << (float) means[i];
+      }
+    }
+    of << "\n";
+    return totalAverageDepth >= minContigDepth;
+  }
+};
 
-		float len = header->target_len[contigIdx];
-		float totalAverageDepth = 0.0, totalCorrectedLength = 0.0;
-		float correctedLen;
-		for (int bamIdx = 0; bamIdx < (int) bams.size(); bamIdx++) {
-			correctedLen = (includeEdgeBases | (2 * std::min(maxEdgeBases, averageReadSize[bamIdx]) >= len)) ?
-					len : len - (2 * std::min(maxEdgeBases, averageReadSize[bamIdx]));
-			assert(correctedLen > 0);
-			float mean = intraDepthVariance ? bamContigVariances[bamIdx][contigIdx].mean : (bamContigDepths[bamIdx][contigIdx] / correctedLen);
-			sum += (CountType) (mean * correctedLen + 0.5);
-			averageDepths[bamIdx] = mean;
-			totalCorrectedLength += correctedLen;;
-		}
-		if (totalCorrectedLength > 0.0) {
-			totalAverageDepth = (float) sum / totalCorrectedLength;
-		}
+void printDepthTable(ostream &of, const vector<string>& names,
+		bool intraDepthVariance, const CountTypeMatrix& bamContigDepths,
+		const vector<int>& averageReadSize, float percentIdentity,
+		const VarianceTypeMatrix& bamContigVariances,
+		bam_header_t* header,
+		BoolVector &contigLengthPass, BoolVector &contigDepthPass, float minContigDepth, 
+		bool includeEdgeBases = false, int maxEdgeBases = 0) {
 
-		totalAverageDepth *= bams.size();
+	printDepthTableHeader(of, names, intraDepthVariance);
 
-		of << header->target_name[contigIdx] << "\t"
-			<< len << "\t"
-			<< totalAverageDepth;
-		for (int bamIdx = 0; bamIdx < (int) bams.size(); bamIdx++) {
-			if (intraDepthVariance) {
-				of << "\t" << bamContigVariances[bamIdx][contigIdx].mean << "\t" << bamContigVariances[bamIdx][contigIdx].variance;
-			} else {
-				of << "\t" << averageDepths[bamIdx];
-			}
+	for (int32_t contigIdx = 0; contigIdx < header->n_targets; contigIdx++) {
+		if (!contigLengthPass[contigIdx])
+			continue;
+		
+		int contigLen = header->target_len[contigIdx];
+		ContigDepth contigDepth(names.size(), contigLen, includeEdgeBases, maxEdgeBases, intraDepthVariance);
+		for (int bamIdx = 0; bamIdx < (int) names.size(); bamIdx++) {
+			if (intraDepthVariance) contigDepth.addDepth(averageReadSize[bamIdx], bamContigVariances[bamIdx][contigIdx]);
+			else contigDepth.addDepth(averageReadSize[bamIdx], bamContigDepths[bamIdx].get()[contigIdx]);
 		}
-		of << "\n";
-		if (totalAverageDepth < minContigDepth)
-			contigDepthPass[contigIdx] = false;
+		contigDepthPass[contigIdx] = contigDepth.printDepth(of, header->target_name[contigIdx], minContigDepth);
 	}
 }
 
@@ -192,209 +265,315 @@ ostream &writeReadStats(ostream &os, const bam1_t *b, ReadStatistics readstats,
 	return os;
 }
 
-int main(int argc, char *argv[]) {
-
-	// set and parse options
-	float percentIdentity = (float) 97 / (float) 100.0;
-	string outputTableFile, pairedContigsFile, unmappedFastqFile, referenceFastaFile;
-	string outputReadStatsFile, outputGCFile, outputKmersFile;
-	int gcWindow = 100;	
-	int shredLength = 16000, shredDepth = 5, minContigLength = 1, minMapQual = 0;
-	float weightMapQual = 0.0;
-	bool normalizeWeightMapQual = false;
-	float minContigDepth = 0;
-	bool intraDepthVariance = true;
-	bool showDepth = false;
-	bool includeEdgeBases = false;
-	int maxEdgeBases = 75;
-
-	while(1) {
-		int option_index = 0;
-		int c = getopt_long(argc, argv, "h",
-				long_options, &option_index);
-		if (c == -1)
-			break;
-		switch(c) {
-		case 0:
-			if (strcmp(long_options[option_index].name, "help") == 0) {
-				usage();
-				exit(0);
-			} else if (strcmp(long_options[option_index].name, "percentIdentity") == 0) {
-				percentIdentity = atoi(optarg) / 100.0;
-				cerr << "Minimum percent identity for a mapped read: " << percentIdentity << endl;
-			} else if (strcmp(long_options[option_index].name, "outputDepth") == 0) {
-				outputTableFile = optarg;
-				cerr << "Output depth matrix to " << outputTableFile << endl;
-			} else if (strcmp(long_options[option_index].name, "outputReadStats") == 0) {
-				outputReadStatsFile = optarg;
-				cerr << "Output Read Stats file to " << outputReadStatsFile << endl;
-			} else if (strcmp(long_options[option_index].name, "outputGC") == 0) {
-				outputGCFile = optarg;
-				cerr << "Output GC stats file to " << outputGCFile << endl;
-			} else if (strcmp(long_options[option_index].name, "gcWindow") == 0) {
-				gcWindow = atoi(optarg);
-				cerr << "GC sliding window: " << gcWindow << endl;
-			} else if (strcmp(long_options[option_index].name, "outputKmers") == 0) {
-				outputKmersFile = optarg;
-				cerr << "Output Perfect Kmers file to " << outputKmersFile << endl;
-			} else if (strcmp(long_options[option_index].name, "pairedContigs") == 0) {
-				pairedContigsFile = optarg;
-				cerr << "Output pairedContigs lower triangle to " << pairedContigsFile << endl;
-			} else if (strcmp(long_options[option_index].name, "unmappedFastq") == 0) {
-				unmappedFastqFile = optarg;
-				cerr << "Output Unmapped Fastq to " << unmappedFastqFile << endl;
-			} else if (strcmp(long_options[option_index].name, "referenceFasta") == 0) {
-				referenceFastaFile = optarg;
-				cerr << "Reference fasta file " << referenceFastaFile << endl;
-			} else if (strcmp(long_options[option_index].name, "shredLength") == 0) {
-				shredLength = atoi(optarg);
-				cerr << "shredLength: " << shredLength << endl;
-			} else if (strcmp(long_options[option_index].name, "shredDepth") == 0) {
-				shredDepth = atoi(optarg);
-				cerr << "shredDepth: " << shredDepth << endl;
-			} else if (strcmp(long_options[option_index].name, "minContigLength") == 0) {
-				minContigLength = atoi(optarg);
-				cerr << "minContigLength: " << minContigLength << endl;
-			} else if (strcmp(long_options[option_index].name, "minMapQual") == 0) {
-				minMapQual = atoi(optarg);
-				cerr << "minMapQual: " << minMapQual << endl;
-			} else if (strcmp(long_options[option_index].name, "weightMapQual") == 0) {
-				weightMapQual = atof(optarg);
-				cerr << "weightMapQual: " << weightMapQual << endl;
-			} else if (strcmp(long_options[option_index].name, "minContigDepth") == 0) {
-				minContigDepth = atof(optarg);
-				cerr << "minContigDepth: " << minContigDepth << endl;
-			} else if(strcmp(long_options[option_index].name, "noIntraDepthVariance") == 0) {
-				intraDepthVariance = false;
-				cerr << "Calculating intra contig depth variance\n";
-			} else if(strcmp(long_options[option_index].name, "showDepth") == 0) {
-				showDepth = true;
-				cerr << "Outputing a .depth file for each bam\n";
-			} else if(strcmp(long_options[option_index].name, "includeEdgeBases") == 0) {
-				includeEdgeBases = true;
-				cerr << "Edge bases will be included in all calculations\n";
-			} else if(strcmp(long_options[option_index].name, "maxEdgeBases") == 0) {
-				maxEdgeBases = atoi(optarg);
-				cerr << "Edge bases will be included up to " << maxEdgeBases << " bases\n";
-			} else {
-				usage();
-				cerr << "Unrecognized option: " << long_options[option_index].name << endl;
-				exit(1);
-			}
-			break;
-		default:
-			usage();
-			exit(0);
-		};
-	}
-	if (argc - optind < 1) {
-		usage();
-		if (argc - optind < 1)
-			cerr << "You must specify one or more bam files\n\n";
-		return 0;
-	}
-        cerr << "jgi_summarize_bam_contig_depths " << VERSION << " " << BUILD_TIMESTAMP << endl;
-	cerr << "Output matrix to " << (outputTableFile.empty() ? "STDOUT" : outputTableFile.c_str()) << endl;
-
-	// assign names and allocate samfile handles
-	BamFileVector bams;
-	StringVector bamFilePaths;
-	for(int i = optind; i < argc; i++) {
-		bamFilePaths.push_back(argv[i]);
-	}
-
-	std::vector<string> referenceSequences;
-	std::vector<string> referenceSequenceNames;
-	if (!referenceFastaFile.empty()) {
-		cout << "Reading reference fasta file: " << referenceFastaFile << endl;
-                KseqReader reference(referenceFastaFile);
-                while (reference.hasNext()) {
-                        referenceSequences.push_back(reference.getSeq());
-			referenceSequenceNames.push_back(reference.getName());
-		}
-		cout << "... " << referenceSequences.size() << " sequences" << endl;
-		if (referenceSequences.empty()) {
-			cerr << "ERROR: the reference was empty!: " << referenceFastaFile << endl;
-			exit(1);
-		}
-	}
-
-	SafeOfstream *readStats = NULL;
-	if (!outputReadStatsFile.empty()) {
-		readStats = new SafeOfstream(outputReadStatsFile.c_str());
-		writeReadStatsHeader(*readStats);
-	}
-
-	std::vector< float > refGC;
-	ReadGCStats readGCStats;;
-	std::vector< std::vector<uint8_t> > refGCWindows;
-	if (!outputGCFile.empty() && !referenceSequences.empty()) {
-		refGC.resize(101,0);	
-		readGCStats.resize(101,RunningStats());	
-		refGCWindows.resize(referenceSequences.size());
-		for(long i = 0; i < (long) referenceSequences.size(); i++) {
-			std::vector< uint8_t > &refGCs = refGCWindows[i];
-			refGCs.reserve(referenceSequences[i].length() - gcWindow + 1);
-			for(int j = 0 ; j < (int) referenceSequences[i].length() - gcWindow; j++) {
-				int gc = getGC( referenceSequences[i].c_str() + j, gcWindow, false);
-				refGCs.push_back(gc);
-			}
-		}
-	}
-
-	// open all bams
-	BamHeaderT header = BamUtils::openBamsAndConsolidateHeaders(bamFilePaths, bams, false);
-	assert(header.get() != NULL);
-	BoolVector contigLengthPass, contigDepthPass;
-	contigLengthPass.resize(header->n_targets);
-	contigDepthPass.resize(header->n_targets, true);
-	if (!referenceSequences.empty() && header->n_targets != (long) referenceSequences.size()) {
-		cerr << "Error: referenceFile: " << referenceFastaFile << " is not the same as in the bam headers! (targets: " << header->n_targets << " from the bam vs " << referenceSequences.size() << " from the ref)" << endl;
-		if (referenceSequences.empty()) cerr << "no reference was loaded for " << referenceFastaFile << endl;
-		exit(1);
-	}
-	for(int32_t i = 0; i < header->n_targets; i++) {
-		if ((int) header->target_len[i] >= minContigLength) {
-			contigLengthPass[i] = true;
-		} else {
-			contigLengthPass[i] = false;
-		}
-		if (!referenceSequences.empty() && header->target_len[i] != referenceSequences[i].length()) {
-			cerr << "Error: referenceFile: " << referenceFastaFile << " contig " << i << " is not the same as in the bam headers (bam reports " << header->target_name[i] << " with " << header->target_len[i] << " len, reference loaded " << referenceSequenceNames[i]<< " with " << referenceSequences[i].length() << " len)! " << endl;
-			exit(1);
-		}
-	}
-
-	// make vector of unmappedFastq files, to reuse
-	std::vector< gzipFileBufPtr > bamUnmappedFastqof;
-	bamUnmappedFastqof.resize( bams.size() );
-
-	// allocate memory for depth and optionally variance matrixes
-	CountTypeMatrix bamContigDepths;
-	bamContigDepths.resize(bams.size());
-	vector< int > averageReadSize;
-	averageReadSize.resize(bams.size(), 0);
-
-	VarianceTypeMatrix bamContigVariances;
-	if (intraDepthVariance) {
-		bamContigVariances.resize(bams.size());
-		VarianceType dummy;
-		for(int i = 0; i < (int) bams.size(); i++) {
-			bamContigVariances[i].resize( header->n_targets, dummy);
-		}
-	}
+void writeUnmapedFastqFile(BamFile &myBam, string unmappedFastqFile, BamHeaderT header,
+                           BoolVector &contigLengthPass, BoolVector &contigDepthPass,
+                           gzipFileBufPtr &bamUnmappedFastqof,
+                           NameBamMap &bamReadIds) {
+  cerr << "Sequestering reads to unmappedFastq files for " << myBam.getBamFile() << endl;
+  // write any reads from low depth contigs
+
+  cerr << "Sequestering reads on low abundance contigs for "
+       << myBam.getBamName() << endl;
+  for (int32_t contigIdx = 0; contigIdx < header->n_targets; contigIdx++) {
+    if (contigLengthPass[contigIdx] && (!contigDepthPass[contigIdx])) {
+      // for each bam find and write all reads on these newly failed contigs
+
+      ostream os(bamUnmappedFastqof.get());
+      BamUtils::writeFastqByContig(os, bamReadIds, myBam, contigIdx);
+    }
+  }
+
+  cerr << "Sequestering read mates not yet output for " << myBam.getBamName()
+       << endl;
+  // for each bam write all orphans...
+
+  string name = unmappedFastqFile + "-" + myBam.getBamName() + "-single.fastq";
+  {
+
+    SafeOfstream singles(name.c_str());
+    ostream pairs(bamUnmappedFastqof.get());
+    BamUtils::writeFastqOrphans(pairs, singles, bamReadIds, myBam);
+    cerr << "Closing pair and single unmaps for " << myBam.getBamName() << endl;
+    // close the two files now
+    bamUnmappedFastqof.reset();
+    cerr << "Closed pair and single unmaps for " << myBam.getBamName() << endl;
+  }
+  struct stat filestatus;
+  stat(name.c_str(), &filestatus);
+
+  if (filestatus.st_size <= 20) {
+    unlink(name.c_str());
+  } else {
+    cerr << "Additional orphaned reads are output as singles to: " << name
+         << endl;
+  }
+
+  cerr << "Freeing up memory for " << myBam.getBamName() << endl;
+  bamReadIds.clear();
+}
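
For context, the gzipFileBufPtr plumbing this function writes through can be reduced to a small standalone sketch: a boost::iostreams gzip compressor pushed in front of a file sink, wrapped in an ostream, followed by the stat()/unlink() pass that drops near-empty side files. openGzip and the file names here are illustrative only, not part of the patch:

#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/device/file.hpp>
#include <sys/stat.h>
#include <unistd.h>
#include <iostream>
#include <memory>
#include <string>

// same shape as gzipFileBufPtr above: a gzip compressor in front of a file sink
using GzBuf = boost::iostreams::filtering_streambuf<boost::iostreams::output>;

std::shared_ptr<GzBuf> openGzip(const std::string &name) {
	auto buf = std::make_shared<GzBuf>();
	buf->push(boost::iostreams::gzip_compressor());
	buf->push(boost::iostreams::file_sink(name));
	return buf;
}

int main() {
	auto gz = openGzip("reads.fastq.gz");
	{
		std::ostream os(gz.get());       // wrap the buffer in a stream
		os << "@read1\nACGT\n+\nIIII\n"; // one FASTQ record
	}
	gz.reset(); // flush and close, as the reset() above does

	// drop near-empty side files, mirroring the <= 20 byte check above
	struct stat filestatus;
	if (stat("reads.fastq.gz", &filestatus) == 0 && filestatus.st_size <= 20)
		unlink("reads.fastq.gz");
	return 0;
}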
 
-	std::vector< NameBamMap > bamReadIds;
-	bamReadIds.resize( bams.size() );
+int main(int argc, char *argv[]) {
 
-	int numThreads = 1;
+  // set and parse options
+  float percentIdentity = (float)97 / (float)100.0;
+  string outputTableFile, pairedContigsFile, unmappedFastqFile,
+      referenceFastaFile;
+  string outputReadStatsFile, outputGCFile, outputKmersFile;
+  int gcWindow = 100;
+  int shredLength = 16000, shredDepth = 5, minContigLength = 1, minMapQual = 0;
+  float weightMapQual = 0.0;
+  bool normalizeWeightMapQual = false;
+  float minContigDepth = 0;
+  bool intraDepthVariance = true;
+  bool showDepth = false;
+  bool includeEdgeBases = false;
+  int maxEdgeBases = 75;
+  bool checkpoint = false;
+
+  while (1) {
+    int option_index = 0;
+    int c = getopt_long(argc, argv, "h", long_options, &option_index);
+    if (c == -1)
+      break;
+    switch (c) {
+    case 0:
+      if (strcmp(long_options[option_index].name, "help") == 0) {
+        usage();
+        exit(0);
+      } else if (strcmp(long_options[option_index].name, "percentIdentity") ==
+                 0) {
+        percentIdentity = atoi(optarg) / 100.0;
+        cerr << "Minimum percent identity for a mapped read: "
+             << percentIdentity << endl;
+      } else if (strcmp(long_options[option_index].name, "outputDepth") == 0) {
+        outputTableFile = optarg;
+        cerr << "Output depth matrix to " << outputTableFile << endl;
+      } else if (strcmp(long_options[option_index].name, "checkpoint") == 0) {
+        checkpoint = true;
+      } else if (strcmp(long_options[option_index].name, "outputReadStats") ==
+                 0) {
+        outputReadStatsFile = optarg;
+        cerr << "Output Read Stats file to " << outputReadStatsFile << endl;
+      } else if (strcmp(long_options[option_index].name, "outputGC") == 0) {
+        outputGCFile = optarg;
+        cerr << "Output GC stats file to " << outputGCFile << endl;
+      } else if (strcmp(long_options[option_index].name, "gcWindow") == 0) {
+        gcWindow = atoi(optarg);
+        cerr << "GC sliding window: " << gcWindow << endl;
+      } else if (strcmp(long_options[option_index].name, "outputKmers") == 0) {
+        outputKmersFile = optarg;
+        cerr << "Output Perfect Kmers file to " << outputKmersFile << endl;
+      } else if (strcmp(long_options[option_index].name, "pairedContigs") ==
+                 0) {
+        pairedContigsFile = optarg;
+        cerr << "Output pairedContigs lower triangle to " << pairedContigsFile
+             << endl;
+      } else if (strcmp(long_options[option_index].name, "unmappedFastq") ==
+                 0) {
+        unmappedFastqFile = optarg;
+        cerr << "Output Unmapped Fastq to " << unmappedFastqFile << endl;
+      } else if (strcmp(long_options[option_index].name, "referenceFasta") ==
+                 0) {
+        referenceFastaFile = optarg;
+        cerr << "Reference fasta file " << referenceFastaFile << endl;
+      } else if (strcmp(long_options[option_index].name, "shredLength") == 0) {
+        shredLength = atoi(optarg);
+        cerr << "shredLength: " << shredLength << endl;
+      } else if (strcmp(long_options[option_index].name, "shredDepth") == 0) {
+        shredDepth = atoi(optarg);
+        cerr << "shredDepth: " << shredDepth << endl;
+      } else if (strcmp(long_options[option_index].name, "minContigLength") ==
+                 0) {
+        minContigLength = atoi(optarg);
+        cerr << "minContigLength: " << minContigLength << endl;
+      } else if (strcmp(long_options[option_index].name, "minMapQual") == 0) {
+        minMapQual = atoi(optarg);
+        cerr << "minMapQual: " << minMapQual << endl;
+      } else if (strcmp(long_options[option_index].name, "weightMapQual") ==
+                 0) {
+        weightMapQual = atof(optarg);
+        cerr << "weightMapQual: " << weightMapQual << endl;
+      } else if (strcmp(long_options[option_index].name, "minContigDepth") ==
+                 0) {
+        minContigDepth = atof(optarg);
+        cerr << "minContigDepth: " << minContigDepth << endl;
+      } else if (strcmp(long_options[option_index].name,
+                        "noIntraDepthVariance") == 0) {
+        intraDepthVariance = false;
+        cerr << "Calculating intra contig depth variance\n";
+      } else if (strcmp(long_options[option_index].name, "showDepth") == 0) {
+        showDepth = true;
+        cerr << "Outputing a .depth file for each bam\n";
+      } else if (strcmp(long_options[option_index].name, "includeEdgeBases") ==
+                 0) {
+        includeEdgeBases = true;
+        cerr << "Edge bases will be included in all calculations\n";
+      } else if (strcmp(long_options[option_index].name, "maxEdgeBases") == 0) {
+        maxEdgeBases = atoi(optarg);
+        cerr << "Edge bases will be included up to " << maxEdgeBases
+             << " bases\n";
+      } else {
+        usage();
+        cerr << "Unrecognized option: " << long_options[option_index].name
+             << endl;
+        exit(1);
+      }
+      break;
+    default:
+      usage();
+      exit(0);
+    };
+  }
+  if (argc - optind < 1) {
+    usage();
+    cerr << "You must specify one or more bam files\n\n";
+    return 1;
+  }
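
The option handling above is the classic getopt_long dispatch on a long-option table. A minimal self-contained sketch of the same pattern; demo_options is a stand-in for the real long_options array declared elsewhere in this file:

#include <getopt.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>

// stand-in option table; the real long_options array lives elsewhere
static struct option demo_options[] = {
	{"help",            no_argument,       0, 0},
	{"percentIdentity", required_argument, 0, 0},
	{0, 0, 0, 0}
};

int main(int argc, char *argv[]) {
	float percentIdentity = 0.97f;
	while (1) {
		int option_index = 0;
		int c = getopt_long(argc, argv, "h", demo_options, &option_index);
		if (c == -1)
			break;
		if (c == 0 && strcmp(demo_options[option_index].name, "percentIdentity") == 0) {
			percentIdentity = atoi(optarg) / 100.0f; // e.g. --percentIdentity 95
		} else { // --help, -h, or anything unrecognized
			fprintf(stderr, "usage: demo [--percentIdentity N] bam [...]\n");
			return c == 0 ? 0 : 1;
		}
	}
	if (argc - optind < 1) {
		fprintf(stderr, "You must specify one or more bam files\n");
		return 1;
	}
	fprintf(stderr, "percentIdentity=%.2f, first bam=%s\n", percentIdentity, argv[optind]);
	return 0;
}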
+
+  cerr << "jgi_summarize_bam_contig_depths " << VERSION << " "
+       << BUILD_TIMESTAMP << endl;
+  cerr << "Running with " << omp_get_max_threads()
+       << " threads to save memory you can reduce the number of threads with "
+          "the OMP_NUM_THREADS variable"
+       << endl;
+  cerr << "Output matrix to "
+       << (outputTableFile.empty() ? "STDOUT" : outputTableFile.c_str())
+       << endl;
+  if (checkpoint)
+    cerr << "Writing checkpoints as each bam has been processed" << endl;
+
+  // assign names and allocate samfile handles
+  BamFileVector bams;
+  StringVector bamFilePaths;
+  for (int i = optind; i < argc; i++) {
+    bamFilePaths.push_back(argv[i]);
+  }
+  int num_bams = bamFilePaths.size();
+
+  std::vector<string> referenceSequences;
+  std::vector<string> referenceSequenceNames;
+  if (!referenceFastaFile.empty()) {
+    cout << "Reading reference fasta file: " << referenceFastaFile << endl;
+    KseqReader reference(referenceFastaFile);
+    while (reference.hasNext()) {
+      referenceSequences.push_back(reference.getSeq());
+      referenceSequenceNames.push_back(reference.getName());
+    }
+    cout << "... " << referenceSequences.size() << " sequences" << endl;
+    if (referenceSequences.empty()) {
+      cerr << "ERROR: the reference was empty!: " << referenceFastaFile << endl;
+      exit(1);
+    }
+  }
+
+  SafeOfstream *readStats = NULL;
+  if (!outputReadStatsFile.empty()) {
+    readStats = new SafeOfstream(outputReadStatsFile.c_str());
+    writeReadStatsHeader(*readStats);
+  }
+
+  std::vector<float> refGC;
+  ReadGCStats readGCStats;
+  std::vector<std::vector<uint8_t>> refGCWindows;
+  if (!outputGCFile.empty() && !referenceSequences.empty()) {
+    refGC.resize(101, 0);
+    readGCStats.resize(101, RunningStats());
+    refGCWindows.resize(referenceSequences.size());
+    for (long i = 0; i < (long)referenceSequences.size(); i++) {
+      std::vector<uint8_t> &refGCs = refGCWindows[i];
+      refGCs.reserve(referenceSequences[i].length() - gcWindow + 1);
+      for (int j = 0; j < (int)referenceSequences[i].length() - gcWindow; j++) {
+        int gc = getGC(referenceSequences[i].c_str() + j, gcWindow, false);
+        refGCs.push_back(gc);
+      }
+    }
+  }
+
+  // open first bam
+  bams.resize(num_bams);
+  BamFile &firstBam = bams[0];
+  BamHeaderT header;
+  bool preprocess_complete = false;
+  if (checkpoint) {
+    cerr << "Opening the first bam file for a baseline header: "
+         << bamFilePaths[0] << endl;
+    firstBam = BamUtils::openBam(bamFilePaths[0], false);
+    header = firstBam.header;
+  } else {
+    cerr << "Opening all bam files and validating headers" << endl;
+    header = BamUtils::openBamsAndConsolidateHeaders(bamFilePaths, bams, false);
+    preprocess_complete = true;
+  }
+
+  assert(header.get() != NULL);
+  BoolVector contigLengthPass, contigDepthPass;
+  contigLengthPass.resize(header->n_targets);
+  contigDepthPass.resize(header->n_targets, true);
+  if (!referenceSequences.empty() &&
+      header->n_targets != (long)referenceSequences.size()) {
+    cerr << "Error: referenceFile: " << referenceFastaFile
+         << " is not the same as in the bam headers! (targets: "
+         << header->n_targets << " from the bam vs "
+         << referenceSequences.size() << " from the ref)" << endl;
+    exit(1);
+  }
+  for (int32_t i = 0; i < header->n_targets; i++) {
+    if ((int)header->target_len[i] >= minContigLength) {
+      contigLengthPass[i] = true;
+    } else {
+      contigLengthPass[i] = false;
+    }
+    if (!referenceSequences.empty() &&
+        header->target_len[i] != referenceSequences[i].length()) {
+      cerr << "Error: referenceFile: " << referenceFastaFile << " contig " << i
+           << " is not the same as in the bam headers (bam reports "
+           << header->target_name[i] << " with " << header->target_len[i]
+           << " len, reference loaded " << referenceSequenceNames[i] << " with "
+           << referenceSequences[i].length() << " len)! " << endl;
+      exit(1);
+    }
+  }
+
+  // make vector of unmappedFastq files, to reuse
+  std::vector<gzipFileBufPtr> bamUnmappedFastqof;
+  bamUnmappedFastqof.resize(num_bams);
+
+  // allocate memory for depth and optionally variance matrices
+  CountTypeMatrix bamContigDepths;
+  bamContigDepths.resize(num_bams);
+  vector<int> averageReadSize;
+  averageReadSize.resize(num_bams, 0);
+
+  VarianceTypeMatrix bamContigVariances;
+  if (intraDepthVariance) {
+    bamContigVariances.resize(num_bams);
+    if (!checkpoint) {
+      VarianceType dummy;
+      for (int i = 0; i < (int)num_bams; i++) {
+        bamContigVariances[i].resize(header->n_targets, dummy);
+      }
+    }
+  }
+
+  std::vector<NameBamMap> bamReadIds;
+  bamReadIds.resize(num_bams);
+
+  int numThreads = 1;
+  int largest_contig = 0;
 #pragma omp parallel
 	{
 		if (omp_get_thread_num() == 0)
 			numThreads = omp_get_num_threads();
+		#pragma omp for reduction(max:largest_contig)
+		for(int64_t i = 0; i < (int64_t) header->n_targets; i++)
+			if (header->target_len[i] > largest_contig) largest_contig = header->target_len[i];
 	}
-	if (numThreads > (int) bams.size()) {
-		numThreads = bams.size();
+	if (numThreads > (int)num_bams) {
+		numThreads = num_bams;
 		omp_set_num_threads( numThreads );
 	}
 	std::vector< PairedCountTypeMatrix > bamPairedContigs;
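
The largest_contig scan above uses an OpenMP max reduction (available since OpenMP 3.1). In isolation, with target_len standing in for header->target_len, the pattern looks like this:

#include <omp.h>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
	// target_len stands in for header->target_len
	std::vector<uint32_t> target_len = {1500, 5374, 800, 12000, 3000};
	int largest_contig = 0;
#pragma omp parallel for reduction(max:largest_contig)
	for (int64_t i = 0; i < (int64_t) target_len.size(); i++)
		if ((int) target_len[i] > largest_contig)
			largest_contig = target_len[i];
	printf("largest_contig=%d across %d threads\n", largest_contig, omp_get_max_threads());
	return 0;
}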
@@ -418,15 +597,14 @@ int main(int argc, char *argv[]) {
 	}
 
 
-	cerr << "Processing bam files" << endl;
+	cerr << "Processing bam files with largest_contig=" << largest_contig << endl;
 	bool hasAnyPairedContigs = false;
 
 	// preallocate all forking for unmapped fastq, so we do not fork when memory is tight
 	if (!unmappedFastqFile.empty()) {
 #pragma omp parallel for
-		for(int bamIdx = 0; bamIdx < (int) bams.size(); bamIdx++) {
-			BamFile &myBam = bams[bamIdx];
-			string name = unmappedFastqFile + "-" + myBam.getBamName() + ".fastq.gz";
+		for(int bamIdx = 0; bamIdx < (int) num_bams; bamIdx++) {
+			string name = unmappedFastqFile + "-" + bamFilePaths[bamIdx] + ".fastq.gz";
 			bamUnmappedFastqof[bamIdx] = gzipOutputFile(name);
 			cerr << "Outputting any unmapped reads to " << name << endl;
 		}
@@ -436,42 +614,99 @@ int main(int argc, char *argv[]) {
 		ourMappedKmersStats = new MappedKmersStats();
 	}
 	DepthCounts noDepthCounts;
+	vector<DepthCounts> workingDepthCounts(omp_get_max_threads());
+	#pragma omp parallel for schedule(dynamic, 1)
+	for(int threadIdx = 0; threadIdx < (int) workingDepthCounts.size(); threadIdx++)
+		workingDepthCounts[threadIdx].resetBaseCounts(largest_contig+1, weightMapQual > 0.0);
 
+	std::mutex m;
+	std::condition_variable cv;
 	bool isSorted = true;
-#pragma omp parallel for schedule(static, 1)
-	for(int bamIdx = 0; bamIdx < (int) bams.size(); bamIdx++) {
+	CountTypeMatrix threadWorkingContigDepths(omp_get_max_threads());
+	VarianceTypeMatrix threadWorkingVariance(omp_get_max_threads());
+
+	vector<string> partialFiles(num_bams);
+#pragma omp parallel for schedule(dynamic, 1)
+	for(int bamIdx = 0; bamIdx < (int) num_bams; bamIdx++) {
 
 		int threadNum = omp_get_thread_num();
+		string baseName = get_basename(bamFilePaths[bamIdx]);
+
+		if (checkpoint && bamIdx == 0) {
+			// block processing all bam file alignments until preprocessing of contig name 2 id map
+			std::lock_guard<std::mutex> lk(m);
+#pragma omp critical (OUTPUT)
+			cerr << "Thread " << threadNum << " preprocessing contig2id map" << endl;
+			BamUtils::preprocessContigNames(header);
+			preprocess_complete = true;
+			cv.notify_all();
+		}
+
+		string checkpoint_depth = partialFiles[bamIdx] = outputTableFile + ".partial-" + baseName + ".data";
+		if (checkpoint && file_exists(checkpoint_depth)) {
+#pragma omp critical (OUTPUT)
+			cerr << "partial output for bam " << bamIdx << ": " << baseName << " already checkpointed." << endl;
+			continue;
+		}
+
 		BamFile &myBam = bams[bamIdx];
+#pragma omp critical (OUTPUT)
+		cerr << "Thread " << threadNum << " opening and reading the header for file: " << bamFilePaths[bamIdx] << endl;
+		if (bamIdx > 0) myBam = BamUtils::openBam(bamFilePaths[bamIdx], false, header);
+#pragma omp critical (OUTPUT)
+		cerr << "Thread " << threadNum << " opened the file: " << bamFilePaths[bamIdx] << endl;
+		if (checkpoint && !preprocess_complete) {
+#pragma omp critical (OUTPUT)
+			cerr << "Thread " << threadNum << " waiting for preprocessing to complete before reading alignments in : " << bamFilePaths[bamIdx] << endl;
+			std::unique_lock<std::mutex> lk(m);
+			if (!preprocess_complete) {
+				cv.wait(lk, [&preprocess_complete]{return preprocess_complete;});
+			}
+		}
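
The checkpoint gate here is a standard mutex + condition_variable handshake: thread 0 finishes the one-time contig name preprocessing, flips the flag under the lock, and notifies the waiters, whose predicate wait is a no-op if the flag is already set. A minimal sketch with std::thread standing in for the OpenMP team:

#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>
#include <cstdio>

int main() {
	std::mutex m;
	std::condition_variable cv;
	bool preprocess_complete = false;

	auto worker = [&](int id) {
		if (id == 0) {
			{
				std::lock_guard<std::mutex> lk(m);
				printf("Thread %d preprocessing contig2id map\n", id);
				preprocess_complete = true;
			}
			cv.notify_all();
		} else {
			std::unique_lock<std::mutex> lk(m);
			cv.wait(lk, [&] { return preprocess_complete; }); // no-op if already done
			printf("Thread %d proceeding\n", id);
		}
	};

	std::vector<std::thread> threads;
	for (int i = 0; i < 4; i++)
		threads.emplace_back(worker, i);
	for (auto &t : threads)
		t.join();
	return 0;
}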
 		ostream *unmappedFastq = NULL;
 		if (!unmappedFastqFile.empty()) {
 			unmappedFastq = new std::ostream( bamUnmappedFastqof[bamIdx].get() );
 		}
-//		cerr << "Thread " << threadNum << " processing: " << myBam.getBamName() << endl;
 
-		// initialize and allocate memory structures
+#pragma omp critical (OUTPUT)
+		cerr << "Thread " << threadNum << " processing bam " << bamIdx << ": " << myBam.getBamName() << endl;
 
-		bamContigDepths[bamIdx] = (CountType *) calloc(header->n_targets, sizeof(CountType));
-		if (bamContigDepths[bamIdx] == NULL) {
+		// initialize and allocate memory structures
+		if (!checkpoint)
+			bamContigDepths[bamIdx].swap(threadWorkingContigDepths[threadNum]);
+		if (!bamContigDepths[bamIdx]) bamContigDepths[bamIdx].reset(new CountType[header->n_targets]);
+		else memset(bamContigDepths[bamIdx].get(), 0, header->n_targets * sizeof(CountType));
+		if (!bamContigDepths[bamIdx]) {
 			cerr << "Could not allocate enough memory to track depth per contig" << endl;
 			exit(1);
 		}
-		CountType *contigDepths = bamContigDepths[bamIdx];
+
+		if (intraDepthVariance) {
+			assert(bamContigVariances.size() > bamIdx);
+			if (!checkpoint)
+				bamContigVariances[bamIdx].swap(threadWorkingVariance[threadNum]);
+			if (bamContigVariances[bamIdx].empty()) {
+				VarianceType dummy;
+				bamContigVariances[bamIdx].resize( header->n_targets, dummy);
+			} else {
+				for(auto &x : bamContigVariances[bamIdx]) x.reset();
+			}
+		}
+		CountType *contigDepths = bamContigDepths[bamIdx].get();
 		PairedCountTypeMatrix &pairedContigs = bamPairedContigs[threadNum];
 		NameBamMap &readIds = bamReadIds[bamIdx];
 		NameBamMap *tempMates = NULL; // new BamNameTrackingChooser();
 
 		int lastMinPos = -1;
 
-		DepthCounts depthCounts;
-		boost::shared_ptr< std::ofstream > depthFile;
+		DepthCounts &depthCounts = workingDepthCounts[omp_get_thread_num()];
+		std::shared_ptr< std::ofstream > depthFile;
 		if (intraDepthVariance || !readGCStats.empty()) {
-			depthCounts.resetBaseCounts(1024*1024, weightMapQual > 0.0);
+			depthCounts.resetBaseCounts(largest_contig+1, weightMapQual > 0.0);
 			if (showDepth) {
 				depthFile.reset( new SafeOfstream( string(myBam.getFilePath() + ".depth").c_str() ) );
 			}
 		}
-
 		
 		MappedKmersStats *mappedKmersStats = NULL;
 		if (!outputKmersFile.empty() && !referenceSequences.empty()) {
@@ -502,7 +737,7 @@ int main(int argc, char *argv[]) {
 			readCounts++;
 			if (tid >= 0) {
 				if (lastTid > tid || (lastTid == tid && lastPos > pos)) {
-					#pragma omp critical (BAM_WARN_UNSORTED)
+#pragma omp critical (BAM_WARN_UNSORTED)
 					cerr << "ERROR: the bam file '" << myBam.getBamName() << "' is not sorted!" << endl;
 					isSorted = false;
 					break;
@@ -539,7 +774,8 @@ int main(int argc, char *argv[]) {
 				// calculate statistics for the previous contig
 
 				if (lastTid >= 0) {
-					bamContigVariances[bamIdx][lastTid] = calculateVarianceContig(header.get(), lastTid, depthCounts, includeEdgeBases ? 0 : std::min(maxEdgeBases,avgRead), weightMapQual, normalizeWeightMapQual);
+					if (intraDepthVariance)
+						bamContigVariances[bamIdx][lastTid] = calculateVarianceContig(header.get(), lastTid, depthCounts, includeEdgeBases ? 0 : std::min(maxEdgeBases,avgRead), weightMapQual, normalizeWeightMapQual);
 					if (!readGCStats.empty() && depthCounts && contigDepths[ lastTid ] / header->target_len[ lastTid ] > minContigDepth) {
 						addGCCounts(readGCStats, refGCWindows[ lastTid ], gcWindow, depthCounts);
 					}
@@ -551,14 +787,16 @@ int main(int argc, char *argv[]) {
 							lastPos = 0;
 							break;
 						}
-						bamContigVariances[bamIdx][lastTid] = VarianceType();
+						if (intraDepthVariance)
+							bamContigVariances[bamIdx][lastTid] = VarianceType();
 						if (showDepth) {
 							depthCounts.resetBaseCounts(header->target_len[ lastTid ], weightMapQual > 0.0);
 							*depthFile << getContigDepthByBase(header.get(), lastTid, depthCounts, includeEdgeBases ? 0 : std::min(maxEdgeBases, avgRead), weightMapQual);
 						}
 					}
 				}
-			}
+			} // finish stats for previous contigs
+
 			if (depthCounts && lastTid != tid && tid >= 0) {
 				depthCounts.resetBaseCounts(header->target_len[ tid ], weightMapQual > 0.0);
 			}
@@ -658,7 +896,7 @@ int main(int argc, char *argv[]) {
 							tempMates->insert(baseName, b);
 						}
 					}
-				}
+				} // if unmappedFastq && unmapped
 
 				if (!pairedContigsFile.empty() && (b->core.flag & (BAM_FPAIRED | BAM_FMUNMAP)) == BAM_FPAIRED && tid >= 0 && b->core.mtid >= 0) {
 					assert(tid < (int) pairedContigs.size());
@@ -672,23 +910,26 @@ int main(int argc, char *argv[]) {
 
 					hasAnyPairedContigs = true;
 				}
-			}
+			} // if mapped
 			if (tempMates != NULL)
 				delete tempMates;
 
-		}
+		} // while read the bam file
 
 		if (unmappedFastq != NULL)
 			delete unmappedFastq;
 
 		if (mappedKmersStats != NULL && ourMappedKmersStats != NULL) {
-			#pragma omp critical (MAPPED_KMERS_STATS)
+#pragma omp critical (MAPPED_KMERS_STATS)
 			{
 				*ourMappedKmersStats += *mappedKmersStats;
 			}
 			delete mappedKmersStats;
 		}
 
+#pragma omp critical (OUTPUT)
+		cerr << "Thread " << threadNum << " finished reading bam " << bamIdx << ": " << myBam.getBamName() << endl;
+
 		if (!isSorted) //skip
 			continue;
 
@@ -698,6 +939,7 @@ int main(int argc, char *argv[]) {
 		// calculate the statistics for the last contig
 		if (intraDepthVariance) {
 			while (lastTid >= 0 && lastTid < header->n_targets) {
+				assert(bamContigVariances.size() > bamIdx && bamContigVariances[bamIdx].size() > lastTid);
 				bamContigVariances[bamIdx][lastTid] = calculateVarianceContig(header.get(), lastTid, depthCounts, includeEdgeBases ? 0 : std::min(maxEdgeBases, avgRead), weightMapQual, normalizeWeightMapQual);
 				if (!readGCStats.empty() && depthCounts && contigDepths[ lastTid ] / header->target_len[ lastTid ] > minContigDepth) {
 					addGCCounts(readGCStats, refGCWindows[ lastTid ], gcWindow, depthCounts);
@@ -711,15 +953,54 @@ int main(int argc, char *argv[]) {
 					depthCounts.resetBaseCounts(header->target_len[ lastTid ], weightMapQual > 0.0);
 				}
 			}
-		}
-		if (depthCounts)
-			depthCounts.clear();
-#pragma omp critical (THREAD_INFO_2)
-		cerr << "Thread " << threadNum << " finished: " << myBam.getBamName() << " with " << readCounts << " reads and " << readsWellMapped << " readsWellMapped" << endl;
+		} // intraDepthVariance
+
+		if (checkpoint) {
+#pragma omp critical (OUTPUT)
+			cerr << "Writing partial output for bam " << bamIdx << ": " << myBam.getBamName() << " as " << checkpoint_depth << endl;
+			string tmp = checkpoint_depth + ".tmp";
+			ofstream out(tmp);
+			assert(contigDepths == bamContigDepths[bamIdx].get());
+			
+			for(int i = 0; i < header->n_targets; i++) {
+				if (intraDepthVariance) {
+					auto &bcv = bamContigVariances[bamIdx];
+					auto &cv = bcv[i];
+					out.write((const char*) &cv, sizeof(VarianceType));
+				} else {
+					out.write((const char*) (contigDepths + i), sizeof(*contigDepths));
+				}
+			}
+			threadWorkingContigDepths[threadNum].swap(bamContigDepths[bamIdx]);
+			if (intraDepthVariance) {
+				auto &bcv = bamContigVariances[bamIdx];
+				threadWorkingVariance[threadNum].swap(bcv);
+			}
+			out.close();
+			if (rename(tmp.c_str(), checkpoint_depth.c_str()) != 0) {
+				cerr << "Could not rename " << tmp << " to " << checkpoint_depth << "! " << strerror(errno);
+				exit(1);
+			}
+		} // checkpoint
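
The checkpoint write relies on the write-to-.tmp-then-rename() idiom, so a crash mid-write can never leave a truncated file that looks like a finished checkpoint. A reduced sketch, with illustrative file names and VarianceType mirroring the POD declared in the header:

#include <cstdio>
#include <cstring>
#include <cerrno>
#include <fstream>
#include <iostream>
#include <string>

struct VarianceType { float mean, variance; }; // mirrors the POD written above

int main() {
	std::string checkpoint_depth = "depth.partial-sample.bam.data";
	std::string tmp = checkpoint_depth + ".tmp";
	{
		std::ofstream out(tmp);
		VarianceType cv{1.5f, 0.25f};
		out.write((const char *) &cv, sizeof(VarianceType));
	} // out is flushed and closed here
	if (rename(tmp.c_str(), checkpoint_depth.c_str()) != 0) {
		std::cerr << "Could not rename " << tmp << " to " << checkpoint_depth
		          << "! " << strerror(errno) << std::endl;
		return 1;
	}
	return 0;
}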
+
+#pragma omp critical (OUTPUT)
+		cerr << "Thread " << threadNum << " finished: " << myBam.getBamName() << " with " << readCounts << " reads and " << readsWellMapped << " readsWellMapped (" << 100. * readsWellMapped / readCounts << "%)" << endl;
 		if (readStats != NULL) {
 			IOThreadBuffer::flush(*readStats);
 		}
-	}
+		if (!unmappedFastqFile.empty()) {
+			writeUnmappedFastqFile(myBam, unmappedFastqFile, header,
+                           contigLengthPass, contigDepthPass,
+                           bamUnmappedFastqof[bamIdx],
+                           bamReadIds[bamIdx]);
+		}
+		if (bamIdx > 0) {
+#pragma omp critical (OUTPUT)
+			cerr << "Closing bam " << bamIdx << ": " << myBam.getBamFile() << endl;
+			myBam.close();
+		}
+	} // foreach bamIdx
+	workingDepthCounts.clear();
 	if (!isSorted) {
 		cerr << "Please execute 'samtools sort' on unsorted input bam files and try again!" << endl;
 		exit(1);
@@ -740,76 +1021,62 @@ int main(int argc, char *argv[]) {
 	}
 
 	// output the matrix
-	{
-		cerr << "Creating depth matrix file: " << outputTableFile << endl;
-		streambuf *buf;
-		ofstream of;
-		if (!outputTableFile.empty() || outputTableFile.compare("-") != 0) {
-			of.open( outputTableFile.c_str() );
-			buf = of.rdbuf();
-		} else {
-			buf = cout.rdbuf();
+	streambuf *buf;
+	ofstream of;
+	if (!outputTableFile.empty() && outputTableFile.compare("-") != 0) {
+		of.open( outputTableFile.c_str() );
+		buf = of.rdbuf();
+	} else {
+		buf = cout.rdbuf();
+	}
+	ostream out(buf);
+	if (checkpoint) {
+		cerr << "All partial depths are written, combining partials ionto " << outputTableFile << endl;
+		printDepthTableHeader(out, bamFilePaths, intraDepthVariance);
+		vector<ifstream> partials;
+		partials.reserve(partialFiles.size());
+		for(auto &checkpoint_file : partialFiles) {
+			partials.emplace_back(checkpoint_file);
 		}
-		ostream out(buf);
-		printDepthTable(out, bams, intraDepthVariance,
+		for(int contigIdx = 0; contigIdx < header->n_targets; contigIdx++) {
+			if (!contigLengthPass[contigIdx])
+				continue;
+			auto contigLen = header->target_len[contigIdx];
+			ContigDepth contigDepth(partials.size(), contigLen, includeEdgeBases, maxEdgeBases, intraDepthVariance);
+			int bamIdx = 0;
+			for (auto &in : partials) {
+				if (!in.good()) throw;
+				if (intraDepthVariance) {
+					VarianceType variance;
+					in.read((char*) &variance, sizeof(VarianceType));
+					contigDepth.addDepth(averageReadSize[bamIdx], variance);
+				} else {
+					CountType depth;
+					in.read((char*) &depth, sizeof(CountType));
+					contigDepth.addDepth(averageReadSize[bamIdx], depth);
+				}
+				if (!in.eof() && !in.good()) throw;
+				bamIdx++;
+			}
+			contigDepthPass[contigIdx] = contigDepth.printDepth(out, header->target_name[contigIdx], minContigDepth);
+		}
+	} else {
+		cerr << "Creating depth matrix file: " << outputTableFile << endl;
+		
+		printDepthTable(out, bamFilePaths, intraDepthVariance,
 						bamContigDepths, averageReadSize, percentIdentity,
 						bamContigVariances, header.get(),
 						contigLengthPass, contigDepthPass, minContigDepth, includeEdgeBases, maxEdgeBases);
 
 	}
-	for(int bamIdx = 0; bamIdx < (int) bams.size(); bamIdx++) {
-		free(bamContigDepths[bamIdx]);
-		if (intraDepthVariance) {
-			bamContigVariances[bamIdx].clear();
-		}
-	}
-
-	if (!unmappedFastqFile.empty()) {
-		cerr << "Sequestering reads to unmappedFastq files" << endl;
+	out.flush();
+	of.close();
 
-#pragma omp parallel for schedule(static, 1)
-		for(int bamIdx = 0; bamIdx < (int) bams.size(); bamIdx++) {
-			BamFile &myBam = bams[bamIdx];
-			// write any reads from low depth contigs
+	bamContigDepths.clear();
+	bamContigVariances.clear();
 
-			cerr << "Sequestering reads on low abundance contigs for " << myBam.getBamName() << endl;
-			for(int32_t contigIdx = 0; contigIdx < header->n_targets; contigIdx++) {
-				if ( contigLengthPass[contigIdx] && (!contigDepthPass[contigIdx]) ) {
-					// for each bam find and write all reads on these newly failed contigs
-
-
-					ostream os(bamUnmappedFastqof[bamIdx].get());
-					BamUtils::writeFastqByContig(os, bamReadIds[bamIdx], myBam, contigIdx);
-				}
-			}
-
-			cerr << "Sequestering read mates not yet output for " << myBam.getBamName() << endl;
-			// for each bam write all orphans...
-
-			string name = unmappedFastqFile + "-" + myBam.getBamName() + "-single.fastq";
-			{ 
-
-				SafeOfstream singles(name.c_str());
-				ostream pairs(bamUnmappedFastqof[bamIdx].get());
-				BamUtils::writeFastqOrphans(pairs, singles, bamReadIds[bamIdx], myBam);
-				cerr << "Closing pair and single unmaps for " << myBam.getBamName() << endl;
-				// close the two files now
-				bamUnmappedFastqof[bamIdx].reset();
-				cerr << "Closed pair and single unmaps for " << myBam.getBamName() << endl;
-			}
-			struct stat filestatus;
-			stat( name.c_str(), &filestatus );
-
-			if (filestatus.st_size <= 20) {
-				unlink(name.c_str());
-			} else {
-				cerr << "Additional orphaned reads are output as singles to: " << name << endl;
-			}
-
-			cerr << "Freeing up memory for " << myBam.getBamName() << endl;
-			bamReadIds[bamIdx].clear();
-		}
-	}
+	if (checkpoint) for(auto &checkpoint_file : partialFiles) unlink(checkpoint_file.c_str());
 
 	if (!referenceFastaFile.empty() && !unmappedFastqFile.empty()) {
 		string shredFileName = unmappedFastqFile + "-contigShreds.fasta";
@@ -825,11 +1092,6 @@ int main(int argc, char *argv[]) {
 		}
 	}
 
-	cerr << "Closing most bam files" << endl;
-	for(int bamIdx = 1; bamIdx < (int) bams.size(); bamIdx++) {
-		bams[bamIdx].close();
-	}
-
 	if (!outputGCFile.empty() && !refGCWindows.empty()) {
 		for(int contigIdx = 0; contigIdx < (int) refGCWindows.size(); contigIdx++) {
 			if (contigDepthPass[contigIdx]) {


=====================================
src/jgi_summarize_bam_contig_depths.h
=====================================
@@ -27,9 +27,8 @@
 #include <boost/iostreams/filtering_streambuf.hpp>
 #include <boost/iostreams/filter/gzip.hpp>
 #include <boost/filesystem/fstream.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/lexical_cast.hpp>
-#include <boost/shared_ptr.hpp>
+#include <unordered_map>
+#include <memory>
 
 #include "OpenMP.h"
 #include "BamUtils.h"
@@ -43,7 +42,7 @@ typedef BamUtils::StringVector StringVector;
 
 typedef uint32_t ContigIdxType;
 typedef uint64_t CountType;
-typedef vector< CountType * > CountTypeMatrix;
+typedef vector< std::shared_ptr<CountType[]> > CountTypeMatrix;
 typedef map<ContigIdxType, CountType> PairedCountType;
 typedef vector< PairedCountType > PairedCountTypeMatrix;
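
The new CountTypeMatrix rows use std::shared_ptr<CountType[]>, whose array form calls delete[] on destruction and provides operator[] element access (the latter requires C++17). A toy illustration of the allocate-or-zero pattern the .cpp uses for each row; names are illustrative:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>

typedef uint64_t CountType; // as in the typedef above

int main() {
	int n_targets = 4;
	std::shared_ptr<CountType[]> row; // an empty matrix slot
	if (!row)
		row.reset(new CountType[n_targets]()); // value-initialized (zeroed)
	else
		std::memset(row.get(), 0, n_targets * sizeof(CountType)); // reuse: clear stale counts
	row[2] = 42; // operator[] is valid for the array form (C++17)
	printf("depth[2]=%llu\n", (unsigned long long) row[2]);
	return 0; // delete[] happens automatically
}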
 
@@ -77,7 +76,7 @@ extern ThreadBlocker tb;
 class DepthCounts {
 public:
 	typedef int32_t BaseCountType;
-	typedef boost::shared_ptr< BaseCountType > Mem;
+	typedef std::shared_ptr< BaseCountType[] > Mem;
 private:
 	int _allocLen;
 	Mem _alloc;
@@ -91,8 +90,14 @@ public:
 	operator bool() {
 		return _alloc && baseCounts != NULL && _allocLen > 0;
 	}
+	void add_depth(int refpos, int len) {
+		for(int i = refpos; i < refpos + len ; i++) baseCounts[i]++;
+	}
+	void add_map_qual(int refpos, int len, int mq) {
+		for(int i = refpos; i < refpos + len; i++) mapQualities[i] += mq;
+	}
 	void resetBaseCounts(int targetLen, bool mapQualitiesToo) {
-		targetLen++; // protect against off-by-one errors in alignment
+		targetLen++; // protect against off-by-one errors in reported alignment
 		int requiredLen = targetLen;
 		if (mapQualitiesToo) requiredLen *= 2;
 		if (_allocLen < requiredLen) {
@@ -131,7 +136,7 @@ public:
 				exit(1);
 			}
 		}
-		memset(baseCounts, 0, _allocLen * sizeof(BaseCountType));
+		memset(baseCounts, 0, (mapQualitiesToo ? 2 : 1) * targetLen * sizeof(BaseCountType));
 	}
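
The memset change matters because the single allocation holds two logical arrays: base counts in the first half and map-quality sums in the second. A toy model (not the real class) showing why the cleared size must scale with mapQualitiesToo:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
	int targetLen = 8;
	bool mapQualitiesToo = true;
	// one allocation: base counts in the first half, map-quality sums in the second
	std::vector<int32_t> alloc((mapQualitiesToo ? 2 : 1) * targetLen, 7); // pretend stale data
	int32_t *baseCounts = alloc.data();
	int32_t *mapQualities = mapQualitiesToo ? baseCounts + targetLen : NULL;
	// clearing only targetLen entries would leave the map qualities stale
	std::memset(baseCounts, 0, (mapQualitiesToo ? 2 : 1) * targetLen * sizeof(int32_t));
	printf("baseCounts[0]=%d mapQualities[0]=%d\n", baseCounts[0], mapQualities[0]); // 0 0
	return 0;
}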
 	void clear() {
 		_alloc.reset();
@@ -149,12 +154,13 @@ public:
 	typedef float T;
 	T mean,variance;
 	VarianceType(): mean(0), variance(0) {}
+	void reset() { mean = 0.0; variance = 0.0; }
 	~VarianceType() {}
 };
 
 typedef vector< vector< VarianceType > > VarianceTypeMatrix;
 typedef boost::iostreams::filtering_streambuf<boost::iostreams::output> gzipFileBuf;
-typedef boost::shared_ptr< gzipFileBuf > gzipFileBufPtr;
+typedef std::shared_ptr< gzipFileBuf > gzipFileBufPtr;
 
 
 class ReadStatistics {
@@ -170,14 +176,13 @@ public:
 		// seqlen remains the length of the sequence...
 		const char *failMsg = NULL;
 		if (nm != substitutions + insertions + deletions) { failMsg = "nm != sub + ins + del"; }
-		if (seqlen != exactmatches + substitutions + insertions + softclips) { failMsg = "seqlen != exactmatches + substitutions + insertions + softclips"; }
+		if (seqlen != 0 && seqlen != exactmatches + substitutions + insertions + softclips) { failMsg = "seqlen != exactmatches + substitutions + insertions + softclips"; }
 		if (alignlen != alignend - alignstart) { failMsg = "alignlen != alignend - alignstart"; }
 		if (alignlen - skipRef != exactmatches + substitutions + deletions) { failMsg = "alignlen - skipRef != exactmatches + substitutions + deletions"; }
-		if (insertions - deletions + softclips != seqlen - (alignlen - skipRef)) { failMsg = "insertions - deletions + softclips != seqlen - (alignlen - skipRef)"; }
+		if (seqlen != 0 && insertions - deletions + softclips != seqlen - (alignlen - skipRef)) { failMsg = "insertions - deletions + softclips != seqlen - (alignlen - skipRef)"; }
 		if (failMsg != NULL) { cerr << bam1_qname(b) << " " << failMsg << ": cigar mismatch alignlen=" << alignlen << " seqlen=" << seqlen << " skipRef=" << skipRef << " exactmatches=" <<  exactmatches << " substitutions=" << substitutions << " deletions=" << deletions << " insertions=" << insertions << " softclips=" << softclips << " hardclips=" << hardclips << " astart=" << alignstart << " aend=" << alignend << " seqstart=" <<  seqstart << " seqend=" << seqend << " nm=" << nm << endl; }
 		assert( failMsg == NULL );
-		assert(seqlen > 0);
-		assert(seqlen >= seqend - seqstart);
+		assert(seqlen == 0 || seqlen >= seqend - seqstart);
 	}
 
 	static uint32_t calculateAlignment(bam1_t *b, uint32_t &alignstart, uint32_t &alignend, uint32_t &seqstart, uint32_t &seqend) {
@@ -255,11 +260,9 @@ public:
 		return ((float) exactmatches) / (alignlen - skipRef);
 	}
 	float getPctId6() const {
-		assert(seqlen > 0);
-		return ((float) exactmatches) / seqlen;
+		return seqlen > 0 ? ((float) exactmatches) / seqlen : 1;
 	}
 	float getPctId7() const {
-		assert(seqlen > 0);
 		return ((float) exactmatches) / (alignlen-skipRef > seqlen ? alignlen-skipRef : seqlen);
 	}
 	float getPctId8() const {
@@ -467,6 +470,7 @@ CountType caldepth(bam1_t *b, DepthCounts depthCounts = DepthCounts(), int32_t r
 	int len = 0;
 	uint32_t *cigar = bam1_cigar(b), insertions = 0, deletions = 0, seqpos = 0, refpos = b->core.pos, mismatches = 0, exactmatches = 0, softclips = 0, hardclips = 0, skipRef = 0;
 	if (refpos >= 0 && refpos >= (uint32_t) refLen && refLen > 0) {
+		#pragma omp critical (OUTPUT)
 		cerr << "WARNING: bam has improper refpos (" << refpos << ") vs reflen (" << refLen << ") ref=" << b->core.tid << ": " << bam1_qname(b) << endl;
 		return 0;
 	}
@@ -481,7 +485,7 @@ CountType caldepth(bam1_t *b, DepthCounts depthCounts = DepthCounts(), int32_t r
 		switch(op) {
 		case BAM_CMATCH: // M is match or mismatch!
 			cmatches += oplen;
-			if (ref != NULL) {
+			if (ref != NULL && b->core.l_qseq >= seqpos + oplen) {
 				mismatchesInOp = calcMismatches(b, seqpos, ref, refpos, oplen);
 				mismatches += mismatchesInOp;
 			}
@@ -504,22 +508,15 @@ CountType caldepth(bam1_t *b, DepthCounts depthCounts = DepthCounts(), int32_t r
 			if (depthCounts) {
 				assert(depthCounts.baseCounts != NULL);
 				assert(depthCounts.getAllocLen() >= refpos + oplen);
-				DepthCounts::BaseCountType *endBase = depthCounts.baseCounts + refpos + oplen;
-				assert( depthCounts.getAllocLen() >= endBase - depthCounts.baseCounts );
 				if(refLen > 0 && oplen + refpos > (uint32_t) refLen) {
 					cerr << "WARNING: bam has improper refpos + oplen: " << bam1_qname(b) << endl;
 					cerr << "WARNING: bam has improper depthCount pos refpos ("<<refpos<<") + oplen ("<<oplen<<") refLen ("<<refLen<<"): " << bam1_qname(b) << endl;
 					return 0;
 				}
-				for(DepthCounts::BaseCountType *baseCount = depthCounts.baseCounts + refpos; baseCount != endBase; baseCount++) {
-					(*baseCount)++;
-				}
+				depthCounts.add_depth(refpos, oplen);
+
 				if (depthCounts.mapQualities != NULL) {
-					endBase = depthCounts.mapQualities + refpos + oplen;
-					int mq = b->core.qual;
-					for(DepthCounts::BaseCountType *mapQual = depthCounts.mapQualities + refpos; mapQual != endBase; mapQual++) {
-						(*mapQual) += mq;
-					}
+					depthCounts.add_map_qual(refpos, oplen, b->core.qual);
 				}
 			}
 			if (ignoreEdges > 0 && refLen > 2*ignoreEdges) {
@@ -564,6 +561,7 @@ CountType caldepth(bam1_t *b, DepthCounts depthCounts = DepthCounts(), int32_t r
 		int32_t nm = bam_aux2i(NM);
 		if (nm < (int32_t) (insertions + deletions) ) {
 			if ( !(WARNING_FLAG&0x1) ) {
+				#pragma omp critical (OUTPUT)
 				fprintf(stderr, "WARNING: your aligner reports an incorrect NM field.  You should run samtools calmd! nm < ins + del: cmatch=%d nm=%d ( insert=%d + del=%d + mismatch=%d == %d) %s\n", cmatches, nm, insertions, deletions, mismatches, insertions+deletions+mismatches, bam1_qname(b));
 			}
 			WARNING_FLAG |= 0x1;
@@ -586,6 +584,7 @@ CountType caldepth(bam1_t *b, DepthCounts depthCounts = DepthCounts(), int32_t r
 							exactmatches -= cmatches;
 						} else {
 							if (!(WARNING_FLAG&0x2)) {
+								#pragma omp critical (OUTPUT)
 								fprintf(stderr, "Warning: Ambiguous base in maping (cmatch=%d) may be the source of NM (%d) discrepency ( insert=%d + del=%d + mismatch=%d == %d). corrected mismatch count to %d: %s\n", cmatches, nm, insertions, deletions, mismatches, insertions+deletions+mismatches, nm - (insertions + deletions), bam1_qname(b));
 							}
 							WARNING_FLAG |= 0x2;
@@ -610,6 +609,7 @@ CountType caldepth(bam1_t *b, DepthCounts depthCounts = DepthCounts(), int32_t r
 		}
 		if ( nm != (int32_t) (insertions + deletions + mismatches) ) {
 			if (!(WARNING_FLAG&0x04)) {
+				#pragma omp critical (OUTPUT)
 				fprintf(stderr, "Warning: consider running calmd on your bamfile! nm (%d) != insertions (%d) + deletions (%d) + mismatches (%d) (== %d) for read %s\n", nm, insertions, deletions, mismatches, insertions+deletions+mismatches, bam1_qname(b));
 			}
 			WARNING_FLAG |= 0x04;
@@ -742,7 +742,10 @@ VarianceType calculateVarianceContig(bam_header_t *header, uint32_t contigIdx, D
 std::string getContigDepthByBase(bam_header_t *header, uint32_t contigIdx, DepthCounts depthCounts, int32_t ignoreEdges = 0, float weightMapQual = 0.0) {
 	assert(depthCounts);
 	std::stringstream ss;
-	ss << header->target_name[contigIdx];
+	std::string nameonly = header->target_name[contigIdx];
+	std::string::size_type pos = nameonly.find_first_of(" \t");
+	nameonly = nameonly.substr(0,pos);
+	ss << nameonly;
 	uint32_t start = 0, end = header->target_len[contigIdx];
 
 	if ((int) end > 2*ignoreEdges) {
@@ -757,14 +760,14 @@ std::string getContigDepthByBase(bam_header_t *header, uint32_t contigIdx, Depth
 	ss << "\n";
 
 	if (weightMapQual > 0.0 && depthCounts.mapQualities != NULL) {
-		ss << header->target_name[contigIdx] << "-avgMapQual";
+		ss << nameonly << "-avgMapQual";
 		std::vector<float> avgMapQuals = getAvgMapQuals(header, contigIdx, depthCounts, ignoreEdges);
 		for(std::vector<float>::iterator itr = avgMapQuals.begin(); itr != avgMapQuals.end(); itr++) {
 			ss << "\t" << (int) (*itr+0.5); // round
 		}
 		ss << "\n";
 		std::vector<float> weights = avgMapQualsToWeights(avgMapQuals, weightMapQual);
-		ss << header->target_name[contigIdx] << "-mapWeights";
+		ss << nameonly << "-mapWeights";
 		for(std::vector<float>::iterator itr = weights.begin(); itr != weights.end(); itr++) {
 			ss << "\t" << *itr;
 		}
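
The trimming added above keeps only the token before the first space or tab, so depth rows match contig names even when the FASTA header carries a comment (see the test/contigs.fa change below). In isolation:

#include <cstdio>
#include <string>

static std::string nameOnly(const std::string &header) {
	std::string::size_type pos = header.find_first_of(" \t");
	return header.substr(0, pos); // npos means "to the end", so comment-free names pass through
}

int main() {
	printf("%s\n", nameOnly("NODE_1_length_5374_cov_8.558988 with comment").c_str());
	// prints: NODE_1_length_5374_cov_8.558988
	return 0;
}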


=====================================
src/metabat2.cpp
=====================================
@@ -81,12 +81,16 @@ int main(int ac, char* av[]) {
 	minCVSum = std::max(minCV, minCVSum);
 
 	boost::filesystem::path dir(outFile);
+	boost::system::error_code ec;
 	if (dir.parent_path().string().length() > 0) {
 		if (boost::filesystem::is_regular_file(dir.parent_path())) {
 			cerr << "Cannot create directory: " << dir.parent_path().string() << ", which exists as a regular file." << endl;
 			return 1;
 		}
-		boost::filesystem::create_directory(dir.parent_path());
+		if (!boost::filesystem::is_directory(dir.parent_path()) && !boost::filesystem::create_directory(dir.parent_path(), ec)) {
+			cerr << "Cannot create directory: " << dir.parent_path().string() << ": " << ec << endl;
+			return 1;
+		}
 	}
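
This switches to the non-throwing create_directory overload and reports ec.message() instead of letting boost::filesystem throw. The check in isolation, with an illustrative path:

#include <boost/filesystem.hpp>
#include <boost/system/error_code.hpp>
#include <iostream>

int main() {
	boost::filesystem::path dir("bins/contigs_bins");
	boost::system::error_code ec;
	if (dir.parent_path().string().length() > 0 &&
	    !boost::filesystem::is_directory(dir.parent_path()) &&
	    !boost::filesystem::create_directory(dir.parent_path(), ec)) {
		std::cerr << "Cannot create directory: " << dir.parent_path().string()
		          << ": " << ec.message() << std::endl;
		return 1;
	}
	return 0;
}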
 
 	print_message("MetaBAT 2 (%s) using minContig %d, minCV %2.1f, minCVSum %2.1f, maxP %2.0f%%, minS %2.0f, maxEdges %d and minClsSize %d. with random seed=%lld\n", version.c_str(), minContig, minCV, minCVSum, maxP, minS, maxEdges, minClsSize, seed);
@@ -217,14 +221,19 @@ int main(int ac, char* av[]) {
 			return 1;
 		} else {
 			std::ofstream *os = NULL;
+			char os_buffer[buf_size];
+			std::string filteredFile_cls;
 			if (outUnbinned) {
-				std::string filteredFile_cls;
 				filteredFile_cls = outFile + ".";
 				filteredFile_cls.append("tooShort");
 				if (!onlyLabel) {
 					filteredFile_cls.append(".fa");
 				}
 				os = new std::ofstream(filteredFile_cls.c_str());
+				if (!os->is_open() || os->fail() || !*os) {
+					cerr << "[Error!] can't open the output bin file: " << filteredFile_cls << endl;
+					return 1;
+				}
 				os->rdbuf()->pubsetbuf(os_buffer, buf_size);
 				if (verbose) verbose_message("Outputting contigs that are too short to %s\n", filteredFile_cls.c_str());
 			}
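
The open-check / pubsetbuf / close-check pattern used for each output file, reduced to a standalone sketch. Note that pubsetbuf on a std::filebuf only takes effect before the first write, and whether it is honored at all is implementation-defined (libstdc++ honors it):

#include <fstream>
#include <iostream>
#include <string>

static const std::size_t buf_size = 1024 * 1024; // as in metabat2.h

int main() {
	std::string name = "contigs_bins.tooShort.fa";
	char os_buffer[buf_size];
	std::ofstream os(name.c_str());
	if (!os) { // covers is_open() and failbit in one test
		std::cerr << "[Error!] can't open the output file: " << name << std::endl;
		return 1;
	}
	os.rdbuf()->pubsetbuf(os_buffer, buf_size); // must run before the first write
	os << ">contig_1\nACGT\n";
	os.close();
	if (!os) { // close() can surface deferred write errors (e.g. full disk)
		std::cerr << "[Error!] Failed to write to " << name << std::endl;
		return 1;
	}
	return 0;
}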
@@ -256,7 +265,14 @@ int main(int ac, char* av[]) {
 			kseq_destroy(kseq);
 			kseq = NULL;
 			gzclose(f);
-			if (os) delete os;
+			if (os) {
+				os->close();
+				if (!*os) {
+					cerr << "[Error!] Failed to write to " << filteredFile_cls << endl;
+					return 1;
+				}
+				delete os;
+			}
 		}
 	}
 
@@ -282,14 +298,19 @@ int main(int ac, char* av[]) {
 		size_t num = 0, num1 = 0, nskip = 0, nskip1 = 0;
 
 		std::ofstream *os = NULL;
+		char os_buffer[buf_size];
+		std::string filteredFile_cls;
 		if (outUnbinned) {
-			std::string filteredFile_cls;
 			filteredFile_cls = outFile + ".";
 			filteredFile_cls.append("lowDepth");
 			if (!onlyLabel) {
 				filteredFile_cls.append(".fa");
 			}
 			os = new std::ofstream(filteredFile_cls.c_str());
+			if (!os->is_open() || os->fail() || !*os) {
+				cerr << "[Error!] Failed to open to " << filteredFile_cls << endl;
+				return 1;
+			}
 			os->rdbuf()->pubsetbuf(os_buffer, buf_size);
 			if (verbose) verbose_message("Outputting contigs that are too low depth to %s\n", filteredFile_cls.c_str());
 		}
@@ -435,7 +456,14 @@ int main(int ac, char* av[]) {
 			}
 		}
 		is.close();
-		if (os) delete os;
+		if (os) {
+			os->close();
+			if (!*os) {
+				cerr << "[Error!] Failed to write to " << filteredFile_cls << endl;
+				return 1;
+			}
+			delete os;
+		}
 
 		assert(nobs == num && nobs1 == num1);
 
@@ -674,7 +702,12 @@ int main(int ac, char* av[]) {
 					verbose_message("Building SCR Graph and Binning (%d vertices and %d edges) [P = %2.2f%%; %.1fGb / %.1fGb]                           \n", g2.connected_nodes.size(), g2.getEdgeCount(), p_schedule2[which_p] * 100, getUsedPhysMem(), getTotalPhysMem() / 1024 / 1024);
 
 					if (debug) {
-						std::ofstream os("cluster.log." + boost::lexical_cast<std::string>(which_p));
+						std::string osfileName("cluster.log." + boost::lexical_cast<std::string>(which_p));
+						std::ofstream os(osfileName);
+						if (!os) {
+							cerr << "[Error!] Failed to write to " << osfileName << endl;
+							return 1;
+						}
 						ClassMap _cls;
 						for (size_t i = 0; i < nobs; ++i) {
 							_cls[mems[i]].push_back(i);
@@ -691,6 +724,10 @@ int main(int ac, char* av[]) {
 							}
 						}
 						os.close();
+						if (!os) {
+							cerr << "[Error!] Failed to write to " << osfileName << endl;
+							return 1;
+						}
 					}
 
 					if (++which_p == p_schedule2.size())
@@ -756,43 +793,52 @@ int main(int ac, char* av[]) {
 			//1. calculate mean corr within bins
 			//2. cal mean corr from a contig to  a bin greater than mean and assign it to the best bin over the threshold
 			std::unordered_map<size_t, double> cls_corr;
+			#pragma omp parallel
+			#pragma omp single
 			for (auto it = cls.begin(); it != cls.end(); ++it) {
 				size_t kk = it->first;
-				size_t cs = cls[kk].size();
+				size_t cs = it->second.size();
 				if (cs >= minCS) {
-					double corr = 0;
-					#pragma omp parallel for schedule(dynamic) reduction(+:corr)
-					for (size_t i = 0; i < cs; ++i) {
-						for (size_t j = i + 1; j < cs; ++j) {
-							corr += cal_abd_corr(cls[kk][i], cls[kk][j]);
+					#pragma omp task
+					{
+						double corr = 0;
+						const auto &c = it->second;
+						for (size_t i = 0; i < cs; ++i) {
+							for (size_t j = i + 1; j < cs; ++j) {
+								corr += cal_abd_corr(c[i], c[j]);
+							}
 						}
+							
+						double x = corr / (cs * (cs - 1) / 2);
+						#pragma omp critical(CALC_MEAN_CORR)
+						cls_corr[kk] = x;
 					}
-					cls_corr[kk] = corr / (cs * (cs - 1) / 2);
 				}
 			}
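
The restructured loop is the single-producer / task-per-cluster OpenMP pattern: one thread walks the map, spawns a task per sufficiently large cluster, and each task publishes its result under the named critical section. A toy sketch, with the pairwise cal_abd_corr() sum replaced by a simple mean:

#include <omp.h>
#include <cstdio>
#include <map>
#include <vector>

int main() {
	std::map<int, std::vector<double>> cls = {
		{1, {0.9, 0.8, 0.7}}, {2, {0.5, 0.6, 0.4, 0.3}}};
	std::map<int, double> cls_corr;
#pragma omp parallel
#pragma omp single
	for (auto it = cls.begin(); it != cls.end(); ++it) {
#pragma omp task firstprivate(it)
		{
			double sum = 0; // toy stand-in for the pairwise cal_abd_corr() sum
			for (double v : it->second)
				sum += v;
			double x = sum / it->second.size();
#pragma omp critical(CALC_MEAN_CORR)
			cls_corr[it->first] = x;
		}
	}
	// implicit barrier at the end of the parallel region: all tasks are done here
	for (auto &kv : cls_corr)
		printf("cluster %d mean %.2f\n", kv.first, kv.second);
	return 0;
}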
 
 			verbose_message("Binning lost contigs...          \n");
 			ClassMap cls_leftovers;
-			#pragma omp parallel for schedule(dynamic)
+			#pragma omp parallel for schedule(dynamic,1)
 			for (size_t l = 0; l < leftovers.size(); ++l) {
 				int best_cls = -1;
 				for (auto it = cls.begin(); it != cls.end(); ++it) {
 					size_t kk = it->first;
-					size_t cs = cls[kk].size();
+					const auto &c = it->second;
+					size_t cs = c.size();
 					if (cs >= minCS) {
 						double corr = 0;
 						size_t i = 0;
 
 						//subset
 						for (; i < minCS; ++i)
-							corr += cal_abd_corr(cls[kk][i], leftovers[l]);
+							corr += cal_abd_corr(c[i], leftovers[l]);
 
 						//early stop
 						if (corr / minCS < cls_corr[kk])
 							continue;
 
 						for (; i < cs; ++i)
-							corr += cal_abd_corr(cls[kk][i], leftovers[l]);
+							corr += cal_abd_corr(c[i], leftovers[l]);
 
 						corr /= cs;
 						if (corr >= cls_corr[kk]) {
@@ -826,20 +872,21 @@ int main(int ac, char* av[]) {
 					int best_cls = -1;
 					for (auto it = cls.begin(); it != cls.end(); ++it) {
 						size_t kk = it->first;
-						size_t cs = cls[kk].size();
+						const auto &c = it->second;
+						size_t cs = c.size();
 						if (cs >= minCS) {
 							double corr = 0;
 							size_t i = 0;
 							//subset
 							for (; i < minCS; ++i)
-								corr += cal_abd_corr(cls[kk][i], s, true);
+								corr += cal_abd_corr(c[i], s, true);
 
 							//early stop
 							if (corr / minCS < cls_corr[kk])
 								continue;
 
 							for (; i < cs; ++i)
-								corr += cal_abd_corr(cls[kk][i], s, true);
+								corr += cal_abd_corr(c[i], s, true);
 
 							corr /= cs;
 							if (corr >= cls_corr[kk]) {
@@ -1321,13 +1368,15 @@ void output_bins (ClassMap& cls) {
 #pragma omp single
 {
 	Distance binnedSize = 0, binnedSize1 = 0;
+	std::vector<size_t> clsMap(nobs + nobs1, 0);
 
 	size_t bin_id = 1; // start with bin #1
 	for (auto it = cls.begin(); it != cls.end(); ++it) {
 		size_t kk = it->first;
+		assert(kk>=0);
 		size_t s = 0, s1 = 0;
 		{
-			auto &cluster = it->second; // in new block for compatiblity with old OpenMP standard that does not support references in private vars
+			const auto &cluster = it->second; // in new block for compatiblity with old OpenMP standard that does not support references in private vars
 
 			for (auto it2 = cluster.begin(); it2 != cluster.end(); ++it2) {
 				if (*it2 < (int) nobs) {
@@ -1340,6 +1389,11 @@ void output_bins (ClassMap& cls) {
 			if (s + s1 < minClsSize) {
 				continue;
 			}
+
+			for (size_t i = 0; i < cluster.size(); ++i) {
+				assert(cluster[i] < (int ) clsMap.size());
+				clsMap[cluster[i]] = kk + 1;
+			}
 		}
 
 		binnedSize += s;
@@ -1356,6 +1410,11 @@ void output_bins (ClassMap& cls) {
 
 			size_t bases = 0;
 			std::ofstream os(outFile_cls.c_str());
+			if (!os) {
+				cerr << "[Error!] Could not write to " << outFile_cls << endl;
+				exit(1);
+			}
+			char os_buffer[buf_size];
 			os.rdbuf()->pubsetbuf(os_buffer, buf_size);
 			for (auto it2 = cluster.begin(); it2 != cluster.end(); ++it2) {
 				std::string& label = (*it2 < (int) nobs) ? contig_names[*it2] : small_contig_names[*it2 - nobs];
@@ -1368,6 +1427,10 @@ void output_bins (ClassMap& cls) {
 				}
 			}
 			os.close();
+			if (!os) {
+				cerr << "[Error!] Failed to write to " << outFile_cls << endl;
+				exit(1);
+			}
 
 			if (debug)
 				cout << "Bin " << bin_id << " (" << bases << " bases in " << cluster.size() << " contigs) was saved to: " << outFile_cls << endl;
@@ -1376,23 +1439,18 @@ void output_bins (ClassMap& cls) {
 		bin_id++;
 	}
 
-#pragma omp task
-	if (saveCls || outUnbinned) {
-
-		std::vector<size_t> clsMap(nobs + nobs1, 0);
-
-		for (auto it = cls.begin(); it != cls.end(); ++it) {
-			size_t kk = it->first;
-			for (size_t i = 0; i < cls[kk].size(); ++i) {
-				assert(cls[kk][i] < (int ) clsMap.size());
-				clsMap[cls[kk][i]] = it->first + 1;
-			}
-		}
+	if (saveCls) {
+		#pragma omp task
+		{
 
-		if (saveCls) {
 			if (verbose) verbose_message("Saving cluster membership matrix to %s\n", outFile.c_str());
 
 			std::ofstream os(outFile.c_str());
+			if (!os) {
+				cerr << "[Error!] Could not write cluster membership to " << outFile << endl;
+				exit(1);
+			}
+			char os_buffer[buf_size];
 			os.rdbuf()->pubsetbuf(os_buffer, buf_size);
 
 			for (size_t i = 0; i < nobs; ++i) {
@@ -1404,9 +1462,16 @@ void output_bins (ClassMap& cls) {
 
 			os.flush();
 			os.close();
+			if (!os) {
+				cerr << "[Error!] Failed to write cluster membership to " << outFile << endl;
+				exit(1);
+			}
 		}
+	}
 
-		if (outUnbinned) {
+	if (outUnbinned) {
+		#pragma omp task
+		{
 			std::string outFile_cls = outFile + ".";
 			outFile_cls.append("unbinned");
 			if (!onlyLabel)
@@ -1415,6 +1480,11 @@ void output_bins (ClassMap& cls) {
 			if (verbose) verbose_message("Saving unbinned contigs to %s\n", outFile_cls.c_str());
 
 			std::ofstream os(outFile_cls.c_str());
+			if (!os) {
+				cerr << "[Error!] Could not to write unbinned contigs to " << outFile_cls << endl;
+				exit(1);
+			}
+			char os_buffer[buf_size];
 			os.rdbuf()->pubsetbuf(os_buffer, buf_size);
 
 			for (size_t i = 0; i < clsMap.size(); ++i) {
@@ -1430,6 +1500,10 @@ void output_bins (ClassMap& cls) {
 			}
 			os.flush();
 			os.close();
+			if (!os) {
+				cerr << "[Error!] Failed to write unbinned contigs to " << outFile_cls << endl;
+				exit(1);
+			}
 		}
 
 	}


=====================================
src/metabat2.h
=====================================
@@ -41,10 +41,11 @@ KSEQ_INIT(gzFile, gzread)
 #define BOOST_UBLAS_TYPE_CHECK 0
 
 #include <boost/program_options.hpp>
-#include <boost/algorithm/string.hpp>
+#include <string>
 #include <boost/math/distributions.hpp>
 #include <boost/dynamic_bitset.hpp>
 #include <boost/filesystem.hpp>
+#include <boost/system/error_code.hpp>
 
 #if (BOOST_VERSION / 100000 == 1) && (BOOST_VERSION / 100 % 1000 == 64)
 #include <boost/serialization/array_wrapper.hpp>
@@ -98,7 +99,6 @@ static const char line_delim = '\n';
 static const char tab_delim = '\t';
 static const char fasta_delim = '>';
 static const std::size_t buf_size = 1024 * 1024;
-static char os_buffer[buf_size];
 
 static std::vector<std::string> contig_names;
 static std::vector<std::string> small_contig_names;
@@ -259,12 +259,6 @@ public:
 
 void gen_tnf_graph(Graph& g, Similarity cutoff);
 
-unsigned long long rdtsc() {
-	unsigned int lo, hi;
-	__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-	return ((unsigned long long) hi << 32) | lo;
-}
-
 static void trim_fasta_label(std::string &label) {
 	size_t pos = label.find_first_of(" \t");
 	if (pos != std::string::npos)


=====================================
test/CMakeLists.txt
=====================================
@@ -1,14 +1,28 @@
 
 set(CTEST_OUTPUT_ON_FAILURE 1)
 
+file(MAKE_DIRECTORY  ${CMAKE_BINARY_DIR}/test-each)
 add_test(NAME test_depths
-         COMMAND ${CMAKE_BINARY_DIR}/src/jgi_summarize_bam_contig_depths --referenceFasta ${CMAKE_CURRENT_SOURCE_DIR}/contigs.fa --outputDepth ${CMAKE_BINARY_DIR}/contigs_depth.txt ${CMAKE_CURRENT_SOURCE_DIR}/contigs-1000.fastq.bam
+         COMMAND ${CMAKE_BINARY_DIR}/src/jgi_summarize_bam_contig_depths --referenceFasta ${CMAKE_CURRENT_SOURCE_DIR}/contigs.fa --outputDepth contigs_depth.txt ${CMAKE_CURRENT_SOURCE_DIR}/contigs-1000.fastq.bam
+         WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/test-each
         )
 
+file(MAKE_DIRECTORY  ${CMAKE_BINARY_DIR}/test-each)
 add_test(NAME test_metabat1
-         COMMAND ${CMAKE_BINARY_DIR}/src/metabat1 --inFile ${CMAKE_CURRENT_SOURCE_DIR}/contigs.fa --abdFile ${CMAKE_BINARY_DIR}/contigs_depth.txt --outFile ${CMAKE_BINARY_DIR}/contigs_bins -v
+         COMMAND ${CMAKE_BINARY_DIR}/src/metabat1 --inFile ${CMAKE_CURRENT_SOURCE_DIR}/contigs.fa --abdFile contigs_depth.txt --outFile metabat1/contigs_bins -v
+         WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/test-each
         )
 
+file(MAKE_DIRECTORY  ${CMAKE_BINARY_DIR}/test-each)
 add_test(NAME test_metabat2
-         COMMAND ${CMAKE_BINARY_DIR}/src/metabat2 --inFile ${CMAKE_CURRENT_SOURCE_DIR}/contigs.fa --abdFile ${CMAKE_BINARY_DIR}/contigs_depth.txt --outFile ${CMAKE_BINARY_DIR}/contigs_bins -v
+         COMMAND ${CMAKE_BINARY_DIR}/src/metabat2 --inFile ${CMAKE_CURRENT_SOURCE_DIR}/contigs.fa --abdFile contigs_depth.txt --outFile metabat2/contigs_bins -v
+         WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/test-each
         )
+
+file(COPY ${CMAKE_SOURCE_DIR}/runMetaBat.sh DESTINATION ${CMAKE_BINARY_DIR}/src)
+file(MAKE_DIRECTORY  ${CMAKE_BINARY_DIR}/test-runMetaBat)
+add_test(NAME runMetaBat.sh
+         COMMAND  ${CMAKE_BINARY_DIR}/src/runMetaBat.sh ${CMAKE_CURRENT_SOURCE_DIR}/contigs.fa ${CMAKE_CURRENT_SOURCE_DIR}/contigs-1000.fastq.bam
+         WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/test-runMetaBat
+        )
+


=====================================
test/contigs.fa
=====================================
@@ -1,4 +1,4 @@
->NODE_1_length_5374_cov_8.558988
+>NODE_1_length_5374_cov_8.558988 with comment
 TAGCCTCGGTACGGTCAGGCATCCACGGCGCTTTAAAATAGTTGTTATAGATATTCAAAT
 AACCCTGAAACAAATGCTTAGGGATTTTATTGGTATCAGGGTTAATCGTGCCAAGAAAAG
 CGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCAT



View it on GitLab: https://salsa.debian.org/med-team/metabat/-/compare/67d37d7f8b6c14d75997ae9cbf8e0e56b854ae95...ffd476cb77d1e683118f86eaeca3ddc3914e021f
