[med-svn] [canu] 02/06: Imported Upstream version 1.3+dfsg
Afif Elghraoui
afif at moszumanska.debian.org
Wed Jul 13 15:49:18 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository canu.
commit bce57ac2ee79b176f5a35ce4b380e09995b79f36
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Wed Jul 13 07:30:06 2016 -0700
Imported Upstream version 1.3+dfsg
---
README.md | 32 +-
addCopyrights-BuildData.pl | 10 +-
addCopyrights.dat | 3119 +++-----------------
addCopyrights.pl | 2 +-
documentation/source/commands/bogart.rst | 34 +-
.../source/commands/gatekeeperDumpFASTQ.rst | 1 +
documentation/source/commands/ovStoreDump.rst | 10 +-
documentation/source/conf.py | 4 +-
documentation/source/faq.rst | 124 +
documentation/source/index.rst | 2 +
documentation/source/parameter-reference.rst | 124 +-
documentation/source/quick-start.rst | 140 +-
documentation/source/tutorial.rst | 27 +-
src/AS_UTL/AS_UTL_stackTrace.C | 14 +-
src/AS_UTL/intervalList.H | 6 +-
.../intervalListTest.C} | 56 +-
src/AS_UTL/memoryMappedFile.H | 8 +-
src/AS_UTL/stddev.H | 92 +-
src/AS_UTL/stddevTest.C | 116 +
src/AS_global.H | 6 +
src/Makefile | 39 +-
src/bogart/AS_BAT_BestOverlapGraph.C | 1015 +++----
src/bogart/AS_BAT_BestOverlapGraph.H | 192 +-
src/bogart/AS_BAT_Breaking.C | 332 ---
src/bogart/AS_BAT_Breaking.H | 112 -
src/bogart/AS_BAT_ChunkGraph.C | 28 +-
src/bogart/AS_BAT_ChunkGraph.H | 14 +-
src/bogart/AS_BAT_Datatypes.H | 339 ---
src/bogart/AS_BAT_FragmentInfo.C | 18 +-
src/bogart/AS_BAT_FragmentInfo.H | 123 +
src/bogart/AS_BAT_Instrumentation.C | 494 +++-
src/bogart/AS_BAT_Instrumentation.H | 3 +-
src/bogart/AS_BAT_IntersectBubble.C | 660 -----
src/bogart/AS_BAT_IntersectSplit.C | 334 ---
src/bogart/AS_BAT_IntersectSplit.H | 151 -
src/bogart/AS_BAT_Joining.C | 405 ---
src/bogart/AS_BAT_Logging.C | 76 +-
src/bogart/AS_BAT_Logging.H | 17 +-
src/bogart/AS_BAT_MarkRepeatReads.C | 1133 +++++++
...BAT_PlaceZombies.H => AS_BAT_MarkRepeatReads.H} | 27 +-
src/bogart/AS_BAT_MergeSplitJoin.C | 1763 -----------
src/bogart/AS_BAT_MergeSplitJoin.H | 57 -
src/bogart/AS_BAT_MergeUnitigs.C | 246 ++
.../stddev.C => bogart/AS_BAT_MergeUnitigs.H} | 24 +-
src/bogart/AS_BAT_Outputs.C | 490 +--
src/bogart/AS_BAT_Outputs.H | 17 +-
src/bogart/AS_BAT_OverlapCache.C | 67 +-
src/bogart/AS_BAT_OverlapCache.H | 70 +-
src/bogart/AS_BAT_PlaceContains.C | 279 +-
src/bogart/AS_BAT_PlaceContains.H | 11 +-
src/bogart/AS_BAT_PlaceFragUsingOverlaps.C | 501 +---
src/bogart/AS_BAT_PlaceFragUsingOverlaps.H | 13 +-
src/bogart/AS_BAT_PlaceZombies.C | 110 -
src/bogart/AS_BAT_PopBubbles.C | 679 +++++
.../{AS_BAT_Joining.H => AS_BAT_PopBubbles.H} | 27 +-
src/bogart/AS_BAT_PopBubbles.txt | 87 +
src/bogart/AS_BAT_PopulateUnitig.C | 54 +-
src/bogart/AS_BAT_PromoteToSingleton.C | 4 +-
.../AS_BAT_PromoteToSingleton.H} | 18 +-
src/bogart/AS_BAT_ReconstructRepeats.C | 15 +-
src/bogart/AS_BAT_RepeatJunctionEvidence.H | 232 --
src/bogart/AS_BAT_SetParentAndHang.C | 280 +-
src/bogart/AS_BAT_SetParentAndHang.H | 2 +
src/bogart/AS_BAT_SplitDiscontinuous.C | 52 +-
src/bogart/AS_BAT_Unitig.C | 505 +++-
src/bogart/AS_BAT_Unitig.H | 280 +-
src/bogart/AS_BAT_UnitigVector.C | 218 ++
src/bogart/AS_BAT_UnitigVector.H | 61 +
src/bogart/AS_BAT_Unitig_AddAndPlaceFrag.C | 144 -
src/bogart/AS_BAT_Unitig_AddFrag.C | 32 +-
src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C | 561 +---
src/bogart/AS_BAT_findEdges.C | 178 --
src/bogart/bogart.C | 299 +-
src/bogart/bogart.mk | 13 +-
src/bogart/buildGraph.C | 324 +-
src/correction/errorEstimate.C | 206 ++
src/{mhap/mhap.mk => correction/errorEstimate.mk} | 11 +-
src/falcon_sense/falcon_sense.C | 10 +-
src/falcon_sense/libfalcon/falcon.C | 19 +-
src/falcon_sense/libfalcon/falcon.H | 2 +-
src/falcon_sense/libfalcon/kmer_lookup.C | 0
src/main.mk | 4 +-
src/merTrim/merTrimAdapter.C | 0
src/meryl/libleaff/fastaFile.C | 3 -
src/meryl/libleaff/fastaStdin.C | 14 +-
src/meryl/libleaff/fastqFile.C | 4 -
src/meryl/libleaff/fastqStdin.C | 7 +-
src/meryl/libleaff/gkStoreFile.C | 2 +-
src/meryl/libleaff/seqCache.C | 3 -
src/meryl/libleaff/seqFile.H | 2 +
src/meryl/libmeryl.C | 125 +-
src/meryl/libmeryl.H | 7 +-
src/meryl/meryl-build.C | 65 +-
src/meryl/meryl-dump.C | 4 +-
src/mhap/mhap.mk | 4 +-
.../correctOverlaps-Correct_Frags.C | 13 +-
.../correctOverlaps-Prefix_Edit_Distance.C | 6 +-
.../correctOverlaps-Read_Olaps.C | 7 +
.../correctOverlaps-Redo_Olaps.C | 21 +-
src/overlapErrorAdjustment/correctOverlaps.C | 30 +
src/overlapErrorAdjustment/correctOverlaps.H | 6 +
.../findErrors-Analyze_Alignment.C | 30 +-
src/overlapErrorAdjustment/findErrors-Dump.C | 97 +
.../findErrors-Dump.mk} | 11 +-
src/overlapErrorAdjustment/findErrors.C | 8 +
.../overlapInCore-Process_String_Overlaps.C | 11 +-
src/overlapInCore/overlapInCore.C | 8 +-
src/overlapInCore/overlapInCore.H | 30 +-
src/overlapInCore/overlapPair.C | 27 +-
src/pipelines/bogart-sweep.pl | 185 ++
src/pipelines/canu.pl | 27 +-
src/pipelines/canu/Configure.pm | 118 +-
src/pipelines/canu/CorrectReads.pm | 30 +-
src/pipelines/canu/CorrectReads.txt | 41 +
src/pipelines/canu/Defaults.pm | 118 +-
src/pipelines/canu/ErrorEstimate.pm | 258 ++
src/pipelines/canu/Execution.pm | 103 +-
src/pipelines/canu/Execution.txt | 85 +
src/pipelines/canu/Gatekeeper.pm | 22 +-
src/pipelines/canu/Grid_LSF.pm | 9 +
src/pipelines/canu/Grid_PBSTorque.pm | 33 +-
src/pipelines/canu/Grid_SGE.pm | 11 +
src/pipelines/canu/Grid_Slurm.pm | 43 +-
src/pipelines/canu/HTML.pm | 144 +-
src/pipelines/canu/Meryl.pm | 44 +-
src/pipelines/canu/Output.pm | 8 +-
src/pipelines/canu/OverlapBasedTrimming.pm | 9 +-
src/pipelines/canu/OverlapErrorAdjustment.pm | 59 +-
src/pipelines/canu/OverlapInCore.pm | 13 +
src/pipelines/canu/OverlapMMap.pm | 77 +-
src/pipelines/canu/OverlapMhap.pm | 160 +-
src/pipelines/canu/OverlapStore.pm | 48 +-
src/pipelines/canu/Unitig.pm | 58 +-
src/stores/gatekeeperCreate.C | 6 +-
src/stores/gatekeeperDumpFASTQ.C | 90 +-
src/stores/gkStore.C | 24 +-
src/stores/gkStore.H | 10 +-
src/stores/gkStoreEncode.C | 4 +-
src/stores/ovOverlap.C | 19 +
src/stores/ovStore.H | 6 +
src/stores/ovStoreBuild.C | 103 +-
src/stores/ovStoreDump.C | 389 ++-
src/stores/ovStoreStats.C | 73 +-
src/stores/tgStoreCompress.C | 170 ++
src/{mhap/mhap.mk => stores/tgStoreCompress.mk} | 12 +-
src/stores/tgStoreCoverageStat.C | 66 +-
src/stores/tgStoreLoad.C | 6 +-
src/utgcns/libNDFalcon/dw.C | 0
src/utgcns/libNDFalcon/dw.H | 0
src/utgcns/libcns/NOTES | 38 +
src/utgcns/utgcns.C | 1 +
151 files changed, 9426 insertions(+), 11243 deletions(-)
diff --git a/README.md b/README.md
index eb6c47b..da8c900 100644
--- a/README.md
+++ b/README.md
@@ -1,32 +1,36 @@
# Canu
-<img style="float: left; margin: 50px 50px;" align=left src="https://raw.githubusercontent.com/marbl/canu/master/logo.jpg" width="125" /> Canu is a fork of the [Celera Assembler](http://wgs-assembler.sourceforge.net/wiki/index.php?title=Main_Page "Celera Assembler") designed for high-noise single-molecule sequencing (such as the PacBio RSII or Oxford Nanopore MinION). The software is currently alpha level, feel free to use and report issues encountered.
+Canu is a fork of the [Celera Assembler](http://wgs-assembler.sourceforge.net/wiki/index.php?title=Main_Page), designed for high-noise single-molecule sequencing (such as the [PacBio](http://www.pacb.com) [RS II](http://www.pacb.com/products-and-services/pacbio-systems/rsii/) or [Oxford Nanopore](https://www.nanoporetech.com/) [MinION](https://www.nanoporetech.com/products-services/minion-mki)).
-## Build
+Canu is a hierarchical assembly pipeline which runs in four steps:
+
+* Detect overlaps in high-noise sequences using [MHAP](https://github.com/marbl/MHAP)
+* Generate corrected sequence consensus
+* Trim corrected sequences
+* Assemble trimmed corrected sequences
+
+## Build:
git clone https://github.com/marbl/canu.git
cd canu/src
make -j <number of threads>
-
-For a quick user-quide, run:
+
+## Run:
+
+Brief command line help:
../<architecture>/bin/canu
-For full list of options, run:
+Full list of parameters:
../<architecture>/bin/canu -options
-## Docs
-Canu is a hierachical assembly pipeline which runs in four steps:
+## Learn:
-* Detect overlaps in high-noise sequences using [MHAP](https://github.com/marbl/MHAP "MHAP")
-* Generate corrected sequence consensus
-* Trim corrected sequences
-* Assemble trimmed corrected sequences
+The [quick start](http://canu.readthedocs.io/en/stable/quick-start.html) will get you assembling quickly, while the [tutorial](http://canu.readthedocs.io/en/stable/tutorial.html) explains things in more detail.
-Read the [documentation](http://canu.readthedocs.org/ "docs")
+## Citation:
-## Citation
- - Berlin K, Koren S, Chin CS, Drake PJ, Landolin JM, Phillippy AM [Assembling Large Genomes with Single-Molecule Sequencing and Locality Sensitive Hashing](http://www.nature.com/nbt/journal/v33/n6/abs/nbt.3238.html "nb"). Nature Biotechnology. (2015).
+ - Berlin K, Koren S, Chin CS, Drake PJ, Landolin JM, Phillippy AM [Assembling Large Genomes with Single-Molecule Sequencing and Locality Sensitive Hashing](http://www.nature.com/nbt/journal/v33/n6/abs/nbt.3238.html). Nature Biotechnology. (2015).
- Stay tuned for a Canu-specific citation
diff --git a/addCopyrights-BuildData.pl b/addCopyrights-BuildData.pl
index 7b6c46e..82362c2 100644
--- a/addCopyrights-BuildData.pl
+++ b/addCopyrights-BuildData.pl
@@ -22,10 +22,16 @@ if (! -e "logs") {
system("git log --name-status > logs");
}
+# Update this after each copyright update commit, please. The best method is to commit
+# the copyright changes (but none of the addCopyrights files), update this file, and
+# then commit the addCopyrights files.
+
my %stoppingCommits;
-$stoppingCommits{"6950cb74e302a97673a5ba482b3b8992eea72c37"} = 1; # Initial copyright addition.
-$stoppingCommits{"72c27c95d61cb8f37e859c4039456eb2acc5c55b"} = 1; # Second copyright addition.
+$stoppingCommits{"6950cb74e302a97673a5ba482b3b8992eea72c37"} = 1; # 20 AUG 2015 - Initial copyright addition.
+$stoppingCommits{"72c27c95d61cb8f37e859c4039456eb2acc5c55b"} = 1; # 19 NOV 2015 - Second copyright addition.
+$stoppingCommits{"b2df5790f77d38cc31fe77a7f65360e02389f92e"} = 1; # 04 MAR 2016
+$stoppingCommits{"1ef335952342ef06ad1651a888f09c312f54dab8"} = 1; # 18 MAY 2016
open(F, "< logs") or die "Failed to open 'logs': $!\n";
diff --git a/addCopyrights.dat b/addCopyrights.dat
index 3bc6877..dba4a79 100644
--- a/addCopyrights.dat
+++ b/addCopyrights.dat
@@ -9005,28 +9005,6 @@ A src/overlapInCore/overlapImport.C nihh20160217Sergey Koren
A src/AS_global.H nihh20160215Sergey Koren
A src/Makefile nihh20160215Sergey Koren
A src/overlapInCore/liboverlap/prefixEditDistance-matchLimitGenerate.C nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/config/compiler/clang.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/config/stdlib/libcpp.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/move/detail/iterator_traits.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/move/detail/meta_utils_core.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/move/detail/type_traits.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/move/detail/workaround.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/move/utility_core.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/insert_impl.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/mpl/insert.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/mpl/insert_range_fwd.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/insert_range_impl.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/is_transparent.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/ord_index_impl_fwd.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/promotes_arg.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/raw_ptr.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/predef/os/haiku.h nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/predef/version.h nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/expand.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/detail/is_empty.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/detail/is_single_return.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_counted_base_clang.hpp nihh20160215Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_copy_assignable.hpp nihh20160215Sergey Koren
A src/correction/generateCorrectionLayouts.C nihh20160212Sergey Koren
A src/overlapInCore/overlapPair.C nihh20160212Sergey Koren
A src/overlapInCore/overlapPair.mk nihh20160212Sergey Koren
@@ -9397,1812 +9375,6 @@ A documentation/source/tutorial.rst nihh20151230Sergey Koren
A documentation/source/tutorial.rst nihh20151230Sergey Koren
A documentation/source/quick-start.rst nihh20151229Sergey Koren
A src/utgcns/libpbutgcns/LICENSE nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/aligned_storage.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/archive/archive_exception.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/archive/detail/abi_prefix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/archive/detail/abi_suffix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/archive/detail/decl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/call_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/checked_delete.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/backward_compatibility.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/concept_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/concept_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/general.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/has_constraints.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/usage.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept_archetype.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept_check.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/abi_prefix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/abi_suffix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/compiler/gcc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/no_tr1/cmath.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/no_tr1/memory.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/no_tr1/utility.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/platform/linux.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/platform/macos.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/posix_features.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/select_compiler_config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/select_platform_config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/select_stdlib_config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/stdlib/libstdcpp3.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/suffix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/user.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/addressof.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/checked_delete.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/demangle.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/enable_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/explicit_operator_bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/no_exceptions_support.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/noncopyable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/ref.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/swap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/typeinfo.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/cstdint.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/current_function.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/allocator_utilities.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/call_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/container_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/indirect_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/no_exceptions_support.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/reference_content.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/select_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/sp_typeinfo.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/workaround.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/exception/exception.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/foreach_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/alt_sstream.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/alt_sstream_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/compat_workarounds.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/config_macros.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/msvc_disambiguater.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/unset_macros.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/workarounds_gcc-2_95.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/workarounds_stlport.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/exceptions.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/feed_args.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/format_class.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/format_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/format_implementation.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/free_funcs.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/group.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/internals.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/internals_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/parsing.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/detail/float_functions.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/detail/hash_float.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/detail/limits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/extensions.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/hash.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/hash_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/adjacency_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/adjacency_list.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/buffer_concepts.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/detail/adj_list_edge_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/detail/adjacency_list.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/detail/edge.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/graph_concepts.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/graph_mutability_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/graph_selectors.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/graph_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/named_graph.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/numeric_values.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/properties.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/property_maps/constant_property_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/property_maps/null_property_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/integer/static_log2.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/integer_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/detail/config_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/detail/config_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/detail/enable_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/detail/facade_iterator_category.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/interoperable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_adaptor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_categories.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_concepts.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_facade.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/reverse_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/limits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/algorithm.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/core.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/detail/config_begin.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/detail/config_end.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/detail/meta_utils.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/move.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/utility.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/O1_size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/O1_size_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/advance.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/advance_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/always.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/apply_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/apply_wrap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/arg.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/arg_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/at.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/at_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/O1_size_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/adl_barrier.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/advance_backward.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/advance_forward.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/arg_typedef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/arithmetic_op.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/arity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/arity_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/at_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/begin_end_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/clear_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/common_name_wknd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/comparison_op.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/adl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/arrays.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/bcc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/bind.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/compiler.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/ctps.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/dtp.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/eti.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/forwarding.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/gcc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/gpu.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/has_apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/has_xxx.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/integral.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/intel.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/lambda.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/msvc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/msvc_typename.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/nttp.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/operators.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/overload_resolution.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/pp_counter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/preprocessor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/static_constant.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/ttp.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/typeof.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/use_preprocessed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/workaround.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/contains_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/empty_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/find_if_pred.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/full_lambda.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_begin.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_key_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/include_preprocessed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/inserter_algorithm.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/integral_wrapper.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/is_msvc_eti_arg.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/iter_apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/iter_fold_if_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/iter_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/lambda_arity_param.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/lambda_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/lambda_support.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/largest_int.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/msvc_eti_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/msvc_never_true.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/msvc_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/na.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/na_assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/na_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/na_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/nested_type_wknd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/nttp_decl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/numeric_cast_utils.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/numeric_op.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/overload_names.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/advance_backward.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/advance_forward.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/arg.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/bind.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/greater.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/iter_fold_if_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/iter_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/less.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/minus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/plus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/quote.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/reverse_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/reverse_iter_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/vector.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessor/def_params_tail.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessor/default_params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessor/enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessor/params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/ptr_to_ref.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/push_back_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/push_front_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/reverse_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/reverse_iter_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/size_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/static_cast.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/template_arity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/template_arity_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/traits_lambda_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/type_wrapper.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/value_wknd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/yes_no.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/back_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/back_inserter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/begin_end.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/begin_end_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/bind.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/bind_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/bool_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/clear.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/clear_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/comparison.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/contains.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/contains_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/deref.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/distance.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/distance_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/empty_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/erase_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/erase_key_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/eval_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/find.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/find_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/fold.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/front_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/front_inserter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/greater.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/greater_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/has_key.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/has_key_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/has_xxx.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/identity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/insert_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/inserter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/int.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/int_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/integral_c.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/integral_c_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/integral_c_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/is_sequence.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iter_fold.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iter_fold_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iterator_category.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iterator_range.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iterator_tags.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/key_type_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/lambda.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/lambda_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/less.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/less_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/limits/arity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/limits/vector.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/logical.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/long.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/long_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/min_max.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/minus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/negate.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/next.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/next_prior.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/not.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/not_equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/numeric_cast.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/pair.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/pair_view.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/placeholders.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/plus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/pop_back_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/pop_front_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/prior.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/protect.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/push_back.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/push_back_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/push_front.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/push_front_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/quote.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/reverse_fold.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/reverse_iter_fold.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/same_as.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/sequence_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/sequence_tag_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/at_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/begin_end_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/clear_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/empty_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/erase_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/erase_key_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/has_key_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/insert_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/item.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/key_type_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/set0.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/size_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/value_type_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/set0.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/size_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/size_t.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/size_t_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/transform.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/value_type_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/O1_size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/at.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/back.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/begin_end.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/clear.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/front.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/include_preprocessed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/item.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/pop_back.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/pop_front.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/preprocessed/typeof_based/vector10.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/preprocessed/typeof_based/vector20.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/push_back.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/push_front.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/vector0.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/vector0.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/vector10.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/vector20.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/void.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/void_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/access_specifier.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/adl_swap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/archive_constructed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/auto_space.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/base_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/bucket_array.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/converter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/copy_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/do_not_copy_elements_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/has_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/hash_index_args.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/hash_index_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/hash_index_node.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/header_holder.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_loader.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_matcher.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_node_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_saver.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/is_index_list.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/modify_key_adaptor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/no_duplicate_tags.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/node_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/ord_index_args.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/safe_mode.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/scope_guard.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/serialization_version.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/vartempl_support.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/hashed_index.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/hashed_index_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/identity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/identity_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/indexed_by.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/member.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/ordered_index_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index_container.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index_container_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/next_prior.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/noncopyable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/none.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/none_t.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/operators.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional/bad_optional_access.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional/optional.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional/optional_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/pending/container_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/pending/detail/property.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/pending/property.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/pointer_to_other.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/alpha.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/arm.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/blackfin.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/convex.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/ia64.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/m68k.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/mips.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/parisc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/ppc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/pyramid.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/rs6k.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/sparc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/superh.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/sys370.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/sys390.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/x86.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/x86/32.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/x86/64.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/z.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/borland.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/clang.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/comeau.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/compaq.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/diab.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/digitalmars.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/dignus.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/edg.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/ekopath.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/gcc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/gcc_xml.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/greenhills.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/hp_acc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/iar.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/ibm.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/intel.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/kai.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/llvm.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/metaware.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/metrowerks.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/microtec.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/mpw.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/palm.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/pgi.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/sgi_mipspro.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/sunpro.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/tendra.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/visualc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/watcom.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/_cassert.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/_exception.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/comp_detected.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/os_detected.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/test.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language/objc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language/stdc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language/stdcpp.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/_prefix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/gnu.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/uc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/vms.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/zos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/_prefix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/cxx.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/dinkumware.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/libcomo.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/modena.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/msl.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/roguewave.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/sgi.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/stdcpp3.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/stlport.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/vacpp.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/make.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/aix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/amigaos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/android.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/beos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/bsdi.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/dragonfly.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/free.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/net.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/open.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/cygwin.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/hpux.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/ios.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/irix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/linux.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/macos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/os400.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/qnxnto.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/solaris.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/unix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/vms.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/windows.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/other.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/other/endian.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/mingw.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/windows_desktop.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/windows_phone.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/windows_runtime.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/windows_store.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/version_number.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/add.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/dec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/detail/div_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/inc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/mod.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/sub.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/array/data.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/array/elem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/array/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/cat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/comma_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/comparison/less_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/config/config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/deduce_d.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/detail/while.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/expr_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/expr_iif.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/iif.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/while.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/debug/error.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/dec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/detail/auto_rec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/detail/check.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/detail/is_binary.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/identity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/intercept.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/overload.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/identity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/inc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/iteration/detail/bounds/lower1.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/iteration/detail/bounds/upper1.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/iteration/detail/iter/forward1.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/iteration/iterate.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/adt.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/append.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/detail/fold_left.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/detail/fold_right.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/fold_left.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/fold_right.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/for_each_i.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/reverse.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/transform.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/bitand.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/compl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/not.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/punctuation/comma.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/punctuation/comma_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/punctuation/paren.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repeat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/detail/for.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/enum_binary_params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/enum_params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/enum_trailing_params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/for.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/repeat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/repeat_from_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/cat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/elem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/fold_left.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/for_each_i.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/seq.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/transform.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/slot/detail/def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/slot/detail/shared.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/slot/slot.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/stringize.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/eat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/elem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/rem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/to_list.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/variadic/elem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/variadic/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/property_map/property_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/property_map/vector_property_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/algorithm/equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/begin.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/concepts.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/const_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/common.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/extract_optional_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/has_member_size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/implementation_help.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/misc_concept.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/msvc_has_iterator_workaround.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/safe_bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/sfinae.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/difference_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/distance.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/end.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/functions.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/has_range_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/irange.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/iterator_range.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/iterator_range_core.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/iterator_range_io.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/mutable_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/range_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/rbegin.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/rend.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/reverse_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/size_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/value_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/ref.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/scoped_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/access.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/base_object.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/collection_size_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/force_include.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/is_bitwise_serializable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/level.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/level_enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/nvp.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/pfto.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/serialization.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/split_free.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/split_member.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/strong_typedef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/tracking.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/tracking_enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/type_info_implementation.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/version.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/void_cast_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/wrapper.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/shared_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/bad_weak_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/operator_bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/shared_count.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_convertible.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_counted_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_counted_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_has_sync.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_nullptr_t.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/spinlock.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/spinlock_pool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/spinlock_sync.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/yield_k.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/scoped_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/shared_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/static_assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/swap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/throw_exception.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/tuple/detail/tuple_basic.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/tuple/tuple.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_const.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_cv.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_lvalue_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_rvalue_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_volatile.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/aligned_storage.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/alignment_of.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/common_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/conditional.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/conversion_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/cv_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/decay.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/bool_trait_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/bool_trait_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/cv_traits_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/false_result.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/has_binary_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/has_postfix_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/has_prefix_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/ice_and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/ice_eq.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/ice_not.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/ice_or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/is_function_ptr_helper.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/size_t_trait_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/size_t_trait_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/template_arity_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/type_trait_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/type_trait_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/yes_no_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/extent.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/floating_point_promotion.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/function_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_and_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_or_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_xor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_xor_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_complement.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_dereference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_divides.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_divides_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_greater.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_greater_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_left_shift.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_left_shift_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_less.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_less_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_logical_and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_logical_not.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_logical_or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_minus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_minus_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_modulus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_modulus_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_multiplies.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_multiplies_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_negate.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_new_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_not_equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_nothrow_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_nothrow_constructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_nothrow_copy.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_nothrow_destructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_plus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_plus_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_post_decrement.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_post_increment.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_pre_decrement.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_pre_increment.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_right_shift.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_right_shift_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_constructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_copy.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_destructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_move_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_move_constructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_unary_minus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_unary_plus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_virtual_destructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/ice.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/integral_constant.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/integral_promotion.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/intrinsics.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_abstract.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_arithmetic.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_array.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_base_and_derived.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_base_of.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_class.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_complex.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_compound.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_const.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_convertible.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_copy_constructible.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_float.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_floating_point.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_function.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_fundamental.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_integral.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_lvalue_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_member_function_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_member_object_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_member_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_nothrow_move_assignable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_nothrow_move_constructible.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_object.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_pod.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_polymorphic.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_rvalue_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_same.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_scalar.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_signed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_stateless.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_union.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_unsigned.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_virtual_base_of.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_void.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_volatile.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/make_signed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/make_unsigned.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/promote.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/rank.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_all_extents.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_bounds.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_const.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_cv.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_extent.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_volatile.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/same_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/type_with_alignment.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/typeof/message.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/typeof/native.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/typeof/typeof.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/allocate.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/buckets.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/equivalent.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/extract_key.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/table.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/unique.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/util.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/unordered_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/unordered_map_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/unordered_set.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/unordered_set_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered_set.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/addressof.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/base_from_member.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/binary.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/compare_pointees.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/declval.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/detail/in_place_factory_prefix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/detail/in_place_factory_suffix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/enable_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/identity_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/in_place_factory.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/swap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/value_init.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/none_t.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional/optional_io.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/aix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/amigaos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/android.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/beos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/bsdi.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/dragonfly.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/free.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/net.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/open.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/cygwin.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/hpux.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/irix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/linux.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/macos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/os400.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/qnxnto.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/solaris.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/unix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/vms.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/windows.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/aix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/amigaos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/android.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/beos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/bsdi.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/dragonfly.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/free.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/net.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/open.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/cygwin.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/hpux.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/irix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/linux.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/macos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/os400.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/qnxnto.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/solaris.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/unix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/vms.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/windows.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/aligned_storage.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/archive/archive_exception.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/archive/detail/abi_prefix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/archive/detail/abi_suffix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/archive/detail/decl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/call_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/checked_delete.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/backward_compatibility.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/concept_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/concept_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/general.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/detail/has_constraints.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept/usage.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept_archetype.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/concept_check.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/abi_prefix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/abi_suffix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/compiler/gcc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/no_tr1/cmath.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/no_tr1/memory.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/no_tr1/utility.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/platform/linux.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/posix_features.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/select_compiler_config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/select_platform_config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/select_stdlib_config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/stdlib/libstdcpp3.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/config/suffix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/addressof.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/checked_delete.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/demangle.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/enable_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/explicit_operator_bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/no_exceptions_support.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/noncopyable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/ref.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/swap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/core/typeinfo.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/allocator_utilities.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/call_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/container_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/indirect_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/no_exceptions_support.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/reference_content.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/select_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/sp_typeinfo.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/detail/workaround.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/foreach_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/alt_sstream.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/alt_sstream_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/compat_workarounds.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/config_macros.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/msvc_disambiguater.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/unset_macros.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/workarounds_gcc-2_95.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/detail/workarounds_stlport.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/exceptions.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/feed_args.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/format_class.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/format_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/format_implementation.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/free_funcs.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/group.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/internals.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/internals_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/format/parsing.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/detail/float_functions.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/detail/hash_float.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/detail/limits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/extensions.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/hash.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash/hash_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/functional/hash_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/adjacency_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/adjacency_list.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/buffer_concepts.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/detail/adj_list_edge_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/detail/adjacency_list.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/detail/edge.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/graph_concepts.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/graph_mutability_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/graph_selectors.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/graph_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/named_graph.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/numeric_values.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/properties.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/property_maps/constant_property_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/graph/property_maps/null_property_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/integer/static_log2.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/integer_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/detail/config_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/detail/config_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/detail/enable_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/detail/facade_iterator_category.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/interoperable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_adaptor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_categories.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_concepts.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_facade.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/iterator_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/iterator/reverse_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/limits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/algorithm.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/core.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/detail/config_begin.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/detail/config_end.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/detail/meta_utils.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/move.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/move/utility.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/O1_size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/O1_size_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/advance.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/advance_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/always.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/apply_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/apply_wrap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/arg.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/at.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/at_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/O1_size_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/adl_barrier.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/advance_backward.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/advance_forward.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/arg_typedef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/arithmetic_op.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/arity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/arity_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/at_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/begin_end_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/clear_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/common_name_wknd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/comparison_op.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/adl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/arrays.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/bcc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/bind.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/compiler.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/ctps.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/dtp.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/eti.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/forwarding.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/gcc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/gpu.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/has_apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/has_xxx.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/integral.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/intel.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/lambda.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/msvc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/msvc_typename.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/nttp.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/operators.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/overload_resolution.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/pp_counter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/preprocessor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/static_constant.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/ttp.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/typeof.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/use_preprocessed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/config/workaround.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/contains_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/empty_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/find_if_pred.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/full_lambda.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_begin.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_key_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/has_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/include_preprocessed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/inserter_algorithm.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/integral_wrapper.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/is_msvc_eti_arg.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/iter_apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/iter_fold_if_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/iter_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/lambda_arity_param.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/lambda_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/lambda_support.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/largest_int.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/msvc_eti_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/msvc_never_true.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/msvc_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/na.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/na_assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/na_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/na_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/nested_type_wknd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/nttp_decl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/numeric_cast_utils.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/numeric_op.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/overload_names.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/advance_backward.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/advance_forward.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/apply.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/arg.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/bind.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/greater.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/iter_fold_if_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/iter_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/less.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/minus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/plus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/quote.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/reverse_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/reverse_iter_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessed/gcc/vector.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessor/def_params_tail.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessor/default_params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessor/enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/preprocessor/params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/ptr_to_ref.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/push_back_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/push_front_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/reverse_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/reverse_iter_fold_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/size_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/static_cast.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/template_arity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/template_arity_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/traits_lambda_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/type_wrapper.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/value_wknd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/aux_/yes_no.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/back_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/back_inserter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/begin_end.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/begin_end_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/bind_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/bool_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/clear.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/clear_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/comparison.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/contains.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/contains_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/deref.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/distance.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/distance_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/empty_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/erase_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/erase_key_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/eval_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/find.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/fold.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/front_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/front_inserter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/greater.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/greater_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/has_key.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/has_key_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/has_xxx.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/identity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/insert_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/inserter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/int.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/integral_c.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/integral_c_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/integral_c_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/is_sequence.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iter_fold.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iter_fold_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iterator_category.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iterator_range.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/iterator_tags.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/key_type_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/lambda.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/lambda_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/less.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/less_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/limits/arity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/limits/vector.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/logical.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/long.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/long_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/min_max.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/minus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/negate.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/next.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/next_prior.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/not.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/not_equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/numeric_cast.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/pair_view.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/placeholders.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/plus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/pop_back_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/pop_front_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/prior.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/protect.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/push_back.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/push_back_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/push_front.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/push_front_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/quote.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/reverse_fold.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/reverse_iter_fold.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/same_as.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/sequence_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/sequence_tag_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/at_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/begin_end_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/clear_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/empty_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/erase_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/erase_key_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/has_key_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/insert_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/item.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/key_type_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/set0.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/size_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/aux_/value_type_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/set/set0.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/size_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/size_t.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/size_t_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/transform.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/value_type_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/O1_size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/at.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/back.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/begin_end.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/clear.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/front.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/include_preprocessed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/item.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/pop_back.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/pop_front.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/preprocessed/typeof_based/vector10.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/preprocessed/typeof_based/vector20.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/push_back.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/push_front.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/aux_/vector0.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/vector0.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/vector10.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/vector/vector20.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/void.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/mpl/void_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/access_specifier.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/adl_swap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/archive_constructed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/auto_space.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/base_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/bucket_array.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/converter.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/copy_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/do_not_copy_elements_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/has_tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/hash_index_args.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/hash_index_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/hash_index_node.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/header_holder.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_loader.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_matcher.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_node_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/index_saver.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/is_index_list.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/modify_key_adaptor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/no_duplicate_tags.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/node_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/ord_index_args.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/safe_mode.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/scope_guard.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/serialization_version.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/detail/vartempl_support.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/hashed_index.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/hashed_index_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/identity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/identity_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/indexed_by.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/member.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/ordered_index_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index/tag.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index_container.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/multi_index_container_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/next_prior.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/noncopyable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/none.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/operators.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional/bad_optional_access.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional/optional.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/optional/optional_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/pending/container_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/pending/detail/property.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/pending/property.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/pointer_to_other.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/alpha.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/arm.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/blackfin.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/convex.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/ia64.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/m68k.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/mips.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/parisc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/ppc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/pyramid.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/rs6k.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/sparc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/superh.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/sys370.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/sys390.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/x86.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/x86/32.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/x86/64.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/architecture/z.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/borland.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/clang.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/comeau.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/compaq.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/diab.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/digitalmars.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/dignus.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/edg.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/ekopath.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/gcc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/gcc_xml.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/greenhills.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/hp_acc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/iar.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/ibm.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/intel.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/kai.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/llvm.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/metaware.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/metrowerks.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/microtec.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/mpw.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/palm.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/pgi.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/sgi_mipspro.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/sunpro.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/tendra.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/visualc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/compiler/watcom.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/_cassert.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/_exception.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/comp_detected.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/os_detected.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/detail/test.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language/objc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language/stdc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/language/stdcpp.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/_prefix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/gnu.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/uc.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/vms.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/c/zos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/_prefix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/cxx.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/dinkumware.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/libcomo.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/modena.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/msl.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/roguewave.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/sgi.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/stdcpp3.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/stlport.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/library/std/vacpp.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/make.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/amigaos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/android.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/beos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/bsdi.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/dragonfly.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/free.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/net.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/bsd/open.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/cygwin.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/hpux.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/ios.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/irix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/linux.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/macos.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/os400.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/qnxnto.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/solaris.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/unix.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/vms.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/os/windows.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/other.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/other/endian.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/mingw.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/windows_desktop.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/windows_phone.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/windows_runtime.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/platform/windows_store.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/predef/version_number.h nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/add.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/dec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/detail/div_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/inc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/mod.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/arithmetic/sub.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/array/data.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/array/elem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/array/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/cat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/comma_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/comparison/less_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/config/config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/deduce_d.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/detail/while.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/expr_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/expr_iif.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/iif.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/control/while.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/debug/error.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/dec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/detail/auto_rec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/detail/check.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/detail/is_binary.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/identity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/intercept.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/facilities/overload.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/identity.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/inc.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/iteration/detail/bounds/lower1.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/iteration/detail/bounds/upper1.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/iteration/detail/iter/forward1.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/iteration/iterate.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/adt.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/append.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/detail/fold_left.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/detail/fold_right.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/fold_left.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/fold_right.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/for_each_i.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/reverse.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/list/transform.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/bitand.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/compl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/logical/not.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/punctuation/comma.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/punctuation/comma_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/punctuation/paren.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repeat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/detail/for.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/enum_binary_params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/enum_params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/enum_trailing_params.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/for.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/repeat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/repetition/repeat_from_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/cat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/elem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/fold_left.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/for_each_i.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/seq.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/seq/transform.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/slot/detail/def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/slot/detail/shared.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/slot/slot.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/stringize.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/eat.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/elem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/rem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/tuple/to_list.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/variadic/elem.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/preprocessor/variadic/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/property_map/property_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/property_map/vector_property_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/algorithm/equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/begin.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/concepts.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/const_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/common.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/extract_optional_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/has_member_size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/implementation_help.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/misc_concept.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/msvc_has_iterator_workaround.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/safe_bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/detail/sfinae.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/difference_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/distance.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/end.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/functions.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/has_range_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/irange.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/iterator_range.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/iterator_range_core.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/iterator_range_io.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/mutable_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/range_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/rbegin.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/rend.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/reverse_iterator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/size.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/size_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/range/value_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/scoped_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/access.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/base_object.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/collection_size_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/force_include.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/is_bitwise_serializable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/level.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/level_enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/nvp.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/pfto.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/serialization.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/split_free.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/split_member.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/strong_typedef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/tracking.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/tracking_enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/type_info_implementation.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/version.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/void_cast_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/serialization/wrapper.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/shared_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/bad_weak_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/operator_bool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/shared_count.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_convertible.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_counted_base.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_counted_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_has_sync.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/sp_nullptr_t.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/spinlock.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/spinlock_pool.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/spinlock_sync.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/detail/yield_k.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/scoped_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/smart_ptr/shared_ptr.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/static_assert.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/swap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/throw_exception.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/tuple/detail/tuple_basic.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/tuple/tuple.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_const.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_cv.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_lvalue_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_rvalue_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/add_volatile.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/aligned_storage.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/alignment_of.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/common_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/conditional.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/config.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/conversion_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/cv_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/decay.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/bool_trait_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/bool_trait_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/cv_traits_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/false_result.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/has_binary_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/has_postfix_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/has_prefix_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/ice_and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/ice_eq.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/ice_not.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/ice_or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/is_function_ptr_helper.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/size_t_trait_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/size_t_trait_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/template_arity_spec.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/type_trait_def.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/type_trait_undef.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/detail/yes_no_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/extent.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/floating_point_promotion.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/function_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_and_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_or_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_xor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_bit_xor_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_complement.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_dereference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_divides.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_divides_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_greater.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_greater_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_left_shift.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_left_shift_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_less.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_less_equal.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_logical_and.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_logical_not.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_logical_or.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_minus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_minus_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_modulus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_modulus_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_multiplies.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_multiplies_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_negate.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_new_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_not_equal_to.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_nothrow_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_nothrow_constructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_nothrow_copy.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_nothrow_destructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_operator.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_plus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_plus_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_post_decrement.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_post_increment.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_pre_decrement.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_pre_increment.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_right_shift.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_right_shift_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_constructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_copy.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_destructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_move_assign.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_trivial_move_constructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_unary_minus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_unary_plus.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/has_virtual_destructor.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/ice.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/integral_constant.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/integral_promotion.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/intrinsics.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_abstract.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_arithmetic.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_array.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_base_and_derived.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_base_of.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_class.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_complex.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_compound.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_const.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_convertible.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_copy_constructible.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_empty.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_enum.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_float.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_floating_point.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_function.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_fundamental.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_integral.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_lvalue_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_member_function_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_member_object_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_member_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_nothrow_move_assignable.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_nothrow_move_constructible.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_object.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_pod.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_polymorphic.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_rvalue_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_same.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_scalar.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_signed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_stateless.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_union.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_unsigned.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_virtual_base_of.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_void.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/is_volatile.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/make_signed.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/make_unsigned.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/promote.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/rank.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_all_extents.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_bounds.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_const.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_cv.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_extent.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_pointer.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_reference.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/remove_volatile.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/same_traits.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/type_traits/type_with_alignment.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/typeof/message.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/typeof/native.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/typeof/typeof.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/allocate.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/buckets.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/equivalent.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/extract_key.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/table.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/unique.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/detail/util.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/unordered_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/unordered_map_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/unordered_set.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered/unordered_set_fwd.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered_map.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/unordered_set.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/addressof.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/base_from_member.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/binary.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/compare_pointees.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/declval.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/detail/in_place_factory_prefix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/detail/in_place_factory_suffix.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/enable_if.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/identity_type.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/in_place_factory.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/swap.hpp nihh20151229Sergey Koren
-A src/utgcns/libboost/boost/utility/value_init.hpp nihh20151229Sergey Koren
A documentation/source/quick-start.rst nihh20151228Sergey Koren
A documentation/source/quick-start.rst nihh20151228Sergey Koren
A documentation/source/quick-start.rst nihh20151228Sergey Koren
@@ -11216,883 +9388,9 @@ A src/pipelines/canu.pl nihh20151228Sergey Koren
A src/pipelines/canu/Configure.pm nihh20151228Sergey Koren
A src/pipelines/canu/Consensus.pm nihh20151228Sergey Koren
A src/pipelines/canu/Defaults.pm nihh20151228Sergey Koren
-A src/utgcns/libboost/LICENSE nihh20151228Sergey Koren
A src/utgcns/libNDFalcon/LICENSE nihh20151228Sergey Koren
A src/utgcns/libNDFalcon/dw.C nihh20151228Sergey Koren
A src/utgcns/libNDFalcon/dw.H nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/aligned_storage.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/archive/archive_exception.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/archive/detail/abi_prefix.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/archive/detail/abi_suffix.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/archive/detail/decl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/assert.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/call_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/checked_delete.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept/assert.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept/detail/backward_compatibility.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept/detail/concept_def.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept/detail/concept_undef.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept/detail/general.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept/detail/has_constraints.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept/usage.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept_archetype.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/concept_check.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/abi_prefix.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/abi_suffix.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/compiler/gcc.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/no_tr1/cmath.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/no_tr1/memory.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/no_tr1/utility.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/platform/linux.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/posix_features.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/select_compiler_config.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/select_platform_config.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/select_stdlib_config.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/stdlib/libstdcpp3.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/config/suffix.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/addressof.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/checked_delete.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/demangle.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/enable_if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/explicit_operator_bool.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/no_exceptions_support.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/noncopyable.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/ref.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/swap.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/core/typeinfo.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/allocator_utilities.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/call_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/container_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/indirect_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/no_exceptions_support.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/reference_content.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/select_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/sp_typeinfo.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/detail/workaround.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/foreach_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/alt_sstream.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/alt_sstream_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/detail/compat_workarounds.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/detail/config_macros.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/detail/msvc_disambiguater.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/detail/unset_macros.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/detail/workarounds_gcc-2_95.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/detail/workarounds_stlport.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/exceptions.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/feed_args.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/format_class.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/format_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/format_implementation.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/free_funcs.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/group.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/internals.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/internals_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/format/parsing.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/functional/hash.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/functional/hash/detail/float_functions.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/functional/hash/detail/hash_float.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/functional/hash/detail/limits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/functional/hash/extensions.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/functional/hash/hash.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/functional/hash/hash_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/functional/hash_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/adjacency_iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/adjacency_list.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/buffer_concepts.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/detail/adj_list_edge_iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/detail/adjacency_list.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/detail/edge.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/graph_concepts.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/graph_mutability_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/graph_selectors.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/graph_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/named_graph.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/numeric_values.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/properties.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/property_maps/constant_property_map.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/graph/property_maps/null_property_map.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/integer/static_log2.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/integer_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/detail/config_def.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/detail/config_undef.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/detail/enable_if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/detail/facade_iterator_category.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/interoperable.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/iterator_adaptor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/iterator_categories.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/iterator_concepts.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/iterator_facade.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/iterator_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/iterator/reverse_iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/limits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/algorithm.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/core.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/detail/config_begin.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/detail/config_end.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/detail/meta_utils.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/move.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/move/utility.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/O1_size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/O1_size_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/advance.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/advance_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/always.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/and.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/apply.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/apply_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/apply_wrap.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/arg.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/assert.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/at.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/at_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/O1_size_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/adl_barrier.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/advance_backward.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/advance_forward.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/arg_typedef.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/arithmetic_op.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/arity.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/arity_spec.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/at_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/begin_end_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/clear_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/common_name_wknd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/comparison_op.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/adl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/arrays.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/bcc.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/bind.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/compiler.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/ctps.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/dtp.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/eti.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/forwarding.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/gcc.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/gpu.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/has_apply.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/has_xxx.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/integral.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/intel.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/lambda.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/msvc.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/msvc_typename.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/nttp.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/operators.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/overload_resolution.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/pp_counter.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/preprocessor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/static_constant.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/ttp.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/typeof.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/use_preprocessed.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/config/workaround.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/contains_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/empty_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/find_if_pred.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/fold_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/full_lambda.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/has_apply.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/has_begin.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/has_key_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/has_size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/has_tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/has_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/include_preprocessed.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/inserter_algorithm.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/integral_wrapper.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/is_msvc_eti_arg.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/iter_apply.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/iter_fold_if_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/iter_fold_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/lambda_arity_param.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/lambda_spec.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/lambda_support.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/largest_int.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/msvc_eti_base.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/msvc_never_true.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/msvc_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/na.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/na_assert.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/na_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/na_spec.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/nested_type_wknd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/nttp_decl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/numeric_cast_utils.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/numeric_op.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/overload_names.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/advance_backward.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/advance_forward.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/and.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/apply.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/arg.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/bind.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/fold_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/greater.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/iter_fold_if_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/iter_fold_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/less.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/minus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/or.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/plus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/quote.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/reverse_fold_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/reverse_iter_fold_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessed/gcc/vector.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessor/def_params_tail.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessor/default_params.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessor/enum.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/preprocessor/params.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/ptr_to_ref.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/push_back_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/push_front_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/reverse_fold_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/reverse_iter_fold_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/size_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/static_cast.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/template_arity.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/template_arity_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/traits_lambda_spec.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/type_wrapper.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/value_wknd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/aux_/yes_no.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/back_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/back_inserter.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/begin_end.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/begin_end_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/bind_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/bool.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/bool_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/clear.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/clear_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/comparison.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/contains.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/contains_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/deref.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/distance.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/distance_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/empty.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/empty_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/equal_to.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/erase_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/erase_key_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/eval_if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/find.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/fold.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/front_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/front_inserter.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/greater.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/greater_equal.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/has_key.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/has_key_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/has_xxx.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/identity.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/insert_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/inserter.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/int.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/integral_c.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/integral_c_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/integral_c_tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/is_sequence.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/iter_fold.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/iter_fold_if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/iterator_category.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/iterator_range.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/iterator_tags.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/key_type_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/lambda.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/lambda_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/less.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/less_equal.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/limits/arity.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/limits/vector.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/logical.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/long.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/long_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/min_max.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/minus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/negate.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/next.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/next_prior.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/not.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/not_equal_to.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/numeric_cast.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/or.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/pair_view.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/placeholders.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/plus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/pop_back_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/pop_front_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/prior.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/protect.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/push_back.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/push_back_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/push_front.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/push_front_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/quote.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/reverse_fold.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/reverse_iter_fold.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/same_as.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/sequence_tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/sequence_tag_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/at_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/begin_end_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/clear_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/empty_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/erase_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/erase_key_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/has_key_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/insert_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/item.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/key_type_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/set0.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/size_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/aux_/value_type_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/set/set0.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/size_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/size_t.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/size_t_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/transform.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/value_type_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/O1_size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/at.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/back.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/begin_end.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/clear.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/empty.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/front.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/include_preprocessed.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/item.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/pop_back.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/pop_front.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/preprocessed/typeof_based/vector10.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/preprocessed/typeof_based/vector20.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/push_back.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/push_front.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/aux_/vector0.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/vector0.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/vector10.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/vector/vector20.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/void.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/mpl/void_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/access_specifier.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/adl_swap.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/archive_constructed.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/auto_space.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/base_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/bucket_array.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/converter.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/copy_map.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/do_not_copy_elements_tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/has_tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/hash_index_args.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/hash_index_iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/hash_index_node.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/header_holder.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/index_base.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/index_loader.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/index_matcher.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/index_node_base.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/index_saver.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/is_index_list.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/modify_key_adaptor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/no_duplicate_tags.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/node_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/ord_index_args.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/safe_mode.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/scope_guard.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/serialization_version.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/detail/vartempl_support.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/hashed_index.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/hashed_index_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/identity.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/identity_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/indexed_by.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/member.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/ordered_index_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index/tag.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index_container.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/multi_index_container_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/next_prior.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/noncopyable.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/none.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/operators.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/optional/bad_optional_access.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/optional/optional.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/optional/optional_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/pending/container_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/pending/detail/property.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/pending/property.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/pointer_to_other.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/alpha.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/arm.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/blackfin.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/convex.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/ia64.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/m68k.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/mips.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/parisc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/ppc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/pyramid.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/rs6k.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/sparc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/superh.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/sys370.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/sys390.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/x86.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/x86/32.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/x86/64.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/architecture/z.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/borland.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/clang.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/comeau.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/compaq.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/diab.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/digitalmars.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/dignus.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/edg.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/ekopath.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/gcc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/gcc_xml.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/greenhills.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/hp_acc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/iar.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/ibm.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/intel.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/kai.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/llvm.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/metaware.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/metrowerks.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/microtec.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/mpw.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/palm.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/pgi.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/sgi_mipspro.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/sunpro.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/tendra.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/visualc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/compiler/watcom.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/detail/_cassert.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/detail/_exception.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/detail/comp_detected.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/detail/os_detected.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/detail/test.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/language/objc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/language/stdc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/language/stdcpp.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/c.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/c/_prefix.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/c/gnu.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/c/uc.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/c/vms.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/c/zos.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/_prefix.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/cxx.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/dinkumware.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/libcomo.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/modena.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/msl.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/roguewave.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/sgi.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/stdcpp3.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/stlport.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/library/std/vacpp.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/make.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/amigaos.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/android.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/beos.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/bsd.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/bsd/bsdi.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/bsd/dragonfly.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/bsd/free.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/bsd/net.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/bsd/open.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/cygwin.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/hpux.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/ios.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/irix.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/linux.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/macos.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/os400.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/qnxnto.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/solaris.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/unix.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/vms.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/os/windows.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/other.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/other/endian.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/platform.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/platform/mingw.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/platform/windows_desktop.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/platform/windows_phone.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/platform/windows_runtime.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/platform/windows_store.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/predef/version_number.h nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/arithmetic/add.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/arithmetic/dec.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/arithmetic/detail/div_base.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/arithmetic/inc.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/arithmetic/mod.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/arithmetic/sub.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/array/data.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/array/elem.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/array/size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/cat.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/comma_if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/comparison/less_equal.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/config/config.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/control/deduce_d.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/control/detail/while.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/control/expr_if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/control/expr_iif.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/control/if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/control/iif.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/control/while.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/debug/error.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/dec.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/detail/auto_rec.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/detail/check.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/detail/is_binary.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/empty.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/facilities/empty.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/facilities/identity.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/facilities/intercept.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/facilities/overload.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/identity.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/inc.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/iteration/detail/bounds/lower1.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/iteration/detail/bounds/upper1.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/iteration/detail/iter/forward1.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/iteration/iterate.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/adt.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/append.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/detail/fold_left.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/detail/fold_right.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/fold_left.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/fold_right.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/for_each_i.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/reverse.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/list/transform.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/logical/and.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/logical/bitand.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/logical/bool.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/logical/compl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/logical/not.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/punctuation/comma.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/punctuation/comma_if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/punctuation/paren.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repeat.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repetition/detail/for.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repetition/enum.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repetition/enum_binary_params.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repetition/enum_params.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repetition/enum_trailing_params.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repetition/for.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repetition/repeat.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/repetition/repeat_from_to.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/seq/cat.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/seq/elem.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/seq/enum.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/seq/fold_left.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/seq/for_each_i.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/seq/seq.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/seq/size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/seq/transform.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/slot/detail/def.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/slot/detail/shared.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/slot/slot.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/stringize.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/tuple/eat.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/tuple/elem.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/tuple/rem.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/tuple/size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/tuple/to_list.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/variadic/elem.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/preprocessor/variadic/size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/property_map/property_map.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/property_map/vector_property_map.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/algorithm/equal.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/begin.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/concepts.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/config.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/const_iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/detail/common.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/detail/extract_optional_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/detail/has_member_size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/detail/implementation_help.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/detail/misc_concept.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/detail/msvc_has_iterator_workaround.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/detail/safe_bool.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/detail/sfinae.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/difference_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/distance.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/empty.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/end.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/functions.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/has_range_iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/irange.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/iterator_range.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/iterator_range_core.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/iterator_range_io.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/mutable_iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/range_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/rbegin.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/rend.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/reverse_iterator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/size.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/size_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/range/value_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/scoped_ptr.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/access.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/base_object.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/collection_size_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/force_include.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/is_bitwise_serializable.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/level.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/level_enum.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/nvp.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/pfto.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/serialization.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/split_free.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/split_member.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/strong_typedef.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/tracking.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/tracking_enum.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/type_info_implementation.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/version.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/void_cast_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/serialization/wrapper.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/shared_ptr.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/bad_weak_ptr.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/operator_bool.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/shared_count.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/sp_convertible.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/sp_counted_base.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/sp_counted_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/sp_has_sync.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/sp_nullptr_t.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/spinlock.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/spinlock_pool.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/spinlock_sync.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/detail/yield_k.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/scoped_ptr.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/smart_ptr/shared_ptr.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/static_assert.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/swap.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/throw_exception.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/tuple/detail/tuple_basic.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/tuple/tuple.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/add_const.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/add_cv.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/add_lvalue_reference.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/add_pointer.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/add_reference.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/add_rvalue_reference.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/add_volatile.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/aligned_storage.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/alignment_of.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/common_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/conditional.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/config.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/conversion_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/cv_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/decay.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/bool_trait_def.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/bool_trait_undef.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/cv_traits_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/false_result.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/has_binary_operator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/has_postfix_operator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/has_prefix_operator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/ice_and.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/ice_eq.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/ice_not.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/ice_or.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/is_function_ptr_helper.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/size_t_trait_def.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/size_t_trait_undef.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/template_arity_spec.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/type_trait_def.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/type_trait_undef.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/detail/yes_no_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/extent.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/floating_point_promotion.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/function_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_bit_and.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_bit_and_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_bit_or.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_bit_or_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_bit_xor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_bit_xor_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_complement.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_dereference.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_divides.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_divides_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_equal_to.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_greater.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_greater_equal.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_left_shift.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_left_shift_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_less.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_less_equal.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_logical_and.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_logical_not.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_logical_or.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_minus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_minus_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_modulus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_modulus_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_multiplies.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_multiplies_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_negate.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_new_operator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_not_equal_to.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_nothrow_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_nothrow_constructor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_nothrow_copy.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_nothrow_destructor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_operator.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_plus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_plus_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_post_decrement.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_post_increment.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_pre_decrement.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_pre_increment.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_right_shift.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_right_shift_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_trivial_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_trivial_constructor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_trivial_copy.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_trivial_destructor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_trivial_move_assign.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_trivial_move_constructor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_unary_minus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_unary_plus.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/has_virtual_destructor.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/ice.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/integral_constant.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/integral_promotion.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/intrinsics.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_abstract.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_arithmetic.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_array.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_base_and_derived.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_base_of.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_class.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_complex.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_compound.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_const.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_convertible.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_copy_constructible.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_empty.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_enum.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_float.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_floating_point.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_function.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_fundamental.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_integral.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_lvalue_reference.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_member_function_pointer.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_member_object_pointer.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_member_pointer.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_nothrow_move_assignable.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_nothrow_move_constructible.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_object.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_pod.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_pointer.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_polymorphic.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_reference.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_rvalue_reference.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_same.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_scalar.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_signed.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_stateless.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_union.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_unsigned.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_virtual_base_of.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_void.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/is_volatile.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/make_signed.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/make_unsigned.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/promote.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/rank.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/remove_all_extents.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/remove_bounds.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/remove_const.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/remove_cv.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/remove_extent.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/remove_pointer.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/remove_reference.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/remove_volatile.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/same_traits.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/type_traits/type_with_alignment.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/typeof/message.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/typeof/native.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/typeof/typeof.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/detail/allocate.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/detail/buckets.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/detail/equivalent.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/detail/extract_key.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/detail/fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/detail/table.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/detail/unique.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/detail/util.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/unordered_map.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/unordered_map_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/unordered_set.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered/unordered_set_fwd.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered_map.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/unordered_set.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/addressof.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/base_from_member.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/binary.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/compare_pointees.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/declval.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/detail/in_place_factory_prefix.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/detail/in_place_factory_suffix.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/enable_if.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/identity_type.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/in_place_factory.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/swap.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/boost/utility/value_init.hpp nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/copy.sh nihh20151228Sergey Koren
-A src/utgcns/libboost/boost/list nihh20151228Sergey Koren
A src/utgcns/libcns/unitigConsensus.C nihh20151228Sergey Koren
A src/utgcns/libcns/unitigConsensus.H nihh20151228Sergey Koren
A src/utgcns/libpbutgcns/.gitignore nihh20151228Sergey Koren
@@ -12524,3 +9822,420 @@ A src/pipelines/canu/CorrectReads.pm nihh20151119Sergey Koren
A src/pipelines/canu/Defaults.pm nihh20151119Sergey Koren
A src/pipelines/canu/Meryl.pm nihh20151119Sergey Koren
A src/pipelines/canu/Defaults.pm nihh20151119Sergey Koren
+D src/bogart/AS_BAT_PromoteToSingleton.H bogart/AS_BAT_BreakRepeats.H
+D src/stores/tgStoreCompress.C src/stores/tgStore.C
+D src/bogart/AS_BAT_FragmentInfo.H src/bogart/AS_BAT_Datatypes.H
+A src/overlapErrorAdjustment/findErrors-Analyze_Alignment.C nihh20160518Brian P. Walenz
+A src/utgcns/libcns/NOTES nihh20160518Brian P. Walenz
+A src/bogart/AS_BAT_MergeUnitigs.C nihh20160517Brian P. Walenz
+A src/bogart/AS_BAT_MergeUnitigs.H nihh20160517Brian P. Walenz
+A src/bogart/bogart.C nihh20160517Brian P. Walenz
+A src/bogart/bogart.mk nihh20160517Brian P. Walenz
+A src/Makefile nihh20160516Sergey Koren
+A src/correction/errorEstimate.C nihh20160516Sergey Koren
+A src/correction/errorEstimate.mk nihh20160516Sergey Koren
+A src/main.mk nihh20160516Sergey Koren
+A src/pipelines/canu.pl nihh20160516Sergey Koren
+A src/pipelines/canu/Defaults.pm nihh20160516Sergey Koren
+A src/pipelines/canu/ErrorEstimate.pm nihh20160516Sergey Koren
+A src/pipelines/canu/Execution.pm nihh20160516Brian P. Walenz
+A src/pipelines/canu/Grid_LSF.pm nihh20160516Brian P. Walenz
+A src/pipelines/canu/Execution.pm nihh20160516Brian P. Walenz
+A src/pipelines/canu/Configure.pm nihh20160516Brian P. Walenz
+A src/pipelines/canu/OverlapStore.pm nihh20160516Brian P. Walenz
+A src/stores/ovStoreBuild.C nihh20160516Brian P. Walenz
+A src/pipelines/canu/Execution.pm nihh20160516Brian P. Walenz
+A src/pipelines/canu/Execution.pm nihh20160513Brian P. Walenz
+A src/pipelines/canu/Grid_Slurm.pm nihh20160513Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.C nihh20160513Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.C nihh20160513Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160513Brian P. Walenz
+A src/bogart/bogart.C nihh20160513Brian P. Walenz
+A src/bogart/bogart.mk nihh20160513Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.C nihh20160513Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.H nihh20160513Brian P. Walenz
+A src/bogart/bogart.C nihh20160513Brian P. Walenz
+A src/bogart/buildGraph.C nihh20160513Brian P. Walenz
+A src/pipelines/canu/Output.pm nihh20160513Brian P. Walenz
+A documentation/source/faq.rst nihh20160512Sergey Koren
+A src/bogart/AS_BAT_Unitig.H nihh20160510Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160510Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160510Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160510Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.H nihh20160510Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160510Brian P. Walenz
+A src/bogart/AS_BAT_OverlapCache.H nihh20160510Brian P. Walenz
+A src/pipelines/canu/Unitig.pm nihh20160510Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.C nihh20160509Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.H nihh20160509Brian P. Walenz
+A src/pipelines/canu/OverlapStore.pm nihh20160509Brian P. Walenz
+A src/pipelines/canu/OverlapMMap.pm nihh20160509Brian P. Walenz
+A src/pipelines/canu/OverlapMhap.pm nihh20160509Brian P. Walenz
+A documentation/source/parameter-reference.rst nihh20160506Sergey Koren
+A documentation/source/conf.py nihh20160506Sergey Koren
+A documentation/source/faq.rst nihh20160506Sergey Koren
+A documentation/source/index.rst nihh20160506Sergey Koren
+A documentation/source/parameter-reference.rst nihh20160506Sergey Koren
+A documentation/source/commands/bogart.rst nihh20160506Sergey Koren
+A documentation/source/commands/gatekeeperDumpFASTQ.rst nihh20160506Sergey Koren
+A documentation/source/commands/ovStoreDump.rst nihh20160506Sergey Koren
+A documentation/source/index.rst nihh20160506Sergey Koren
+A documentation/source/parameter-reference.rst nihh20160506Sergey Koren
+A documentation/source/tutorial.rst nihh20160506Sergey Koren
+A documentation/source/quick-start.rst nihh20160506Sergey Koren
+A src/pipelines/canu/CorrectReads.pm nihh20160504Brian P. Walenz
+A src/pipelines/canu/Defaults.pm nihh20160504Brian P. Walenz
+A src/pipelines/canu/OverlapMhap.pm nihh20160504Brian P. Walenz
+A src/stores/tgStoreCoverageStat.C nihh20160504Brian P. Walenz
+A src/pipelines/canu/Execution.pm nihh20160504Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160504Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160504Brian P. Walenz
+A src/stores/tgStoreCoverageStat.C nihh20160503Brian P. Walenz
+A src/pipelines/canu.pl nihh20160502Brian P. Walenz
+A src/overlapErrorAdjustment/correctOverlaps-Correct_Frags.C nihh20160502Brian P. Walenz
+A src/overlapErrorAdjustment/correctOverlaps-Prefix_Edit_Distance.C nihh20160502Brian P. Walenz
+A src/overlapErrorAdjustment/correctOverlaps-Read_Olaps.C nihh20160502Brian P. Walenz
+A src/overlapErrorAdjustment/correctOverlaps-Redo_Olaps.C nihh20160502Brian P. Walenz
+A src/overlapErrorAdjustment/correctOverlaps.C nihh20160502Brian P. Walenz
+A src/overlapErrorAdjustment/correctOverlaps.H nihh20160502Brian P. Walenz
+A src/pipelines/canu/Configure.pm nihh20160502Brian P. Walenz
+A src/pipelines/canu/OverlapErrorAdjustment.pm nihh20160502Brian P. Walenz
+A src/pipelines/canu/Configure.pm nihh20160502Brian P. Walenz
+A src/pipelines/canu/OverlapInCore.pm nihh20160502Brian P. Walenz
+A src/pipelines/canu/OverlapMMap.pm nihh20160502Brian P. Walenz
+A src/pipelines/canu/OverlapMhap.pm nihh20160502Brian P. Walenz
+A src/pipelines/canu/OverlapStore.pm nihh20160502Brian P. Walenz
+A src/pipelines/canu/OverlapMMap.pm nihh20160502Brian P. Walenz
+A src/pipelines/canu/OverlapMhap.pm nihh20160502Brian P. Walenz
+A README.md nihh20160428Brian P. Walenz
+A README.md nihh20160428Brian P. Walenz
+A src/stores/ovStoreDump.C nihh20160428Brian P. Walenz
+A src/stores/ovStoreDump.C nihh20160428Brian P. Walenz
+A src/pipelines/canu/OverlapErrorAdjustment.pm nihh20160428Brian P. Walenz
+A src/pipelines/canu/OverlapErrorAdjustment.pm nihh20160428Brian P. Walenz
+A src/overlapErrorAdjustment/correctOverlaps.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.H nihh20160428Brian P. Walenz
+A src/bogart/bogart.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_OverlapCache.H nihh20160428Brian P. Walenz
+A addCopyrights.dat nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_ChunkGraph.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_ChunkGraph.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_FragmentInfo.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_FragmentInfo.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Joining.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_OverlapCache.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_OverlapCache.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_PopulateUnitig.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_PromoteToSingleton.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_ReconstructRepeats.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_SetParentAndHang.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_SetParentAndHang.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_SplitDiscontinuous.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_UnitigVector.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_UnitigVector.H nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Unitig_AddFrag.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C nihh20160428Brian P. Walenz
+A src/bogart/bogart.C nihh20160428Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160427Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.C nihh20160427Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.H nihh20160427Brian P. Walenz
+A src/bogart/AS_BAT_SetParentAndHang.C nihh20160427Brian P. Walenz
+A src/bogart/bogart.C nihh20160427Brian P. Walenz
+A src/pipelines/canu/OverlapStore.pm nihh20160427Sergey Koren
+A src/AS_UTL/AS_UTL_stackTrace.C nihh20160426Sergey Koren
+A src/Makefile nihh20160426Sergey Koren
+A src/bogart/AS_BAT_OverlapCache.C nihh20160426Sergey Koren
+A src/falcon_sense/falcon_sense.C nihh20160426Sergey Koren
+A src/pipelines/canu/Defaults.pm nihh20160426Sergey Koren
+A src/pipelines/canu/OverlapMhap.pm nihh20160426Sergey Koren
+A src/utgcns/utgcns.C nihh20160426Sergey Koren
+A documentation/source/quick-start.rst nihh20160426Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.C nihh20160413Brian P. Walenz
+A src/AS_global.H nihh20160426Brian P. Walenz
+A src/pipelines/canu.pl nihh20160426Brian P. Walenz
+A src/AS_UTL/stddev.H nihh20160426Brian P. Walenz
+A src/AS_UTL/stddev.H nihh20160426Brian P. Walenz
+A src/pipelines/canu/OverlapErrorAdjustment.pm nihh20160426Sergey Koren
+A documentation/source/parameter-reference.rst nihh20160426Brian P. Walenz
+A src/pipelines/canu/Defaults.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Execution.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/OverlapErrorAdjustment.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Defaults.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Execution.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Configure.pm nihh20160426Brian P. Walenz
+A src/stores/gkStore.C nihh20160426Brian P. Walenz
+A src/pipelines/canu/Defaults.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Execution.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Grid_LSF.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Grid_PBSTorque.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Grid_SGE.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/Grid_Slurm.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu/OverlapBasedTrimming.pm nihh20160426Brian P. Walenz
+A src/pipelines/canu.pl nihh20160426Brian P. Walenz
+A src/stores/gatekeeperDumpFASTQ.C nihh20160426Brian P. Walenz
+A documentation/source/quick-start.rst nihh20160425Brian P. Walenz
+A src/pipelines/canu/Defaults.pm nihh20160422Sergey Koren
+A src/pipelines/canu/Unitig.pm nihh20160422Sergey Koren
+A src/pipelines/canu/CorrectReads.pm nihh20160422Sergey Koren
+A src/pipelines/canu/Defaults.pm nihh20160422Sergey Koren
+A src/pipelines/canu/OverlapMMap.pm nihh20160422Sergey Koren
+A src/pipelines/canu/OverlapMhap.pm nihh20160422Sergey Koren
+A src/pipelines/canu.pl nihh20160422Sergey Koren
+A src/pipelines/canu/Defaults.pm nihh20160422Sergey Koren
+A src/pipelines/canu/Unitig.pm nihh20160422Sergey Koren
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.H nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.C nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.H nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.H nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_ReconstructRepeats.C nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_UnitigVector.C nihh20160422Brian P. Walenz
+A src/bogart/AS_BAT_UnitigVector.H nihh20160422Brian P. Walenz
+A src/bogart/bogart.C nihh20160422Brian P. Walenz
+A src/pipelines/canu/OverlapStore.pm nihh20160422Sergey Koren
+A src/pipelines/canu/HTML.pm nihh20160420Brian P. Walenz
+A src/pipelines/canu/Execution.pm nihh20160420Sergey Koren
+A src/overlapInCore/overlapPair.C nihh20160420Sergey Koren
+A src/stores/ovStoreStats.C nihh20160420Sergey Koren
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160420Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160419Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160419Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160419Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_Logging.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_Logging.H nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_PopulateUnitig.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_SetParentAndHang.C nihh20160418Brian P. Walenz
+A src/bogart/bogart.C nihh20160418Brian P. Walenz
+A src/stores/ovStoreDump.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160418Brian P. Walenz
+A addCopyrights.dat nihh20160418Brian P. Walenz
+A src/main.mk nihh20160418Brian P. Walenz
+A src/stores/tgStoreCompress.C nihh20160418Brian P. Walenz
+A src/stores/tgStoreCompress.mk nihh20160418Brian P. Walenz
+A src/pipelines/canu/CorrectReads.txt nihh20160418Brian P. Walenz
+A src/pipelines/canu/Execution.txt nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160418Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.txt nihh20160418Brian P. Walenz
+A src/AS_UTL/stddevTest.C nihh20160418Brian P. Walenz
+A documentation/source/quick-start.rst nihh20160415Sergey Koren
+A src/pipelines/canu/Unitig.pm nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160413Brian P. Walenz
+A addCopyrights.dat nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.H nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_PromoteToSingleton.H nihh20160413Brian P. Walenz
+A src/bogart/bogart.C nihh20160413Brian P. Walenz
+A src/bogart/bogart.mk nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.C nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.C nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_Datatypes.H nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_OverlapCache.C nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_SetParentAndHang.C nihh20160413Brian P. Walenz
+A src/bogart/bogart.mk nihh20160413Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160412Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160412Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160412Brian P. Walenz
+A src/bogart/bogart.C nihh20160412Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160412Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160411Sergey Koren
+A src/stores/tgStoreLoad.C nihh20160411Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160408Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160408Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160408Brian P. Walenz
+A src/bogart/bogart.C nihh20160408Brian P. Walenz
+A src/pipelines/canu/Configure.pm nihh20160408Sergey Koren
+A src/pipelines/canu/OverlapMMap.pm nihh20160408Sergey Koren
+A src/pipelines/canu/Execution.pm nihh20160408Sergey Koren
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160408Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.C nihh20160407Brian P. Walenz
+A src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C nihh20160407Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160407Brian P. Walenz
+A src/bogart/AS_BAT_Datatypes.H nihh20160407Brian P. Walenz
+A src/Makefile nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_Logging.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_Logging.H nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.H nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.H nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_ReconstructRepeats.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_UnitigVector.C nihh20160406Brian P. Walenz
+A src/bogart/AS_BAT_UnitigVector.H nihh20160406Brian P. Walenz
+A src/bogart/bogart.C nihh20160406Brian P. Walenz
+A src/bogart/bogart.mk nihh20160406Brian P. Walenz
+A src/AS_UTL/stddev.H nihh20160406Brian P. Walenz
+A src/AS_UTL/stddev.H nihh20160331Sergey Koren
+A src/stores/ovStoreStats.C nihh20160331Sergey Koren
+A src/pipelines/canu/Defaults.pm nihh20160330Sergey Koren
+A src/pipelines/canu.pl nihh20160330Sergey Koren
+A src/bogart/AS_BAT_Outputs.C nihh20160330Sergey Koren
+A src/pipelines/canu/OverlapErrorAdjustment.pm nihh20160330Sergey Koren
+A src/overlapErrorAdjustment/correctOverlaps.C nihh20160330Sergey Koren
+A src/pipelines/canu/Configure.pm nihh20160330Sergey Koren
+A src/AS_UTL/stddev.H nihh20160330Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160330Brian P. Walenz
+A src/main.mk nihh20160330Brian P. Walenz
+A src/pipelines/canu/Unitig.pm nihh20160329Sergey Koren
+A src/overlapInCore/overlapInCore-Process_String_Overlaps.C nihh20160323Sergey Koren
+A src/overlapInCore/overlapInCore.C nihh20160323Sergey Koren
+A src/overlapInCore/overlapInCore.H nihh20160323Sergey Koren
+A src/pipelines/canu/OverlapMhap.pm nihh20160327Sergey Koren
+A src/pipelines/canu.pl nihh20160327Sergey Koren
+A src/pipelines/canu/Configure.pm nihh20160327Sergey Koren
+A src/pipelines/canu/OverlapMhap.pm nihh20160327Sergey Koren
+A src/pipelines/canu/OverlapErrorAdjustment.pm nihh20160327Sergey Koren
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_IntersectBubble.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_PopulateUnitig.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_PromoteToSingleton.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.H nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_ReconstructRepeats.C nihh20160324Brian P. Walenz
+A src/bogart/bogart.C nihh20160324Brian P. Walenz
+A src/bogart/AS_BAT_ChunkGraph.C nihh20160324Brian P. Walenz
+A src/falcon_sense/falcon_sense.C nihh20160324Sergey Koren
+A src/falcon_sense/falcon_sense.C nihh20160324Sergey Koren
+A src/falcon_sense/libfalcon/falcon.C nihh20160324Sergey Koren
+A src/falcon_sense/libfalcon/falcon.H nihh20160324Sergey Koren
+A src/pipelines/canu/CorrectReads.pm nihh20160324Sergey Koren
+A src/pipelines/canu/Gatekeeper.pm nihh20160324Sergey Koren
+A src/pipelines/canu/OverlapStore.pm nihh20160323Sergey Koren
+A src/falcon_sense/libfalcon/falcon.C nihh20160323Sergey Koren
+A src/overlapInCore/overlapInCore-Process_String_Overlaps.C nihh20160323Sergey Koren
+A src/overlapInCore/overlapInCore.C nihh20160323Sergey Koren
+A src/overlapInCore/overlapInCore.H nihh20160323Sergey Koren
+A src/overlapErrorAdjustment/findErrors-Analyze_Alignment.C nihh20160322Sergey Koren
+A src/falcon_sense/libfalcon/falcon.C nihh20160322Sergey Koren
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160322Sergey Koren
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_Datatypes.H nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.C nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_PlaceContains.H nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_PlaceFragUsingOverlaps.C nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_PopulateUnitig.C nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_ReconstructRepeats.C nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_SplitDiscontinuous.C nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_Unitig_AddFrag.C nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C nihh20160322Brian P. Walenz
+A src/bogart/bogart.C nihh20160322Brian P. Walenz
+A src/bogart/bogart.mk nihh20160322Brian P. Walenz
+A src/bogart/AS_BAT_Datatypes.H nihh20160321Brian P. Walenz
+A src/bogart/AS_BAT_FragmentInfo.C nihh20160321Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160321Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.C nihh20160321Brian P. Walenz
+A src/bogart/AS_BAT_Outputs.C nihh20160317Brian P. Walenz
+A src/AS_UTL/stddev.H nihh20160317Brian P. Walenz
+A src/AS_UTL/stddev.H nihh20160317Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160317Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160317Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160316Brian P. Walenz
+A src/bogart/bogart.C nihh20160316Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160316Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160316Brian P. Walenz
+A src/bogart/AS_BAT_Datatypes.H nihh20160316Brian P. Walenz
+A src/bogart/AS_BAT_OverlapCache.C nihh20160316Brian P. Walenz
+A src/bogart/AS_BAT_OverlapCache.H nihh20160316Brian P. Walenz
+A src/bogart/AS_BAT_PopulateUnitig.C nihh20160316Brian P. Walenz
+A src/bogart/AS_BAT_Logging.C nihh20160316Brian P. Walenz
+A src/AS_UTL/memoryMappedFile.H nihh20160316Brian P. Walenz
+A src/pipelines/canu/OverlapMhap.pm nihh20160315Sergey Koren
+A src/bogart/bogart.C nihh20160315Sergey Koren
+A src/pipelines/canu/Unitig.pm nihh20160315Sergey Koren
+A src/AS_UTL/memoryMappedFile.H nihh20160315Sergey Koren
+A src/pipelines/canu.pl nihh20160315Sergey Koren
+A src/AS_UTL/memoryMappedFile.H nihh20160313Brian P. Walenz
+A src/stores/ovOverlap.C nihh20160311Sergey Koren
+A src/stores/ovStore.H nihh20160311Sergey Koren
+A src/stores/ovStoreDump.C nihh20160311Sergey Koren
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160311Sergey Koren
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160311Sergey Koren
+A src/bogart/AS_BAT_MergeSplitJoin.C nihh20160311Sergey Koren
+A src/bogart/AS_BAT_MergeSplitJoin.H nihh20160311Sergey Koren
+A src/bogart/bogart.C nihh20160311Sergey Koren
+A src/bogart/bogart.mk nihh20160311Sergey Koren
+A src/stores/ovOverlap.C nihh20160311Sergey Koren
+A src/stores/ovStore.H nihh20160311Sergey Koren
+A src/stores/ovStoreDump.C nihh20160311Sergey Koren
+A src/pipelines/canu.pl nihh20160311Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.C nihh20160311Brian P. Walenz
+A src/bogart/AS_BAT_BestOverlapGraph.H nihh20160311Brian P. Walenz
+A src/bogart/AS_BAT_BreakRepeats.C nihh20160311Brian P. Walenz
+A src/bogart/AS_BAT_BreakRepeats.H nihh20160311Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.C nihh20160311Brian P. Walenz
+A src/bogart/AS_BAT_MarkRepeatReads.H nihh20160311Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.C nihh20160311Brian P. Walenz
+A src/bogart/AS_BAT_PopBubbles.H nihh20160311Brian P. Walenz
+A src/bogart/bogart.C nihh20160311Brian P. Walenz
+A src/bogart/bogart.mk nihh20160311Brian P. Walenz
+A src/pipelines/bogart-sweep.pl nihh20160310Brian P. Walenz
+A src/AS_UTL/intervalListTest.C nihh20160310Brian P. Walenz
+A src/AS_UTL/stddev.H nihh20160310Brian P. Walenz
+A src/AS_UTL/stddevTest.C nihh20160310Brian P. Walenz
+A src/bogart/AS_BAT_PopulateUnitig.C nihh20160310Brian P. Walenz
+A src/bogart/AS_BAT_OverlapCache.C nihh20160310Brian P. Walenz
+A src/bogart/AS_BAT_Unitig.H nihh20160310Brian P. Walenz
+A src/bogart/bogart.C nihh20160310Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160310Brian P. Walenz
+A src/bogart/AS_BAT_Instrumentation.C nihh20160310Brian P. Walenz
+A src/stores/ovStore.H nihh20160310Brian P. Walenz
+A src/AS_UTL/intervalList.H nihh20160310Brian P. Walenz
+A src/AS_UTL/intervalList.H nihh20160310Brian P. Walenz
+A src/AS_UTL/memoryMappedFile.H nihh20160310Brian P. Walenz
+A documentation/source/quick-start.rst nihh20160309Sergey Koren
+A src/pipelines/canu/CorrectReads.pm nihh20160309Sergey Koren
+A documentation/source/quick-start.rst nihh20160307Sergey Koren
diff --git a/addCopyrights.pl b/addCopyrights.pl
index bdd9979..bf6778b 100644
--- a/addCopyrights.pl
+++ b/addCopyrights.pl
@@ -217,7 +217,7 @@ my %derived;
# Process each file.
open(FIN, "find kmer src -type f -print |") or die "Failed to launch 'find'\n";
-open(OUT, "> addCopyrights.dat.new") or die "Failed to open 'addCopyrights.dat.new' for writing: $!\n";
+#open(OUT, "> addCopyrights.dat.new") or die "Failed to open 'addCopyrights.dat.new' for writing: $!\n";
while (<FIN>) {
chomp;
diff --git a/documentation/source/commands/bogart.rst b/documentation/source/commands/bogart.rst
index 774bb88..981eeb6 100644
--- a/documentation/source/commands/bogart.rst
+++ b/documentation/source/commands/bogart.rst
@@ -16,11 +16,8 @@ bogart
-gs Genome size in bases.
- -RS Remove edges to spur reads from best overlap graph.
- -NS Don't seed promiscuous unitigs with suspicious reads.
- -CS Don't place contained reads in singleton unitigs.
- -RW t Remove weak overlaps, those in the lower t fraction of erates per overlap end.
-J Join promiscuous unitigs using unused best edges.
+
-SR Shatter repeats, don't rebuild.
-R Shatter repeats (-SR), then rebuild them
-RL len Force reads below 'len' bases to be singletons.
@@ -34,23 +31,14 @@ bogart
the following conditions:
When constructing the Best Overlap Graph and Promiscuous Unitigs ('g'raph):
- -eg 0.020 no more than 0.020 fraction (2.0%) error
-
- When popping bubbles ('b'ubbles):
- -eb 0.045 no more than 0.045 fraction (4.5%) error when bubble popping
-
- When merging unitig ends ('m'erging):
- -em 0.045 no more than 0.045 fraction (4.5%) error when merging unitig ends
-
- When detecting repeats ('r'epeats):
- -er 0.045 no more than 0.045 fraction (4.5%) error when detecting repeats
+ -eg 0.020 no more than 0.020 fraction (2.0%) error ** DEPRECATED **
When loading overlaps, an inflated maximum (to allow reruns with different error rates):
-eM 0.05 no more than 0.05 fraction (5.0%) error in any overlap loaded into bogart
the maximum used will ALWAYS be at least the maximum of the four error rates
For all, the lower limit on overlap length
- -el 40 no shorter than 40 bases
+ -el 500 no shorter than 500 bases
Overlap Storage
@@ -64,19 +52,13 @@ bogart
-D <name> enable logging/debugging for a specific component.
-d <name> disable logging/debugging for a specific component.
- overlapQuality
- overlapsUsed
+ overlapScoring
+ allBestEdges
chunkGraph
- intersections
- populate
- intersectionBreaking
- intersectionBubbles
- intersectionBubblesDebug
- intersectionJoining
- intersectionJoiningDebug
+ buildUnitig
+ placeUnplaced
+ bubbles
splitDiscontinuous
- containedPlacement
- happiness
intermediateUnitigs
setParentAndHang
stderr
diff --git a/documentation/source/commands/gatekeeperDumpFASTQ.rst b/documentation/source/commands/gatekeeperDumpFASTQ.rst
index a31fea5..d58314c 100644
--- a/documentation/source/commands/gatekeeperDumpFASTQ.rst
+++ b/documentation/source/commands/gatekeeperDumpFASTQ.rst
@@ -7,6 +7,7 @@ gatekeeperDumpFASTQ
-G gkpStore
-o fastq-prefix write files fastq-prefix.(libname).fastq, ...
if fastq-prefix is '-', all sequences output to stdout
+ if fastq-prefix ends in .gz, .bz2 or .xz, output is compressed
-l libToDump output only read in library number libToDump (NOT IMPLEMENTED)
-r id[-id] output only the single read 'id', or the specified range of ids
diff --git a/documentation/source/commands/ovStoreDump.rst b/documentation/source/commands/ovStoreDump.rst
index 9ea6069..2c61930 100644
--- a/documentation/source/commands/ovStoreDump.rst
+++ b/documentation/source/commands/ovStoreDump.rst
@@ -3,10 +3,10 @@ ovStoreDump
::
- usage: ovStoreDump -G gkpStore -O ovlStore [-b bgnID] [-e endID] ...
+ usage: ovStoreDump -G gkpStore -O ovlStore ...
There are three modes of operation:
- -d dump a store (range selected with -b and -e)
+ -d [a[-b]] dump overlaps for reads a to b, inclusive
-q a b report the a,b overlap, if it exists.
-p a dump a picture of overlaps to fragment 'a'.
@@ -15,6 +15,7 @@ ovStoreDump
-coords dump overlap showing coordinates in the reads (default)
-hangs dump overlap showing dovetail hangs unaligned
-raw dump overlap showing its raw native format (four hangs)
+ -paf dump overlaps in miniasm/minimap format
-binary dump overlap as raw binary data
-counts dump the number of overlaps per read
@@ -27,6 +28,11 @@ ovStoreDump
-dC Dump only overlaps that are contained in the A frag (B contained in A).
-dc Dump only overlaps that are containing the A frag (A contained in B).
-v Report statistics (to stderr) on some dumps (-d).
+ -unique Report only overlaps where A id is < B id, do not report both A to B and B to A overlap
+
+ -best prefix Annotate picture with status from bogart outputs prefix.edges, prefix.singletons, prefix.edges.suspicious
+ -noc With -best data, don't show overlaps to contained reads.
+ -nos With -best data, don't show overlaps to suspicious reads.
ERROR: no operation (-d, -q or -p) supplied.
ERROR: no input gkpStore (-G) supplied.
diff --git a/documentation/source/conf.py b/documentation/source/conf.py
index 25c0ed3..25ceed3 100644
--- a/documentation/source/conf.py
+++ b/documentation/source/conf.py
@@ -55,9 +55,9 @@ copyright = u'2015, Adam Phillippy, Sergey Koren, Brian Walenz'
# built documents.
#
# The short X.Y version.
-version = '1.1'
+version = '1.3'
# The full version, including alpha/beta/rc tags.
-release = '1.1'
+release = '1.3'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/documentation/source/faq.rst b/documentation/source/faq.rst
new file mode 100644
index 0000000..c65b23e
--- /dev/null
+++ b/documentation/source/faq.rst
@@ -0,0 +1,124 @@
+
+.. _faq:
+
+Canu FAQ
+========================
+
+**Q**:
+ What resources does Canu require for a bacterial genome assembly? A mammalian assembly?
+
+**A**:
+ Canu is designed to scale resources to the system it runs on. It will report if a system does not meet the minimum requirements for a given genome size.
+
+ Typically, a bacterial genome can be assembled in 1-10 cpu hours, depending on coverage (~20 min on 16-cores) and 4GB of ram (8GB is recommended). A mammalian genome (such as human) can be assembled in 10-25K cpu hours, depending on coverage (a grid environment is recommended) and at least one machine with 64GB of ram (128GB is recommended).
+
+**Q**:
+ What parameters should I use for my genome? Sequencing type?
+
+**A**:
+ By default, Canu is designed to be universal on a large range of PacBio (C2-P6-C4) and Oxford Nanopore (R6-R9) data. You can adjust parameters to increase efficiency for your datatype. For example, for higher coverage PacBio datasets, especially from inbred samples, you can decrease the error rate (``errorRate=0.013``). For recent Nanopore (R9) 2D data, you can also decrease the default error rate (``errorRate=0.013``).
+
+ With R7 1D sequencing data, multiple rounds of error correction are helpful. This should not be necessary for sequences over 85% identity. You can run just the correction from Canu with the options
+
+ ::
+
+ -correct corOutCoverage=500 corMinCoverage=0 corMhapSensitivity=high
+
+ for 5-10 rounds, supplying the asm.correctedReads.fasta.gz output from round ``i-1`` to round ``i``. Assemble with
+
+ ::
+
+ -nanopore-corrected <your data> errorRate=0.1 utgGraphDeviation=50
+
+**Q**:
+ How do I run Canu on my SLURM/SGE/PBS/LSF/Torque system?
+
+**A**:
+ Canu will auto-detect and configure itself to submit on most grids. If your grid requires special options (such as a partition on SLURM or an account code on SGE), specify it with ``gridOptions="<your options list>"``, which will be passed to the scheduler by Canu. If you have a grid system but prefer to run locally, specify ``useGrid=false``.
+
+**Q**:
+ My asm.contigs.fasta is empty, why?
+
+**A**:
+ By default, canu will split the final output into three files:
+
+ asm.contigs.fasta
+ Everything which could be assembled and is part of the primary assembly, including both unique and repetitive elements. Each contig has several flags included on the fasta def line.
+
+ asm.bubbles.fasta
+ alternate paths in the graph which could not be merged into the primary assembly.
+
+ asm.unassembled.fasta
+ reads/tigs which could not be incorporated into the primary or bubble assemblies.
+
+ It is possible for tigs comprised of multiple reads to end up in asm.unassembled.fasta. The default filtering eliminates anything with < 2 reads, shorter than 1000bp, or comprised of mostly a single sequence (>75%). The filtering is controlled by the contigFilter parameter which takes 5 values.
+
+ ::
+
+ contigFilter
+ minReads
+ minLength
+ singleReadSpan
+ lowCovSpan
+ lowCovDepth
+
+ The default filtering is ``2 1000 0.75 0.75 2``. If you are assembling amplified data or viral data, it is possible your assembly will be flagged as unassembled. In those cases, you can turn off the filtering with the parameters
+
+ ::
+
+ contigFilter="2 1000 1.0 1.0 2"
+
+**Q**:
+ Why is my assembly missing my favorite short plasmid X?
+
+**A**:
+ The first step in Canu is to find high-error overlaps and generate corrected sequences for subsequent assembly. This is currently the fastest step in Canu. By default, only the longest 40X of data (based on the specified genome size) is used for correction. If you have a dataset with uneven coverage or small plasmids, correcting the longest 40X may not give you sufficient coverage of your genome/plasmid. In these cases, you can set
+
+ ::
+
+ corOutCoverage=1000
+
+ Or any large value greater than your total input coverage which will correct and assemble all input data, at the expense of runtime. This option is also recommended for metagenomic datasets where all data is useful for assembly.
+
+**Q**:
+ Why do I get only 30X of corrected data?
+
+**A**:
+ By default, only the longest 40X of data (based on the specified genome size) is used for correction. Typically, some reads are trimmed during correction due to being chimeric or having erroneous sequence, resulting in a loss of 20-25% (30X output). You can force correction to be non-lossy by setting
+
+ ::
+
+ corMinCoverage=0
+
+ In which case the corrected reads output will be the same length as the input data, keeping any high-error unsupported bases. Canu will trim these in downstream steps before assembly.
+
+**Q**:
+ What is the minimum coverage required to run Canu?
+
+**A**:
+ We have found that on eukaryotic genomes >=20X typically begins to outperform current hybrid methods. For low coverage datasets (<=30X) we recommend the following parameters
+
+ ::
+
+ corMinCoverage=0 errorRate=0.035
+
+ For high-coverage datasets (typically >=60X) you can decrease the error rate since the higher number of reads should allow sufficient assembly from only the best subset
+
+ ::
+
+ errorRate=0.013
+
+ However, the above is mainly an optimization for speed and will not affect your assembly continuity.
+
+
+**Q**:
+ My genome is AT/GC rich, do I need to adjust parameters?
+
+**A**:
+ On bacterial genomes, typically no. On repetitive genomes with AT<=25% or AT>=75% (or GC) the sequence biases the Jaccard estimate used by MHAP. In those cases setting
+
+ ::
+
+ corMaxEvidenceErate=0.15
+
+ has been sufficient to correct for the bias in our testing. In general, with high coverage repetitive genomes (such as plants) it can be beneficial to set the above parameter as it will eliminate repetitive matches, speed up the assembly, and sometimes improve unitigs.
diff --git a/documentation/source/index.rst b/documentation/source/index.rst
index a726961..4fb5474 100644
--- a/documentation/source/index.rst
+++ b/documentation/source/index.rst
@@ -34,6 +34,7 @@ Canu
:hidden:
quick-start
+ faq
tutorial
pipeline
parameter-reference
@@ -46,6 +47,7 @@ the PacBio RSII or Oxford Nanopore MinION). You can `download <http://github.com
any issues, please report them using the `github issues <http://github.com/marbl/canu/issues>`_ page.
* :ref:`Quick Start <quickstart>` - no experience or data required, download and assemble *Escherichia coli* today!
+* :ref:`FAQ <faq>` - Frequently asked questions
* :ref:`Canu tutorial <tutorial>` - a gentle introduction to the complexities of canu.
* :ref:`Canu pipeline <pipeline>` - what, exactly, is canu doing, anyway?
diff --git a/documentation/source/parameter-reference.rst b/documentation/source/parameter-reference.rst
index 856b573..e9de816 100644
--- a/documentation/source/parameter-reference.rst
+++ b/documentation/source/parameter-reference.rst
@@ -4,8 +4,11 @@
Canu Parameter Reference
========================
-**WARNING**: The default values aren't correct.
+To get the most up-to-date options, run
+ canu -options
+
+The default values below will vary based on the input data type and genome size.
.. _genomeSize:
@@ -15,10 +18,10 @@ Global Options
The catch all category.
errorRate <float=0.01>
- The expected error rate, as fraction error, in the corrected reads.
+ The expected error rate, as fraction error, in the corrected reads, set by default based on data type, typically not changed by the user.
genomeSize <float=unset>
- An estimate of the size of the genome. Common suffices are allowed, for example, 3.7m or 2.8g.
+ An estimate of the size of the genome. Common suffixes are allowed, for example, 3.7m or 2.8g. Required.
canuIteration <internal parameter, do not use>
Which parallel iteration is being attempted.
@@ -257,35 +260,12 @@ intensive stages can run under grid control.
The useGrid* options control which algorithms run in parallel on the grid.
-useGrid <boolean=false>
- Master control. If false, no algorithms will run in parallel. Does not change the value of the other useGrid options.
-
-useGridcns <boolean=true>
- Use grid engine for unitig consensus computes
-useGridcor <boolean=true>
- Use grid engine for read correction computes
-useGridcormhap <boolean=true>
- Use grid engine for mhap overlaps for correction computes
-useGridcorovl <boolean=true>
- Use grid engine for overlaps for correction computes
-useGridmaster <boolean=true>
- Use grid engine for master script computes
-useGridobtmhap <boolean=true>
- Use grid engine for mhap overlaps for trimming computes
-useGridobtovl <boolean=true>
- Use grid engine for overlaps for trimming computes
-useGridoea <boolean=true>
- Use grid engine for overlap error adjustment computes
-useGridovb <boolean=true>
- Use grid engine for overlap store bucketizing computes
-useGridovs <boolean=true>
- Use grid engine for overlap store sorting computes
-useGridred <boolean=true>
- Use grid engine for read error detection computes
-useGridutgmhap <boolean=true>
- Use grid engine for mhap overlaps for unitig construction computes
-useGridutgovl <boolean=true>
- Use grid engine for overlaps for unitig construction computes
+useGrid <boolean=true>
+ Master control. If 'false', no algorithms will run under grid control. Does not change the value of the other useGrid options.
+
+ If 'remote', jobs are configured for grid execution, but not submitted. A message, with commands to launch the job, is reported and canu halts execution.
+
+ Note that the host used to run canu for 'remote' execution must know about the grid, that is, it must be able to submit jobs to the grid.
There are many options for configuring a new grid ('gridEngine*') and for configuring how canu
configures its computes to run under grid control ('gridOptions*'). The grid engine to use is
@@ -354,7 +334,7 @@ gridOptionsCNS <string=unset>
Grid submission command options applied to unitig consensus jobs
gridOptionsCOR <string=unset>
Grid submission command options applied to read correction jobs
-gridOptionsMaster <string=unset>
+gridOptionsExecutive <string=unset>
Grid submission command options applied to master script jobs
gridOptionsOEA <string=unset>
Grid submission command options applied to overlap error adjustment jobs
@@ -494,25 +474,6 @@ unitigger <string="bogart">
batOptions <unset>
Advanced options to bogart
-utgBubbleErrorRate <float=unset>
- Overlaps at or below this error rate are used to construct unitigs. For the bogart algorithm.
-utgGraphErrorRate <float=unset>
- Overlaps at or below this error rate are used to construct unitigs. For the bogart algorithm.
-utgMergeErrorRate <float=unset>
- Overlaps at or below this error rate are used to construct unitigs. For the bogart algorithm.
-utgRepeatErrorRate <float=unset>
- Overlaps at or below this error rate are used to construct unitigs. For the bogart algorithm.
-
-
-Unitig labeling
-------------------
-
-maxSingleReadSpan
-lowCoverageDepth
-lowCoverageAllowed
-minReadsUnique
-maxRepeatLength
-
Consensus Partitioning
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -537,6 +498,22 @@ cnsErrorRate
Read Correction
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The first step in Canu is to find high-error overlaps and generate corrected sequences for subsequent assembly. This is currently the fastest step in Canu. By default, only the longest 40X of data (based on the specified genome size) is used for correction. Typically, some reads are trimmed during correction due to being chimeric or having erroneous sequence, resulting in a loss of 20-25% (30X output). You can force correction to be non-lossy by setting
+
+::
+
+ corMinCoverage=0
+
+In which case the corrected reads output will be the same length as the input data, keeping any high-error unsupported bases. Canu will trim these in downstream steps before assembly.
+
+If you have a dataset with uneven coverage or small plasmids, correcting the longest 40X may not give you sufficient coverage of your genome/plasmid. In these cases, you can set
+
+::
+
+ corOutCoverage=400
+
+Or any large value greater than your total input coverage which will correct and assemble all input data, at the expense of runtime.
+
corConsensus <string="falconpipe">
Which algorithm to use for computing read consensus sequences. Only 'falcon' and 'falconpipe' are supported.
@@ -569,19 +546,30 @@ falconSense
Output Filtering
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-lowCoverageAllowed <unset>
- Unitigs with more than fraction lowCoverageAllowed bases at depth at most lowCoverageDepth bases are never labeled unique
-lowCoverageDepth <unset>
- Unitigs with more than fraction lowCoverageAllowed bases at depth at most lowCoverageDepth bases are never labeled unique
-maxRepeatLength <unset>
- Unitigs longer than this are always labeled unique
-maxSingleReadSpan <unset>
- Unitigs with a single read spanning more than this fraction of the unitig are never labeled unique
-minOverlapLength <unset>
- Overlaps shorter than this length are not computed
-minReadLength <unset>
- Reads shorter than this length are not loaded into the assembler
-minReadsUnique <unset>
- Unitigs with fewer reads that this are never labeled unique
-minUniqueLength <unset>
- Unitigs shorter than this are always labeled non-unique
+By default, canu will split the final output into three files:
+
+asm.contigs.fasta
+ Everything which could be assembled and is part of the primary assembly, including both unique and repetitive elements. Each contig has several flags included on the fasta def line.
+
+asm.bubbles.fasta
+ alternate paths in the graph which could not be merged into the primary assembly.
+
+asm.unassembled.fasta
+ reads/tigs which could not be incorporated into the primary or bubble assemblies.
+
+It is possible for tigs comprised of multiple reads to end up in asm.unassembled.fasta. The default filtering eliminates anything with < 2 reads, shorter than 1000bp, or comprised of mostly a single sequence (>75%). The filtering is controlled by the contigFilter parameter which takes 5 values.
+
+::
+
+ contigFilter
+ minReads
+ minLength
+ singleReadSpan
+ lowCovSpan
+ lowCovDepth
+
+The default filtering is "2 1000 0.75 0.75 2". If you are assembling amplified data or viral data, it is possible your assembly will be flagged as unassembled. In those cases, you can turn off the filtering with the parameters
+
+::
+
+ contigFilter="2 1000 1.0 1.0 2"
diff --git a/documentation/source/quick-start.rst b/documentation/source/quick-start.rst
index e226892..c406970 100644
--- a/documentation/source/quick-start.rst
+++ b/documentation/source/quick-start.rst
@@ -67,6 +67,12 @@ Find the Output
The canu progress chatter records statistics such as an input read histogram, corrected read histogram, and overlap types. Outputs from the assembly tasks are in:
+ecoli*/ecoli.correctedReads.fasta.gz
+ The sequences after correction, trimmed and split based on consensus evidence. Typically >99% identity for PacBio and >98% for Nanopore, but it can vary based on your input sequencing quality.
+
+ecoli*/ecoli.trimmedReads.fasta.gz
+ The sequences after correction and final trimming. The corrected sequences above are overlapped again to identify any missed hairpin adapters or bad sequence that could not be detected in the raw sequences.
+
ecoli*/ecoli.layout
The layout provides information on where each read ended up in the final assembly, including contig and positions. It also includes the consensus sequence for each contig.
@@ -76,15 +82,30 @@ ecoli*/ecoli.gfa
The fasta output is split into three types:
ecoli*/asm.contigs.fasta
- everything which could be assembled and is part of the primary assembly. This includes both unique and repetitive elements. Each contig has several flags included on the fasta def line. These include:
-=============== ====== ==========
-tag values definition
-=============== ====== ==========
-len int length in bp
-reads int number of sequences comprising the contig
-suggestRepeat yes/no whether the contig is a repetitive element or unique
-suggestCircular yes/no currently unused
-=============== ====== ==========
+ Everything which could be assembled and is part of the primary assembly, including both unique and repetitive elements. Each contig has several flags included on the fasta def line::
+
+ >tig######## len=<integer> reads=<integer> covStat=<float> gappedBases=<yes|no> class=<contig|bubble|unassm> suggestRepeat=<yes|no> suggestCircular=<yes|no>
+
+ len
+ Length of the sequence, in bp.
+
+ reads
+ Number of reads used to form the contig.
+
+ covStat
+ The log of the ratio of the contig being unique versus being two-copy, based on the read arrival rate. Positive values indicate more likely to be unique, while negative values indicate more likely to be repetitive. See `Footnote 24 <http://science.sciencemag.org/content/287/5461/2196.full#ref-24>`_ in `Myers et al., A Whole-Genome Assembly of Drosophila <http://science.sciencemag.org/content/287/5461/2196.full>`_.
+
+ gappedBases
+ If yes, the sequence includes all gaps in the multialignment.
+
+ class
+ Type of sequence. Unassembled sequences are primarily low-coverage sequences spanned by a single read.
+
+ suggestRepeat
+ If yes, sequence was detected as a repeat based on graph topology or read overlaps to other sequences.
+
+ suggestCircular
+ If yes, sequence is likely circular. Not implemented.
ecoli*/asm.bubbles.fasta
alternate paths in the graph which could not be merged into the primary assembly.
@@ -111,24 +132,24 @@ Then, trim the output of the correction::
canu -trim \
-p ecoli -d ecoli \
genomeSize=4.8m \
- -pacbio-corrected ecoli/correction/ecoli.correctedReads.fastq
+ -pacbio-corrected ecoli/correction/ecoli.correctedReads.fasta.gz
And finally, assemble the output of trimming, twice::
canu -assemble \
- -p ecoli -d ecoli-erate-0.025 \
+ -p ecoli -d ecoli-erate-0.013 \
genomeSize=4.8m \
- errorRate=0.025 \
- -pacbio-corrected ecoli/trimming/ecoli.trimmedReads.fastq
+ errorRate=0.013 \
+ -pacbio-corrected ecoli/trimming/ecoli.trimmedReads.fasta.gz
canu -assemble \
- -p ecoli -d ecoli-erate-0.035 \
+ -p ecoli -d ecoli-erate-0.025 \
genomeSize=4.8m \
- errorRate=0.035 \
- -pacbio-corrected ecoli/trimming/ecoli.trimmedReads.fastq
+ errorRate=0.025 \
+ -pacbio-corrected ecoli/trimming/ecoli.trimmedReads.fasta.gz
The directory layout for correction and trimming is exactly the same as when we ran all tasks in the same command.
-Each unitig construction task needs its own private work space, and in there the 'correction' and 'trimming' directories are empty.
+Each unitig construction task needs its own private work space, and in there the 'correction' and 'trimming' directories are empty. The error rate always specifies the error in the corrected reads which is typically <1% for PacBio data and <2% for Nanopore data (<1% on newest chemistries).
Assembling Oxford Nanopore data
--------------------------------
@@ -176,7 +197,7 @@ Now you can assemble all the data::
Assembling Low Coverage Datasets
----------------------------------
-When you have 30X or less coverage, it helps to adjust the Canu assembly parameters. You can download a 20X subset of `S. cerevisae <http://gembox.cbcb.umd.edu/mhap/raw/yeast_filtered.20x.fastq.gz>`_
+When you have 30X or less coverage, it helps to adjust the Canu assembly parameters. Typically, assembling 20X of single-molecule data outperforms hybrid methods with higher coverage. You can download a 20X subset of `S. cerevisiae <http://gembox.cbcb.umd.edu/mhap/raw/yeast_filtered.20x.fastq.gz>`_
or use the following curl command:
@@ -184,12 +205,12 @@ or use the following curl command:
curl -L -o yeast.20x.fastq.gz http://gembox.cbcb.umd.edu/mhap/raw/yeast_filtered.20x.fastq.gz
-and run the assembler adding sensitive parameters (**corMhapSensitivity=high corMinCoverage=2 errorRate=0.035 minOverlapLength=499 corMaxEvidenceErate=0.3**)::
+and run the assembler adding sensitive parameters (**errorRate=0.035**)::
canu \
-p asm -d yeast \
genomeSize=12.1m \
- corMhapSensitivity=high corMinCoverage=2 errorRate=0.035 minOverlapLength=499 corMaxEvidenceErate=0.3 \
+ errorRate=0.035 \
-pacbio-raw yeast.20x.fastq.gz
@@ -199,55 +220,44 @@ After the run completes, we can check the assembly statistics::
::
- lenSuggestRepeat sum 829257 (genomeSize 12100000)
- lenSuggestRepeat num 105
- lenSuggestRepeat ave 7897
- lenUnassembled ng10 12472 bp lg10 75 sum 1217659 bp
- lenUnassembled ng20 8623 bp lg20 192 sum 2420234 bp
- lenUnassembled ng30 5949 bp lg30 359 sum 3632595 bp
- lenUnassembled ng40 2851 bp lg40 640 sum 4842075 bp
- lenUnassembled sum 5325150 (genomeSize 12100000)
- lenUnassembled num 903
- lenUnassembled ave 5897
- lenContig ng10 719035 bp lg10 2 sum 1502223 bp
- lenContig ng20 646872 bp lg20 4 sum 2817020 bp
- lenContig ng30 565419 bp lg30 6 sum 3949850 bp
- lenContig ng40 485518 bp lg40 8 sum 4989743 bp
- lenContig ng50 329252 bp lg50 11 sum 6245693 bp
- lenContig ng60 257910 bp lg60 15 sum 7358747 bp
- lenContig ng70 196655 bp lg70 20 sum 8473224 bp
- lenContig ng80 119852 bp lg80 29 sum 9776539 bp
- lenContig ng90 85266 bp lg90 40 sum 10892166 bp
- lenContig sum 11972093 (genomeSize 12100000)
- lenContig num 66
- lenContig ave 181395
+ lenSuggestRepeat sum 160297 (genomeSize 12100000)
+ lenSuggestRepeat num 12
+ lenSuggestRepeat ave 13358
+ lenUnassembled ng10 13491 bp lg10 77 sum 1214310 bp
+ lenUnassembled ng20 11230 bp lg20 176 sum 2424556 bp
+ lenUnassembled ng30 9960 bp lg30 290 sum 3632411 bp
+ lenUnassembled ng40 8986 bp lg40 418 sum 4841978 bp
+ lenUnassembled ng50 8018 bp lg50 561 sum 6054460 bp
+ lenUnassembled ng60 7040 bp lg60 723 sum 7266816 bp
+ lenUnassembled ng70 6169 bp lg70 906 sum 8474192 bp
+ lenUnassembled ng80 5479 bp lg80 1114 sum 9684981 bp
+ lenUnassembled ng90 4787 bp lg90 1348 sum 10890099 bp
+ lenUnassembled ng100 4043 bp lg100 1624 sum 12103239 bp
+ lenUnassembled ng110 3323 bp lg110 1952 sum 13310167 bp
+ lenUnassembled ng120 2499 bp lg120 2370 sum 14520362 bp
+ lenUnassembled ng130 1435 bp lg130 2997 sum 15731198 bp
+ lenUnassembled sum 16139888 (genomeSize 12100000)
+ lenUnassembled num 3332
+ lenUnassembled ave 4843
+ lenContig ng10 770772 bp lg10 2 sum 1566457 bp
+ lenContig ng20 710140 bp lg20 4 sum 3000257 bp
+ lenContig ng30 669248 bp lg30 5 sum 3669505 bp
+ lenContig ng40 604859 bp lg40 7 sum 4884914 bp
+ lenContig ng50 552911 bp lg50 10 sum 6571204 bp
+ lenContig ng60 390415 bp lg60 12 sum 7407061 bp
+ lenContig ng70 236725 bp lg70 16 sum 8521520 bp
+ lenContig ng80 142854 bp lg80 23 sum 9768299 bp
+ lenContig ng90 94308 bp lg90 33 sum 10927790 bp
+ lenContig sum 12059140 (genomeSize 12100000)
+ lenContig num 56
+ lenContig ave 215341
Consensus Accuracy
-------------------
-While Canu corrects sequences and has 99% identity or greater with PacBio or Nanopore sequences, for the best accuracy we recommend polishing with a sequence-specific tool. We recommend `Quiver <http://github.com/PacificBiosciences/GenomicConsensus/blob/master/doc/HowToQuiver.rst>`_ for PacBio and `Nanopolish <http://github.com/jts/nanopolish>`_ for Oxford Nanpore data.
+While Canu corrects sequences and has 99% identity or greater with PacBio or Nanopore sequences, for the best accuracy we recommend polishing with a sequence-specific tool. We recommend `Quiver <http://github.com/PacificBiosciences/GenomicConsensus>`_ for PacBio and `Nanopolish <http://github.com/jts/nanopolish>`_ for Oxford Nanopore data.
If you have Illumina sequences available, `Pilon <http://www.broadinstitute.org/software/pilon/>`_ can also be used to polish either PacBio or Oxford Nanopore assemblies.
-Changes
--------------------
-
-- Support for reads up to 2Mbp in size (up from 130Kbp).
-- Incorporate MHAP 2.0 which is 5X faster than previous version and has higher specificity
-- Add GFA output
-- Improve diploid-aware assembly by categorizing output as primary contigs or unmerged bubbles. Annotate repeat and unique contigs in the output.
-- Enable parallel overlap store construction on large genomes
-- Enable minimap as an option for generating overlaps during correction step. Corrected reads are generated as before with falcon_sense.
-- Fix bug using shorter rather than longer reads for corrected reads/consensus computation
-- Fix bug resuming without providing input sequences which would incorrectly set error rates
-- Fix bug in bogart which would demote contained sequences as spurs incorrectly
-- Fix bugs in falcon_sense which would hang when input had N bases and limit corrected reads to 65Kbp
-- Fix falcon_sense support on OSX <10.10.
-- Fix various pipeline bugs
-
-Known Issues
+Further Reading
-------------------
-
-- Bogart (unitigger) has false positives in repeat breaking. Currently, the temporary workaround is to increase the minimum overlap size to avoid detecting false repeats caused by short overlaps. Canu will automatically do this for large (>100MB) genomes while the fixed algorithm is tested.
-- LSF support has limited testing
-- Large memory usage while unitig consensus calling on unitigs over 100MB in size (140Mb contig uses approximate 75GB).
-- Distributed file systems (such as GPFS) causes issues with memory mapped files, slowing down parts of Canu, including meryl (0-mercounts) and falcon-sense (2-correction).
+See the `FAQ <faq.html>`_ page for commonly-asked questions and the `release <http://github.com/marbl/canu/releases>`_ notes page for information on what's changed and known issues.
diff --git a/documentation/source/tutorial.rst b/documentation/source/tutorial.rst
index a241dde..89f9cba 100644
--- a/documentation/source/tutorial.rst
+++ b/documentation/source/tutorial.rst
@@ -235,17 +235,12 @@ Fraction Error Percent Error
============== =============
Eventually, we want to have Canu take a single error rate, the error rate of a single input read,
-and derive all other rates from there. This is the parameter ``errorRate``. Currently, the defaults are 0.025 for PacBio sequences and 0.045 for Oxford Nanpore sequences. When you have low-coverage datasets it helps to lower the error rate by 0.01 and decrease the stringency for creating corrected sequences. See the :ref:`quick_low` section for details.
-
-The error rates are critical for unitig construction, but are also used when generating overlaps,
-and for trimming reads. Error rates are used in two ways: to limit what overlaps are generated, and to filter overlaps
-before using them. The former is more a computational shortcut - no need to compute what isn't
-going to be used - while the latter can be critical to a successful assembly.
+and derive all other rates from there. This is the parameter ``errorRate``. Currently, the defaults are 0.025 for PacBio sequences and 0.05 for Oxford Nanopore sequences. Typically, you should not need to modify this setting. However, the error rate does affect runtime and lowering it can significantly speed up your assembly. Thus, for low-coverage datasets (<=30X) we recommend increasing the error rate slightly (by 1%, so errorRate=0.035 for PacBio) and for high-coverage (>=60X) datasets [...]
The following error rates are defined:
errorRate
- The expected error rate in a single read. This will set the remaining
+ The expected error rate in a corrected single read. This will set the remaining
error rates implicitly.
Recall there are three sets of overlaps generated: one for read correction, one for read trimming,
@@ -272,24 +267,6 @@ Be sure to not confuse ``obtOvlErrorRate`` with ``obtErrorRate``:
obtErrorRate
Filter overlaps during OBT's read trimming and read splitting.
-Unitig construction has four error rates:
-
-utgGraphErrorRate
- Only overlaps below this rate are used for forming initial 'best-edge' unitigs.
-
-utgBubbleErrorRate
- A short unitig that aligns (via overlaps, not sequence alignment) to a larger
- unitig at or below this rate will be merged into the larger unitig.
-
-utgMergeErrorRate
- After initial unitigs are formed and bubbles are popped, unitigs are merged if there is a
- 'second-best' overlap at or below this error rate. THIS ALGORITHM IS NOT IMPLEMENTED YET.
-
-utgRepeatErrorRate
- All non-best overlaps at or below this error rate are used to detect regions in unitigs that
- potentially span a repeat without sufficiently strong evidence. These regions are split into
- multiple unitigs.
-
In `celera-assembler`_, consensus generation required an overlap. In canu, this is no longer used,
but the ``cnsErrorRate`` option still exists.
diff --git a/src/AS_UTL/AS_UTL_stackTrace.C b/src/AS_UTL/AS_UTL_stackTrace.C
index 1ecf022..0c55da4 100644
--- a/src/AS_UTL/AS_UTL_stackTrace.C
+++ b/src/AS_UTL/AS_UTL_stackTrace.C
@@ -23,13 +23,21 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-APR-26
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
#include "AS_global.H"
-#include <execinfo.h> // backtrace
+#ifndef __CYGWIN__
+ #ifndef _WIN32
+ #include <execinfo.h> // backtrace
+ #endif
+#endif
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -146,6 +154,8 @@ AS_UTL_catchCrash(int sig_num, siginfo_t *info, void *ctx) {
void
AS_UTL_catchCrash(int sig_num, siginfo_t *info, void *ctx) {
void *arr[256];
+#ifndef __CYGWIN__
+#ifndef _WIN32
int32 cnt = backtrace(arr, 256);
// Report the signal we failed on, be careful to not allocate memory.
@@ -219,6 +229,8 @@ AS_UTL_catchCrash(int sig_num, siginfo_t *info, void *ctx) {
AS_UTL_envokeGDB();
WRITE_STRING("\n");
+#endif
+#endif
// Pass the signal through, only so a core file can get generated.
diff --git a/src/AS_UTL/intervalList.H b/src/AS_UTL/intervalList.H
index 9b5336b..929ddf6 100644
--- a/src/AS_UTL/intervalList.H
+++ b/src/AS_UTL/intervalList.H
@@ -145,7 +145,7 @@ public:
void add(iNum position, iNum length, iVal value=0);
void sort(void);
- void merge(uint32 minOverlap=0); // Merge overlapping regions
+ void merge(iNum minOverlap=0); // Merge overlapping regions
void merge(intervalList<iNum, iVal> *IL); // Insert IL into this list
void filterShort(iNum minLength);
@@ -277,7 +277,7 @@ intervalList<iNum, iVal>::sort(void) {
template <class iNum, class iVal>
void
-intervalList<iNum, iVal>::merge(uint32 minOverlap) {
+intervalList<iNum, iVal>::merge(iNum minOverlap) {
uint32 thisInterval = 0;
uint32 nextInterval = 1;
@@ -300,7 +300,7 @@ intervalList<iNum, iVal>::merge(uint32 minOverlap) {
_list[thisInterval].lo = _list[nextInterval].lo;
_list[thisInterval].hi = _list[nextInterval].hi;
_list[thisInterval].ct = _list[nextInterval].ct;
- _list[thisInterval].ct = _list[nextInterval].va;
+ _list[thisInterval].va = _list[nextInterval].va;
_list[nextInterval].lo = 0;
_list[nextInterval].hi = 0;
diff --git a/src/bogart/AS_BAT_IntersectBubble.H b/src/AS_UTL/intervalListTest.C
similarity index 51%
rename from src/bogart/AS_BAT_IntersectBubble.H
rename to src/AS_UTL/intervalListTest.C
index 359c1db..9df9124 100644
--- a/src/bogart/AS_BAT_IntersectBubble.H
+++ b/src/AS_UTL/intervalListTest.C
@@ -13,21 +13,9 @@
* Canu branched from Celera Assembler at its revision 4587.
* Canu branched from the kmer project at its revision 1994.
*
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_IntersectBubble.H
- *
* Modifications by:
*
- * Brian P. Walenz from 2010-DEC-06 to 2013-AUG-01
- * are Copyright 2010-2011,2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2014-DEC-19 to 2015-JUN-03
- * are Copyright 2014-2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
+ * Brian P. Walenz beginning on 2016-MAR-10
* are a 'United States Government Work', and
* are released in the public domain
*
@@ -35,10 +23,42 @@
* full conditions and disclaimers for each license.
*/
-#ifndef INCLUDE_AS_BAT_INTERSECTBUBBLE
-#define INCLUDE_AS_BAT_INTERSECTBUBBLE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+
+typedef int8_t int8;
+typedef int16_t int16;
+typedef int32_t int32;
+typedef int64_t int64;
+
+typedef uint8_t uint8;
+typedef uint16_t uint16;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+
+#include "intervalList.H"
+
+int
+main(int argc, char **argv) {
+
+ intervalList<int32> t1;
+
+ t1.add(0, 10);
+ t1.add(11,7);
+ t1.add(20, 8);
+
+ fprintf(stderr, "BEFORE:\n");
+ for (uint32 ii=0; ii<t1.numberOfIntervals(); ii++)
+ fprintf(stderr, "%2d %3d-%3d\n", ii, t1.lo(ii), t1.hi(ii));
+
+ t1.merge(-1);
-void popIntersectionBubbles(UnitigVector &unitigs, double erateBubble);
-void popOverlapBubbles(UnitigVector &unitigs, double erateBubble);
+ fprintf(stderr, "AFTER:\n");
+ for (uint32 ii=0; ii<t1.numberOfIntervals(); ii++)
+ fprintf(stderr, "%2d %3d-%3d\n", ii, t1.lo(ii), t1.hi(ii));
-#endif // INCLUDE_AS_BAT_INTERSECTBUBBLE
+ exit(0);
+}
diff --git a/src/AS_UTL/memoryMappedFile.H b/src/AS_UTL/memoryMappedFile.H
index aa70fb6..6edd748 100644
--- a/src/AS_UTL/memoryMappedFile.H
+++ b/src/AS_UTL/memoryMappedFile.H
@@ -64,6 +64,10 @@ enum memoryMappedFileType {
};
+#ifndef MAP_POPULATE
+#define MAP_POPULATE 0
+#endif
+
class memoryMappedFile {
public:
memoryMappedFile(const char *name,
@@ -101,11 +105,11 @@ public:
//
// NOTA BENE!! Even though it is writable, it CANNOT be extended.
- _data = (_type == memoryMappedFile_readOnly) ? mmap(0L, _length, PROT_READ, MAP_FILE | MAP_SHARED, fd, 0)
+ _data = (_type == memoryMappedFile_readOnly) ? mmap(0L, _length, PROT_READ, MAP_FILE | MAP_PRIVATE | MAP_POPULATE, fd, 0)
: mmap(0L, _length, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
if (errno)
- fprintf(stderr, "memoryMappedFile()-- Couldn't mmap '%s': %s\n", _name, strerror(errno)), exit(1);
+ fprintf(stderr, "memoryMappedFile()-- Couldn't mmap '%s' of length "F_SIZE_T": %s\n", _name, _length, strerror(errno)), exit(1);
close(fd);
diff --git a/src/AS_UTL/stddev.H b/src/AS_UTL/stddev.H
index 88ff91b..28dfe79 100644
--- a/src/AS_UTL/stddev.H
+++ b/src/AS_UTL/stddev.H
@@ -23,6 +23,10 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-31
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -38,9 +42,89 @@
using namespace std;
-// Note: Does not work well with unsigned types. The 'smallest' compute can underflow.
+// Online mean and std.dev calculation.
+// B. P. Welford, Technometrics, Vol 4, No 3, Aug 1962 pp 419-420.
+// http://www2.in.tu-clausthal.de/~zach/teaching/info_literatur/Welford.pdf
+// Also presented in Knuth Vol 2 (3rd Ed.) pp 232.
+//
+template<typename TT>
+class stdDev { // Incremental mean / standard deviation accumulator; values of type TT are added one at a time.
+public:
+ stdDev() { // starts empty and not finalized
+ _mn = 0.0;
+ _sn = 0.0;
+ _nn = 0;
+ };
+
+ ~stdDev() {
+ };
+ // insert() -- fold one value into the running mean (_mn) and running sum (_sn); exits if finalized.
+ void insert(TT val) {
+ double m0 = _mn;
+ double s0 = _sn;
+ uint32 n0 = _nn + 1; // item count after this insert
+ // The top bit of _nn is the 'finalized' flag, set by finalize().
+ if (_nn & 0x80000000)
+ fprintf(stderr, "ERROR: stdDev has been finalized; can't insert() new value.\n"), exit(1);
+
+ _mn = m0 + (val - m0) / n0; // updated mean
+ _sn = s0 + (val - m0) * (val - _mn); // add (val - old mean) * (val - new mean)
+ _nn = n0;
+ };
+ // remove() -- reverse the update that insert(val) performed; exits if finalized.
+ void remove(double val) {
+ uint32 n0 = _nn - 1; // item count after removal; NOTE(review): wraps around if nothing has been inserted (_nn == 0)
+ double m0 = (n0 == 0) ? (0) : ((_nn * _mn - val) / n0); // mean without val; 0 when the set becomes empty
+ double s0 = _sn - (val - m0) * (val - _mn);
+ // Members are only modified after the finalized check below.
+ if (_nn & 0x80000000)
+ fprintf(stderr, "ERROR: stdDev has been finalized; can't remove() old value.\n"), exit(1);
+
+ _nn = n0;
+ _mn = m0;
+ _sn = s0;
+ };
+ // finalize() -- replace the running sum with the standard deviation and set the finalized flag.
+ void finalize(void) {
+ _sn = stddev(); // _sn now holds the standard deviation itself
+ _nn |= 0x80000000;
+ };
+ uint32 size(void) { // number of items, with the finalized flag masked off
+ return(_nn & 0x7fffffff);
+ };
+ // mean() -- current mean of the inserted values.
+ double mean(void) {
+ return(_mn);
+ };
+ // variance() -- sample variance (divides by n-1); 0.0 with fewer than two items.
+ double variance(void) {
+ if (_nn & 0x80000000) // finalized: _sn is the standard deviation, so square it
+ return(_sn * _sn);
+ else
+ return((_nn < 2) ? (0.0) : (_sn / (_nn-1)));
+ };
+ // stddev() -- the stored value if finalized, otherwise sqrt(variance()).
+ double stddev(void) {
+ if (_nn & 0x80000000)
+ return(_sn);
+ else
+ return(sqrt(variance()));
+ };
+
+private:
+ double _mn; // mean
+ double _sn; // "sum of variances"; holds the standard deviation once finalized
+ uint32 _nn; // number of items in the set; top bit is the 'finalized' flag
+};
+
+
+
+
+// Offline mean and std.dev calculation. Filters outliers.
+// Does not work well with unsigned types. The 'smallest' compute can underflow.
+//
template<typename TT>
void
computeStdDev(vector<TT> dist, double &mean, double &stddev, bool isSorted=false) {
@@ -310,7 +394,7 @@ public:
};
void add(uint64 data, uint32 count=1) {
- if (_histogramAlloc < data)
+ while (_histogramAlloc < data)
resizeArray(_histogram, _histogramMax+1, _histogramAlloc, _histogramAlloc * 2, resizeArray_copyData | resizeArray_clearNew);
if (_histogramMax < data)
@@ -412,7 +496,9 @@ public:
break;
}
- // And, done.
+ // And, done
+
+ delete [] maddata;
_finalized = true;
};
diff --git a/src/AS_UTL/stddevTest.C b/src/AS_UTL/stddevTest.C
new file mode 100644
index 0000000..edb4815
--- /dev/null
+++ b/src/AS_UTL/stddevTest.C
@@ -0,0 +1,116 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-MAR-10
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#include "stddev.H"
+
+// g++ -Wall -o stddevTest -I. -I.. stddevTest.C
+
+void
+testInsert(void) { // Insert the same eight values into uint32, int32 and double accumulators and check the variance.
+ stdDev<uint32> sdu;
+ stdDev<int32> sdi;
+ stdDev<double> sdd;
+ // Every value is cast to uint32, including those passed to the int32 and double accumulators.
+ sdu.insert((uint32)2);
+ sdu.insert((uint32)4);
+ sdu.insert((uint32)4);
+ sdu.insert((uint32)4);
+ sdu.insert((uint32)5);
+ sdu.insert((uint32)5);
+ sdu.insert((uint32)7);
+ sdu.insert((uint32)9);
+
+ sdi.insert((uint32)2);
+ sdi.insert((uint32)4);
+ sdi.insert((uint32)4);
+ sdi.insert((uint32)4);
+ sdi.insert((uint32)5);
+ sdi.insert((uint32)5);
+ sdi.insert((uint32)7);
+ sdi.insert((uint32)9);
+
+ sdd.insert((uint32)2);
+ sdd.insert((uint32)4);
+ sdd.insert((uint32)4);
+ sdd.insert((uint32)4);
+ sdd.insert((uint32)5);
+ sdd.insert((uint32)5);
+ sdd.insert((uint32)7);
+ sdd.insert((uint32)9);
+
+ fprintf(stderr, "Expect mean=5, variance=%f, stddev=%f\n", 32.0 / 7.0, sqrt(32.0 / 7.0));
+
+ fprintf(stderr, " uint32 size %u mean %f variance %f stddev %f\n",
+ sdu.size(), sdu.mean(), sdu.variance(), sdu.stddev());
+ fprintf(stderr, " int32 size %u mean %f variance %f stddev %f\n",
+ sdi.size(), sdi.mean(), sdi.variance(), sdi.stddev());
+ fprintf(stderr, " double size %u mean %f variance %f stddev %f\n",
+ sdd.size(), sdd.mean(), sdd.variance(), sdd.stddev());
+ // NOTE(review): exact floating-point equality; passes only if the accumulated result equals 32.0 / 7.0 bit for bit.
+ assert(sdu.variance() == 32.0 / 7.0);
+ assert(sdi.variance() == 32.0 / 7.0);
+ assert(sdd.variance() == 32.0 / 7.0);
+
+ fprintf(stderr, "\n\n");
+}
+
+
+
+void
+testRemove(void) { // Insert ten values, then remove them in reverse order, printing mean and stddev at each step.
+ double values[10] = { 1, 2, 3, 4, 9, 8, 7, 6, 20, 30 };
+
+ stdDev<double> sd;
+
+ fprintf(stderr, "Expect final to be zero, and insert() == remove().\n");
+
+ for (int ii=0; ii<10; ii++) {
+ sd.insert(values[ii]);
+ fprintf(stderr, "insert[%2d] mean %8.4f stddev %8.4f\n", ii+1, sd.mean(), sd.stddev());
+ }
+ // The ten values sum to 90, so the mean is expected to be 9.
+ assert(sd.mean() == 9.0);
+
+ fprintf(stderr, "\n");
+
+ for (int ii=9; ii>=0; ii--) {
+ sd.remove(values[ii]);
+ fprintf(stderr, "remove[%2d] mean %8.4f stddev %8.4f\n", ii, sd.mean(), sd.stddev());
+ }
+ // After every value has been removed, both statistics are expected to be exactly zero.
+ assert(sd.mean() == 0.0);
+ assert(sd.stddev() == 0.0);
+}
+
+
+
+int
+main(int argc, char **argv) { // Test driver: runs testInsert() and testRemove(), then exits with status 0.
+
+ testInsert();
+ testRemove();
+
+ exit(0);
+}
diff --git a/src/AS_global.H b/src/AS_global.H
index e01795d..03d2e06 100644
--- a/src/AS_global.H
+++ b/src/AS_global.H
@@ -278,6 +278,7 @@ int AS_configure(int argc, char **argv);
static
void
omp_set_dynamic(int x) {
+ #pragma unused(x)
}
static
@@ -289,6 +290,7 @@ omp_get_max_threads(void) {
static
void
omp_set_num_threads(int x) {
+ #pragma unused(x)
}
static
@@ -308,21 +310,25 @@ typedef int omp_lock_t;
static
void
omp_init_lock(omp_lock_t *a) {
+ #pragma unused(a)
}
static
void
omp_set_lock(omp_lock_t *a) {
+ #pragma unused(a)
}
static
void
omp_unset_lock(omp_lock_t *a) {
+ #pragma unused(a)
}
static
void
omp_destroy_lock(omp_lock_t *a) {
+ #pragma unused(a)
}
#endif
diff --git a/src/Makefile b/src/Makefile
index 64547e9..19f4511 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -425,7 +425,8 @@ ifeq (${OSTYPE}, Darwin)
LDFLAGS +=
ifeq ($(BUILDDEBUG), 1)
- CXXFLAGS += -g -fno-omit-frame-pointer
+ CXXFLAGS += -g
+# -fno-omit-frame-pointer
else
ifeq ($(BUILDPROFILE), 1)
CXXFLAGS += -g3 -O4 -funroll-loops -fexpensive-optimizations -finline-functions -fno-omit-frame-pointer
@@ -466,6 +467,9 @@ ifeq (${MACHINETYPE}, amd64)
CXXFLAGS += -I/usr/local/include
LDFLAGS += -L/usr/local/lib
+ # callgrind
+ #CXXFLAGS += -g3 -Wa,--gstabs -save-temps
+
#CXXFLAGS += -DLIBUNWIND -I/usr/local/include
#LDFLAGS += -L/usr/local/lib -lunwind -lunwind-x86_64
@@ -498,6 +502,35 @@ ifeq (${MACHINETYPE}, arm)
endif
endif
+ifneq (,$(findstring CYGWIN, ${OSTYPE}))
+ CC ?= gcc
+ CXX ?= g++
+ CXXFLAGS := -pthread -Wno-write-strings -Wno-unused -Wno-char-subscripts -Wno-sign-compare
+ LDFLAGS := -pthread -lm
+
+ CXXFLAGS += -fopenmp
+ LDFLAGS += -fopenmp
+
+ CXXFLAGS += -Wall -Wextra -Wno-write-strings -Wno-unused -Wno-char-subscripts -Wno-sign-compare -Wformat
+
+ ifeq ($(BUILDPROFILE), 1)
+ CXXFLAGS +=
+ LDFLAGS += -pg
+ else
+ CXXFLAGS +=
+ LDFLAGS +=
+ endif
+
+ ifeq ($(BUILDDEBUG), 1)
+ CXXFLAGS += -g
+ else
+ ifeq ($(BUILDPROFILE), 1)
+ CXXFLAGS += -O4 -funroll-loops -fexpensive-optimizations -finline-functions
+ else
+ CXXFLAGS += -O4 -funroll-loops -fexpensive-optimizations -finline-functions -fomit-frame-pointer
+ endif
+ endif
+endif
# Include the main user-supplied submakefile. This also recursively includes
# all other user-supplied submakefiles.
@@ -519,6 +552,7 @@ all: UPDATE_VERSION MAKE_DIRS \
${TARGET_DIR}/lib/canu/CorrectReads.pm \
${TARGET_DIR}/lib/canu/Configure.pm \
${TARGET_DIR}/lib/canu/Defaults.pm \
+ ${TARGET_DIR}/lib/canu/ErrorEstimate.pm \
${TARGET_DIR}/lib/canu/Execution.pm \
${TARGET_DIR}/lib/canu/Gatekeeper.pm \
${TARGET_DIR}/lib/canu/Grid.pm \
@@ -600,6 +634,9 @@ ${TARGET_DIR}/lib/canu/Configure.pm: pipelines/canu/Configure.pm
${TARGET_DIR}/lib/canu/Defaults.pm: pipelines/canu/Defaults.pm
cp -pf pipelines/canu/Defaults.pm ${TARGET_DIR}/lib/canu/
+${TARGET_DIR}/lib/canu/ErrorEstimate.pm: pipelines/canu/ErrorEstimate.pm
+ cp -pf pipelines/canu/ErrorEstimate.pm ${TARGET_DIR}/lib/canu/
+
${TARGET_DIR}/lib/canu/Execution.pm: pipelines/canu/Execution.pm
cp -pf pipelines/canu/Execution.pm ${TARGET_DIR}/lib/canu/
diff --git a/src/bogart/AS_BAT_BestOverlapGraph.C b/src/bogart/AS_BAT_BestOverlapGraph.C
index 2f37fd5..d749b9f 100644
--- a/src/bogart/AS_BAT_BestOverlapGraph.C
+++ b/src/bogart/AS_BAT_BestOverlapGraph.C
@@ -31,20 +31,24 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-11
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
+
#include "AS_BAT_Unitig.H"
#include "intervalList.H"
+#include "stddev.H"
-// HACK
-uint32 examineOnly = UINT32_MAX;
-
void
BestOverlapGraph::removeSuspicious(void) {
@@ -96,494 +100,259 @@ BestOverlapGraph::removeSuspicious(void) {
}
if (verified == false) {
- if (no > 0)
- writeLog("BestOverlapGraph()-- frag "F_U32" is suspicious ("F_U32" overlaps).\n", fi, no);
-
#pragma omp critical (suspInsert)
_suspicious.insert(fi);
}
}
-}
-
-
-void
-BestOverlapGraph::examineOnlyTopN(void) {
- uint32 fiLimit = FI->numFragments();
- uint32 numThreads = omp_get_max_threads();
- uint32 blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
-
- writeLog("BestOverlapGraph()-- analyzing %d fragments for best edges, with %d threads.\n", fiLimit, numThreads);
- writeLog("BestOverlapGraph()-- scoring highest quality %d overlaps.\n", examineOnly);
-#pragma omp parallel for schedule(dynamic, blockSize)
- for (uint32 fi=1; fi <= fiLimit; fi++) {
- uint32 no = 0;
- uint32 n5 = 0;
- uint32 n3 = 0;
- BAToverlap *ovl = OC->getOverlaps(fi, AS_MAX_EVALUE, no);
-
- sort(ovl, ovl + no, BAToverlap_sortByErate);
-
- for (uint32 ii=0; ii<no; ii++) {
- if (((ovl[ii].a_hang >= 0) && (ovl[ii].b_hang <= 0)) ||
- ((ovl[ii].a_hang <= 0) && (ovl[ii].b_hang >= 0)))
- // Don't do contains here!
- continue;
-
- // Process the 5' overlaps.
- if ((n5 < examineOnly) &&
- (ovl[ii].a_hang < 0)) {
- assert(ovl[ii].b_hang < 0);
- n5++;
- scoreEdge(ovl[ii]);
- }
-
- // Process the 3' overlaps.
- if ((n3 < examineOnly) &&
- (ovl[ii].a_hang > 0)) {
- assert(ovl[ii].b_hang > 0);
- n3++;
- scoreEdge(ovl[ii]);
- }
- }
- }
+ writeLog("BestOverlapGraph()-- marked "F_U64" reads as suspicious.\n", _suspicious.size());
}
+
void
-BestOverlapGraph::removeSpurs(void) {
+BestOverlapGraph::removeHighErrorBestEdges(void) {
uint32 fiLimit = FI->numFragments();
uint32 numThreads = omp_get_max_threads();
uint32 blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
- writeLog("BestOverlapGraph()-- detecting spur fragments.\n");
+ stdDev<double> edgeStats;
- char *isSpur = new char [fiLimit + 1];
+ // Find the overlap for every best edge.
- memset(isSpur, 0, sizeof(char) * (fiLimit + 1));
+ double *erates = new double [fiLimit + 1 + fiLimit + 1];
+ double *absdev = new double [fiLimit + 1 + fiLimit + 1];
+ uint32 eratesLen = 0;
for (uint32 fi=1; fi <= fiLimit; fi++) {
- bool spur5 = (getBestEdgeOverlap(fi, false)->fragId() == 0);
- bool spur3 = (getBestEdgeOverlap(fi, true)->fragId() == 0);
+ BestEdgeOverlap *b5 = getBestEdgeOverlap(fi, false);
+ BestEdgeOverlap *b3 = getBestEdgeOverlap(fi, true);
- if (isContained(fi)) {
- //writeLog("BestOverlapGraph()-- frag "F_U32" is contained - %d %d.\n", fi, spur5, spur3);
- continue;
- }
+ if (b5->fragId() != 0) edgeStats.insert(erates[eratesLen++] = b5->erate());
+ if (b3->fragId() != 0) edgeStats.insert(erates[eratesLen++] = b3->erate());
+ }
- if ((spur5 == false) && (spur3 == false))
- // Edges off of both ends. Not a spur.
- continue;
+ _mean = edgeStats.mean();
+ _stddev = edgeStats.stddev();
- if ((spur5 == true) && (spur3 == true))
- // No edges off either end. Not a spur, just garbage. EXCEPT that this could also
- // be a contained read that has no 5'/3' best edges assigned to it.
- //writeLog("BestOverlapGraph()-- frag "F_U32" is singleton, no best edges and not contained.\n", fi);
- continue;
+ writeLog("removeHighErrorBestEdges()-- with %u points - mean %f stddev %f -- would use overlaps below %f fraction error\n",
+ edgeStats.size(), _mean, _stddev, _mean + _deviationGraph * _stddev);
- // Exactly one end is missing a best edge. Bad!
+ // Find the median and absolute deviations.
- writeLog("BestOverlapGraph()-- frag "F_U32" is a %s spur.\n", fi, (spur5) ? "5'" : "3'");
- isSpur[fi] = true;
- }
+ sort(erates, erates+eratesLen);
- // Remove best edges, so we can rebuild
+ _median = erates[ eratesLen / 2 ];
- memset(_bestA, 0, sizeof(BestOverlaps) * (fiLimit + 1));
- memset(_scorA, 0, sizeof(BestScores) * (fiLimit + 1));
+ for (uint32 ii=0; ii<eratesLen/2; ii++)
+ absdev[ii] = _median - erates[ii];
- // Rebuild best edges, ignoring edges to spurs. We build edges out of spurs, but don't allow edges into them.
- // This should prevent them from being incorporated into a promiscuous unitig, but still let them be popped
- // as bubbles (but they shouldn't because they're spurs).
+ for (uint32 ii=eratesLen/2; ii<eratesLen; ii++)
+ absdev[ii] = erates[ii] - _median;
- // PASS 3: Find containments.
+ sort(absdev, absdev+eratesLen);
- writeLog("BestOverlapGraph()-- analyzing %d fragments for best contains, with %d threads.\n", fiLimit, numThreads);
+ assert(absdev[0] >= 0.0);
-#pragma omp parallel for schedule(dynamic, blockSize)
- for (uint32 fi=1; fi <= fiLimit; fi++) {
- uint32 no = 0;
- BAToverlap *ovl = OC->getOverlaps(fi, AS_MAX_EVALUE, no);
+ _mad = absdev[eratesLen/2];
- for (uint32 ii=0; ii<no; ii++)
- scoreContainment(ovl[ii]);
- }
+ delete [] absdev;
+ delete [] erates;
- // PASS 4: Find dovetails.
+ writeLog("removeHighErrorBestEdges()-- with %u points - median %f mad %f - would use overlaps below %f fraction error\n",
+ edgeStats.size(), _median, _mad, _median + _deviationGraph * 1.4826 * _mad);
- writeLog("BestOverlapGraph()-- analyzing %d fragments for best edges, with %d threads.\n", fiLimit, numThreads);
+ // The real filtering is done on the next pass through findEdges(). Here, we just report statistics.
-#pragma omp parallel for schedule(dynamic, blockSize)
- for (uint32 fi=1; fi <= fiLimit; fi++) {
- uint32 no = 0;
- BAToverlap *ovl = OC->getOverlaps(fi, AS_MAX_EVALUE, no);
+ uint32 noedge = 0;
+ uint32 removed = 0;
+ uint32 retained = 0;
- for (uint32 ii=0; ii<no; ii++)
- if (isSpur[ovl[ii].b_iid] == false)
- scoreEdge(ovl[ii]);
+ for (uint32 fi=1; fi <= fiLimit; fi++) {
+ BestEdgeOverlap *b5 = getBestEdgeOverlap(fi, false);
+ BestEdgeOverlap *b3 = getBestEdgeOverlap(fi, true);
+
+ if (b5->fragId() == 0)
+ noedge++;
+ else if (b5->erate() > _mean + _deviationGraph * _stddev)
+ removed++;
+ else
+ retained++;
+
+ if (b3->fragId() == 0)
+ noedge++;
+ else if (b3->erate() > _mean + _deviationGraph * _stddev)
+ removed++;
+ else
+ retained++;
}
- delete [] isSpur;
+ writeLog("removeHighErrorBestEdges()-- %u ends have no best edge; %u ends are suspiciously high error; %u ends are acceptable.\n",
+ noedge, removed, retained);
}
+
void
-BestOverlapGraph::removeFalseBest(void) {
+BestOverlapGraph::removeLopsidedEdges(void) {
uint32 fiLimit = FI->numFragments();
uint32 numThreads = omp_get_max_threads();
uint32 blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
- writeLog("BestOverlapGraph()-- detecting false best overlaps.\n");
-
- // WARNING! This code was quite confused about erate and evalue (back when they were called
- // error and errorValue or somethihng confusing like that). Use with caution.
-
- uint32 *histo5 = new uint32 [AS_MAX_EVALUE + 1];
- uint32 *histo3 = new uint32 [AS_MAX_EVALUE + 1];
-
- memset(histo5, 0, sizeof(uint32) * (AS_MAX_EVALUE + 1));
- memset(histo3, 0, sizeof(uint32) * (AS_MAX_EVALUE + 1));
-
- uint32 *erate5 = new uint32 [fiLimit + 1];
- uint32 *erate3 = new uint32 [fiLimit + 1];
-
- memset(erate5, 0, sizeof(uint32) * (fiLimit + 1));
- memset(erate3, 0, sizeof(uint32) * (fiLimit + 1));
+ writeLog("BestOverlapGraph()-- removing suspicious edges from graph, with %d threads.\n", numThreads);
- char *altBest = new char [fiLimit + 1];
- char *isBad = new char [fiLimit + 1];
-
- memset(altBest, 0, sizeof(char) * (fiLimit + 1));
- memset(isBad, 0, sizeof(char) * (fiLimit + 1));
-
- // Compute a histogram of the current best edges, and save the erate of the best for each read.
+ uint32 nSuspicious = 0;
+ uint32 nContained = 0;
+ uint32 nSpur = 0;
+ uint32 nMutual = 0;
+ uint32 nAccepted = 0;
+ uint32 nRejected = 0;
+#pragma omp parallel for schedule(dynamic, blockSize)
for (uint32 fi=1; fi <= fiLimit; fi++) {
- uint32 olapsLen = 0;
- BAToverlap *olaps = OC->getOverlaps(fi, AS_MAX_EVALUE, olapsLen);
+ BestEdgeOverlap *this5 = getBestEdgeOverlap(fi, false);
+ BestEdgeOverlap *this3 = getBestEdgeOverlap(fi, true);
- BestEdgeOverlap *ovl5 = getBestEdgeOverlap(fi, false);
- BestEdgeOverlap *ovl3 = getBestEdgeOverlap(fi, true);
+ // Ignore spurs and contains...and previously detected suspicious reads. The suspicious reads
+ // do not have best edges back to them, and it's possible to find reads B where best edge A->B
+ // exists, yet no best edge from B exists.
- for (uint32 oo=0; oo<olapsLen; oo++) {
- assert(fi == olaps[oo].a_iid);
-
- if (ovl5->fragId() == olaps[oo].b_iid) {
- histo5[olaps[oo].evalue]++;
- erate5[fi] = olaps[oo].erate;
- }
-
- if (ovl3->fragId() == olaps[oo].b_iid) {
- histo3[olaps[oo].evalue]++;
- erate3[fi] = olaps[oo].erate;
- }
+ if (isSuspicious(fi) == true) {
+#pragma omp atomic
+ nSuspicious++;
+ continue;
}
- }
-
- // Compute a nice threshold. Find the mean and stddev of the best edge error rates.
-
- double m5 = 0, s5 = 100;
- double m3 = 0, s3 = 100;
-
- for (uint32 xx=0; xx<10; xx++) {
- double mean5 = 0, mean3 = 0;
- uint64 count5 = 0, count3 = 0;
- double stddev5 = 100, stddev3 = 100;
-
- for (uint32 er=0; er <= AS_MAX_EVALUE; er++) {
- double ER = AS_OVS_decodeEvalue(er) * 100;
-
- if (ER <= 0.0)
- continue;
- if ((m5 - 3 * s5 <= ER) &
- (ER <= m5 + 3 * s5)) {
- mean5 += histo5[er] * ER;
- count5 += histo5[er];
- }
-
- if ((m3 - 3 * s3 <= ER) &
- (ER <= m3 + 3 * s3)) {
- mean3 += histo3[er] * ER;
- count3 += histo3[er];
- }
+ if (isContained(fi) == true) {
+#pragma omp atomic
+ nContained++;
+ continue;
}
- mean5 /= count5;
- mean3 /= count3;
-
- for (uint32 er=0; er <= AS_MAX_EVALUE; er++) {
- double ER = AS_OVS_decodeEvalue(er) * 100;
-
- if (ER <= 0.0)
- continue;
-
- if ((m5 - 3 * s5 <= ER) &
- (ER <= m5 + 3 * s5)) {
- stddev5 += histo5[er] * (ER - mean5) * (ER - mean5);
- }
-
- if ((m3 - 3 * s3 <= ER) &
- (ER <= m3 + 3 * s3)) {
- stddev3 += histo3[er] * (ER - mean3) * (ER - mean3);
- }
+ if ((this5->fragId() == 0) ||
+ (this3->fragId() == 0)) {
+#pragma omp atomic
+ nSpur++;
+ continue;
}
- stddev5 = sqrt(stddev5 / (count5 - 1));
- stddev3 = sqrt(stddev3 / (count3 - 1));
-
- m5 = mean5; m3 = mean3;
- s5 = stddev5; s3 = stddev3;
-
- fprintf(stderr, "mean %.4f +- %.4f --- %.4f +- %.4f ",
- mean5, stddev5,
- mean3, stddev3);
-
- fprintf(stderr, "set xtics ( %.4f, %.4f, %.4f, %.4f)\n",
- mean5 - 3 * stddev5, mean5 + 3 * stddev5,
- mean3 - 3 * stddev3, mean3 + 3 * stddev3);
- } // xx 10 times to stabilize
+ // Find the overlap for this5 and this3.
+ int32 this5ovlLen = FI->overlapLength(fi, this5->fragId(), this5->ahang(), this5->bhang());
+ int32 this3ovlLen = FI->overlapLength(fi, this3->fragId(), this3->ahang(), this3->bhang());
- // Output the evalue histogram
+ // Find the edges for our best overlaps.
- {
- char EN[FILENAME_MAX];
+ BestEdgeOverlap *that5 = getBestEdgeOverlap(this5->fragId(), this5->frag3p());
+ BestEdgeOverlap *that3 = getBestEdgeOverlap(this3->fragId(), this3->frag3p());
- sprintf(EN, "best.edges.erate.histogram");
+ // If both point back to us, we're done.
- errno = 0;
- FILE *EH = fopen(EN, "w");
- if (errno)
- fprintf(stderr, "BestOverlapGraph()-- failed to open '%s' for writing: %s\n", EN, strerror(errno)), exit(1);
-
- for (uint32 er=0; er <= AS_MAX_EVALUE; er++) {
- double ER = AS_OVS_decodeEvalue(er);
-
- if (ER <= 0.0)
- continue;
-
- fprintf(EH, "%.4f\t%u\t%u\t%f\t%f\n",
- ER,
- histo5[er], // HUH?!?! This used to output decodedEvalue() of the histogram...nonsense!
- histo3[er],
- fabs(m5 - ER) / s5, // Grubb's test
- fabs(m3 - ER) / s3);
+ if ((that5->fragId() == fi) && (that5->frag3p() == false) &&
+ (that3->fragId() == fi) && (that3->frag3p() == true)) {
+#pragma omp atomic
+ nMutual++;
+ continue;
}
- fclose(EH);
- }
-
- // For any read with best edge above THRESHOLD error, see if there is an alternate best
- // that is within the target error range.
-
- double erate5thresh = m5 + 2 * s5; // Discard best if it is worse than 2 s.d. from mean.
- double erate3thresh = m3 + 2 * s3;
+ // If there is an overlap to something with no overlaps out of it, that's
+ // a little suspicious.
- for (uint32 fi=1; fi <= fiLimit; fi++) {
-
- if (erate5[fi] > erate5thresh) {
- fprintf(stderr, "RECOMPUTE frag %u 5'\n", fi);
- isBad[fi] = true;
- }
-
- if (erate3[fi] > erate3thresh) {
- fprintf(stderr, "RECOMPUTE frag %u 3'\n", fi);
- isBad[fi] = true;
+ // this5/that5 are the edges off the 5' end and this3/that3 the edges off the 3' end
+ // (getBestEdgeOverlap(fi, false) vs. (fi, true)); the message labels must follow the
+ // same order as the arguments.
+ if ((that5->fragId() == 0) ||
+ (that3->fragId() == 0)) {
+ writeLog("WARNING: read %u has overlap to spur - 5' to read %u back to %u - 3' to read %u back to %u\n",
+ fi,
+ this5->fragId(), that5->fragId(),
+ this3->fragId(), that3->fragId());
+#pragma omp critical (suspInsert)
+ _suspicious.insert(fi);
+ continue;
}
- }
-
- fprintf(stderr, "thresholds %f %f\n", erate5thresh, erate3thresh);
-
- double erateCthresh = MIN(erate5thresh, erate3thresh);
-
-#if 0
- // Remove best edges, so we can rebuild
-
- memset(_bestA, 0, sizeof(BestOverlaps) * (fiLimit + 1));
- memset(_scorA, 0, sizeof(BestScores) * (fiLimit + 1));
-
- // Rebuild best edges, ignoring edges to spurs. We build edges out of spurs, but don't allow edges into them.
- // This should prevent them from being incorporated into a promiscuous unitig, but still let them be popped
- // as bubbles (but they shouldn't because they're spurs).
-
- // PASS 3: Find containments.
- writeLog("BestOverlapGraph()-- analyzing %d fragments for best contains, with %d threads.\n", fiLimit, numThreads);
+ // Something doesn't agree. Find those overlaps...
-#pragma omp parallel for schedule(dynamic, blockSize)
- for (uint32 fi=1; fi <= fiLimit; fi++) {
- uint32 no = 0;
- BAToverlap *ovl = OC->getOverlaps(fi, AS_MAX_EVALUE, no);
+ int32 that5ovlLen = FI->overlapLength(this5->fragId(), that5->fragId(), that5->ahang(), that5->bhang());
+ int32 that3ovlLen = FI->overlapLength(this3->fragId(), that3->fragId(), that3->ahang(), that3->bhang());
- for (uint32 ii=0; ii<no; ii++)
- assert(ovl[ii].a_iid == fi);
+ // ...and compare.
- for (uint32 ii=0; ii<no; ii++)
- if (ovl[ii].error < erateCthresh)
- scoreContainment(ovl[ii]);
- }
-
- // PASS 4: Find dovetails.
-
- writeLog("BestOverlapGraph()-- analyzing %d fragments for best edges, with %d threads.\n", fiLimit, numThreads);
+ double percDiff5 = 200.0 * abs(this5ovlLen - that5ovlLen) / (this5ovlLen + that5ovlLen);
+ double percDiff3 = 200.0 * abs(this3ovlLen - that3ovlLen) / (this3ovlLen + that3ovlLen);
-#pragma omp parallel for schedule(dynamic, blockSize)
- for (uint32 fi=1; fi <= fiLimit; fi++) {
- uint32 no = 0;
- BAToverlap *ovl = OC->getOverlaps(fi, AS_MAX_EVALUE, no);
-
- if (isBad[ovl[fi].a_iid] == true)
- continue;
-
- for (uint32 ii=0; ii<no; ii++)
- assert(ovl[ii].a_iid == fi);
+ // Both ends must agree to within 5% for the read to be accepted; otherwise it is
+ // recorded as suspicious. The counters are shared across the threads of the enclosing
+ // parallel loop, so each increment is atomic, like the other counters in this loop.
+ if ((percDiff5 <= 5) &&
+ (percDiff3 <= 5)) {
+#if 0
+ writeLog("fi %8u -- %8u/%c' len %6u VS %8u/%c' len %6u %8.4f%% -- %8u/%c' len %6u VS %8u/%c' len %6u %8.4f%% -- ACCEPTED\n",
+ fi,
+ this5->fragId(), this5->frag3p() ? '3' : '5', this5ovlLen, that5->fragId(), that5->frag3p() ? '3' : '5', that5ovlLen, percDiff5,
+ this3->fragId(), this3->frag3p() ? '3' : '5', this3ovlLen, that3->fragId(), that3->frag3p() ? '3' : '5', that3ovlLen, percDiff3);
+#endif
+#pragma omp atomic
+ nAccepted++;
- for (uint32 ii=0; ii<no; ii++)
- if (isBad[ovl[ii].b_iid] == false)
- scoreEdge(ovl[ii]);
- }
+ } else {
+#if 0
+ writeLog("fi %8u -- %8u/%c' len %6u VS %8u/%c' len %6u %8.4f%% -- %8u/%c' len %6u VS %8u/%c' len %6u %8.4f%%\n",
+ fi,
+ this5->fragId(), this5->frag3p() ? '3' : '5', this5ovlLen, that5->fragId(), that5->frag3p() ? '3' : '5', that5ovlLen, percDiff5,
+ this3->fragId(), this3->frag3p() ? '3' : '5', this3ovlLen, that3->fragId(), that3->frag3p() ? '3' : '5', that3ovlLen, percDiff3);
#endif
+#pragma omp atomic
+ nRejected++;
- delete [] histo5;
- delete [] histo3;
+#pragma omp critical (suspInsert)
+ _suspicious.insert(fi);
+ }
+ }
- delete [] erate5;
- delete [] erate3;
+ writeLog("BestOverlapGraph()-- suspicious %u contained %u spur %u mutual-best %u accepted %u rejected %u\n",
+ nSuspicious, nContained, nSpur, nMutual, nAccepted, nRejected);
}
-
-
-
-
-
-
-
void
-BestOverlapGraph::removeWeak(double threshold) {
+BestOverlapGraph::removeSpurs(void) {
uint32 fiLimit = FI->numFragments();
uint32 numThreads = omp_get_max_threads();
uint32 blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
- writeLog("BestOverlapGraph()-- detecting weak overlaps.\n");
-
- // For each read, mark an overlap as bad if it falls in the lower
- // X% of overlaps sorted by identity.
-
- uint32 *minEvalue5p = new uint32 [fiLimit + 1];
- uint32 *minEvalue3p = new uint32 [fiLimit + 1];
-
- memset(minEvalue5p, 0, sizeof(uint32) * (fiLimit + 1));
- memset(minEvalue3p, 0, sizeof(uint32) * (fiLimit + 1));
+ writeLog("BestOverlapGraph()-- detecting spur fragments.\n");
- uint32 evaluesMax = 1048576;
- uint32 evalues5len = 0;
- uint32 *evalues5 = new uint32 [evaluesMax];
- uint32 evalues3len = 0;
- uint32 *evalues3 = new uint32 [evaluesMax];
+ _spur.clear();
for (uint32 fi=1; fi <= fiLimit; fi++) {
- uint32 olapsLen = 0;
- BAToverlap *olaps = OC->getOverlaps(fi, AS_MAX_EVALUE, olapsLen);
-
- uint64 ovl5sum = 0;
- uint32 ovl5cnt = 0;
-
- uint64 ovl3sum = 0;
- uint32 ovl3cnt = 0;
-
- evalues5len = 0;
- evalues5[0] = 0;
-
- evalues3len = 0;
- evalues3[0] = 0;
-
- // Find the error rate histogram for each end.
-
- for (uint32 oo=0; oo<olapsLen; oo++) {
- assert(fi == olaps[oo].a_iid);
-
- if ((AS_BAT_overlapAEndIs5prime(olaps[oo])) && (evalues5len < evaluesMax))
- evalues5[evalues5len++] = olaps[oo].evalue;
+ bool spur5 = (getBestEdgeOverlap(fi, false)->fragId() == 0);
+ bool spur3 = (getBestEdgeOverlap(fi, true)->fragId() == 0);
- if ((AS_BAT_overlapAEndIs3prime(olaps[oo])) && (evalues3len < evaluesMax))
- evalues3[evalues3len++] = olaps[oo].evalue;
- }
+ if (isContained(fi))
+ // Contained, not a spur.
+ continue;
- // Sort by increasing error rate.
+ if ((spur5 == false) && (spur3 == false))
+ // Edges off of both ends. Not a spur.
+ continue;
- sort(evalues5, evalues5 + evalues5len);
- sort(evalues3, evalues3 + evalues3len);
+ if ((spur5 == true) && (spur3 == true))
+ // No edges off either end. Not a spur, just garbage.
+ continue;
- // Pick a min erate for each end.
+ // Exactly one end is missing a best edge. Bad!
- minEvalue5p[fi] = evalues5[(int32)(evalues5len - evalues5len * threshold)];
- minEvalue3p[fi] = evalues3[(int32)(evalues3len - evalues3len * threshold)];
+ writeLog("BestOverlapGraph()-- frag "F_U32" is a %s spur.\n", fi, (spur5) ? "5'" : "3'");
- if ((fi % 1000) == 0) {
- fprintf(stderr, "len %d %d t %f vals %d %f %d %f\n", evalues5len, evalues3len, threshold,
- minEvalue5p[fi], AS_OVS_decodeEvalue(minEvalue5p[fi]),
- minEvalue3p[fi], AS_OVS_decodeEvalue(minEvalue3p[fi]));
- }
+ _spur.insert(fi);
}
-
- delete [] evalues5;
- delete [] evalues3;
-
- // Throw this at the OverlapCache, so it can remove overlaps.
-
- OC->removeWeakOverlaps(minEvalue5p, minEvalue3p);
-
- delete [] minEvalue5p;
- delete [] minEvalue3p;
}
-BestOverlapGraph::BestOverlapGraph(double erate,
- const char *prefix,
- double doRemoveWeakThreshold,
- bool doRemoveSuspicious,
- bool doRemoveSpurs) {
-
- bool doExamineOnlyTopN = false;
- bool doRemoveFalseBest = false;
-
- setLogFile(prefix, "bestOverlapGraph");
-
- writeLog("BestOverlapGraph-- allocating best edges ("F_SIZE_T"MB) and containments ("F_SIZE_T"MB)\n",
- ((2 * sizeof(BestEdgeOverlap) * (FI->numFragments() + 1)) >> 20),
- ((1 * sizeof(BestContainment) * (FI->numFragments() + 1)) >> 20));
-
- _bestA = new BestOverlaps [FI->numFragments() + 1];
- _scorA = new BestScores [FI->numFragments() + 1];
-
- memset(_bestA, 0, sizeof(BestOverlaps) * (FI->numFragments() + 1));
- memset(_scorA, 0, sizeof(BestScores) * (FI->numFragments() + 1));
-
- _restrict = NULL;
- _restrictEnabled = false;
-
- _erate = erate;
-
- // Initialize parallelism.
-
+void
+BestOverlapGraph::findEdges(void) {
uint32 fiLimit = FI->numFragments();
uint32 numThreads = omp_get_max_threads();
uint32 blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
- // PASS 0: Find suspicious fragments. For any found, mark as suspicious and don't allow
- // these to be best overlaps.
-
- if (doRemoveWeakThreshold > 0.0)
- removeWeak(doRemoveWeakThreshold);
-
- if (doRemoveSuspicious)
- removeSuspicious();
-
- // PASS 1: Find containments.
+ memset(_bestA, 0, sizeof(BestOverlaps) * (fiLimit + 1));
+ memset(_scorA, 0, sizeof(BestScores) * (fiLimit + 1));
writeLog("BestOverlapGraph()-- analyzing %d fragments for best contains, with %d threads.\n", fiLimit, numThreads);
@@ -596,8 +365,6 @@ BestOverlapGraph::BestOverlapGraph(double erate,
scoreContainment(ovl[ii]);
}
- // PASS 2: Find dovetails.
-
writeLog("BestOverlapGraph()-- analyzing %d fragments for best edges, with %d threads.\n", fiLimit, numThreads);
#pragma omp parallel for schedule(dynamic, blockSize)
@@ -605,159 +372,124 @@ BestOverlapGraph::BestOverlapGraph(double erate,
uint32 no = 0;
BAToverlap *ovl = OC->getOverlaps(fi, AS_MAX_EVALUE, no);
+ // Build edges out of spurs, but don't allow edges into them. This should prevent them from
+ // being incorporated into a promiscuous unitig, but still let them be popped as bubbles (but
+ // they shouldn't because they're spurs).
+
for (uint32 ii=0; ii<no; ii++)
- scoreEdge(ovl[ii]);
+ if (_spur.count(ovl[ii].b_iid) == 0)
+ scoreEdge(ovl[ii]);
}
+}
- // Now, several optional refinements.
-
- if (doExamineOnlyTopN)
- examineOnlyTopN();
-
- if (doRemoveSpurs)
- removeSpurs();
-
- if (doRemoveFalseBest)
- removeFalseBest();
- // Done with the scoring data.
- delete [] _scorA;
- _scorA = NULL;
-
- // Finally, remove dovetail overlaps for contained fragments.
+void
+BestOverlapGraph::removeContainedDovetails(void) {
+ uint32 fiLimit = FI->numFragments();
- writeLog("BestOverlapGraph()-- removing best edges for contained fragments, with %d threads.\n", numThreads);
+ writeLog("BestOverlapGraph()-- removing best edges for contained fragments.\n");
-#pragma omp parallel for schedule(dynamic, blockSize)
for (uint32 fi=1; fi <= fiLimit; fi++) {
if (isContained(fi) == true) {
- getBestEdgeOverlap(fi, false)->set(0, 0, 0, 0);
- getBestEdgeOverlap(fi, true) ->set(0, 0, 0, 0);
+ getBestEdgeOverlap(fi, false)->clear();
+ getBestEdgeOverlap(fi, true) ->clear();
}
}
+}
- writeLog("BestOverlapGraph()-- dumping best edges/contains/singletons.\n");
- reportBestEdges(prefix);
- setLogFile(prefix, NULL);
-}
+BestOverlapGraph::BestOverlapGraph(double erateGraph,
+ double deviationGraph,
+ const char *prefix) {
+ setLogFile(prefix, "bestOverlapGraph");
-void
-BestOverlapGraph::rebuildBestContainsWithoutSingletons(UnitigVector &unitigs,
- double erate,
- const char *prefix) {
+ writeLog("BestOverlapGraph-- allocating best edges ("F_SIZE_T"MB)\n",
+ ((2 * sizeof(BestEdgeOverlap) * (FI->numFragments() + 1)) >> 20));
- _erate = erate;
+ _bestA = new BestOverlaps [FI->numFragments() + 1]; // Cleared in findEdges()
+ _scorA = new BestScores [FI->numFragments() + 1];
- uint32 fiLimit = FI->numFragments();
+ _mean = erateGraph;
+ _stddev = 0.0;
- assert(_restrict == NULL);
- assert(_restrictEnabled == false);
+ _median = erateGraph;
+ _mad = 0.0;
- // Save the current best containments for a nice log, then clear
+ _suspicious.clear();
- assert(_bestA != NULL);
+ _bestM.clear();
+ _scorM.clear();
- BestContainment *bestCold = new BestContainment [fiLimit + 1];
+ _restrict = NULL;
+ _restrictEnabled = false;
- for (uint32 fi=0; fi<=fiLimit; fi++) {
- bestCold[fi] = _bestA[fi]._bestC;
+ _erateGraph = erateGraph;
+ _deviationGraph = deviationGraph;
- // Clearing this destroys unitigs??
+ // Mark reads as suspicious if they are not fully covered by overlaps.
- if (bestCold[fi].isContained == false) {
- assert(_bestA[fi]._bestC.container == 0);
- assert(_bestA[fi]._bestC.sameOrientation == false);
- assert(_bestA[fi]._bestC.a_hang == 0);
- assert(_bestA[fi]._bestC.b_hang == 0);
- }
+ removeSuspicious();
+ findEdges();
- _bestA[fi]._bestC.container = 0;
- _bestA[fi]._bestC.sameOrientation = false;
- _bestA[fi]._bestC.a_hang = 0;
- _bestA[fi]._bestC.b_hang = 0;
- }
+ if (logFileFlagSet(LOG_ALL_BEST_EDGES))
+ reportBestEdges(prefix, "best.0.initial");
- // Allocate space for new scores
+ // Analyze the current best edges to set a cutoff on overlap quality used for graph building.
- assert(_scorA == NULL);
+ removeHighErrorBestEdges();
+ findEdges();
- _scorA = new BestScores [fiLimit + 1];
+ if (logFileFlagSet(LOG_ALL_BEST_EDGES))
+ reportBestEdges(prefix, "best.1.filtered");
- memset(_scorA, 0, sizeof(BestScores) * (fiLimit + 1));
+ // Mark reads as suspicious if the length of the best edge out is very different than the length
+ // of the best edge that should be back to us. E.g., if readA has best edge to readB (of length
+ // lenAB), but readB has best edge to readC (of length lenBC), and lenAB is much shorter than
+ // lenBC, then something is wrong with readA.
+ //
+ // This must come before removeSpurs().
- // Rebuild contains ignoring singleton containers
+ removeLopsidedEdges();
+ findEdges();
- for (uint32 fi=1; fi<=fiLimit; fi++) {
- uint32 no = 0;
+ if (logFileFlagSet(LOG_ALL_BEST_EDGES))
+ reportBestEdges(prefix, "best.2.cleaned");
- if (bestCold[fi].isContained == false)
- continue;
+ // Mark reads as spurs, so we don't find best edges to them.
- BAToverlap *ovl = OC->getOverlaps(fi, AS_MAX_EVALUE, no);
+ removeSpurs();
+ findEdges();
- for (uint32 ii=0; ii<no; ii++) {
- uint32 autg = Unitig::fragIn(ovl[ii].a_iid);
- uint32 butg = Unitig::fragIn(ovl[ii].b_iid);
+ reportBestEdges(prefix, logFileFlagSet(LOG_ALL_BEST_EDGES) ? "best.3.final" : "best");
- assert(autg == 0); // Contained cannot be placed yet.
+ // One more pass, to find any ambiguous best edges.
- if ((butg != 0) &&
- (unitigs[butg]->ufpath.size() == 1))
- // Skip; container is a in a unitig, and that unitig is a singleton.
- continue;
+ // Cleanup the contained reads. Why?
- scoreContainment(ovl[ii]);
- }
- }
+ removeContainedDovetails();
+
+ // Done with scoring data.
delete [] _scorA;
_scorA = NULL;
- // Remove best edges for contains (shouldn't be any; we didn't make new ones since the last time we removed)
-
- for (uint32 fi=1; fi <= fiLimit; fi++) {
- if (isContained(fi) == true) {
- getBestEdgeOverlap(fi, false)->set(0, 0, 0, 0);
- getBestEdgeOverlap(fi, true) ->set(0, 0, 0, 0);
- }
- }
-
- // Log changes
-
- for (uint32 fi=0; fi<=fiLimit; fi++) {
- if ((bestCold[fi].container != _bestA[fi]._bestC.container) ||
- (bestCold[fi].sameOrientation != _bestA[fi]._bestC.sameOrientation) ||
- (bestCold[fi].a_hang != _bestA[fi]._bestC.a_hang) ||
- (bestCold[fi].b_hang != _bestA[fi]._bestC.b_hang))
- writeLog("frag %u changed container from %c %u/%c/%d/%d to %c %u/%c/%d/%d\n",
- fi,
- bestCold[fi].isContained ? 'T' : 'F',
- bestCold[fi].container,
- bestCold[fi].sameOrientation ? 'N' : 'A',
- bestCold[fi].a_hang,
- bestCold[fi].b_hang,
- _bestA[fi]._bestC.isContained ? 'T' : 'F',
- _bestA[fi]._bestC.container,
- _bestA[fi]._bestC.sameOrientation ? 'N' : 'A',
- _bestA[fi]._bestC.a_hang,
- _bestA[fi]._bestC.b_hang);
- }
+ _spur.clear();
- delete [] bestCold;
+ setLogFile(prefix, NULL);
}
+BestOverlapGraph::BestOverlapGraph(double erateGraph,
+ double deviationGraph,
+ set<uint32> *restrict) {
-BestOverlapGraph::BestOverlapGraph(double erate,
- set<uint32> *restrict) {
-
- _erate = erate;
+ _erateGraph = erateGraph;
+ _deviationGraph = deviationGraph;
_bestA = NULL;
_scorA = NULL;
@@ -813,8 +545,8 @@ BestOverlapGraph::BestOverlapGraph(double erate,
uint32 fi = *it;
if (isContained(fi) == true) {
- getBestEdgeOverlap(fi, false)->set(0, 0, 0, 0);
- getBestEdgeOverlap(fi, true) ->set(0, 0, 0, 0);
+ getBestEdgeOverlap(fi, false)->clear();
+ getBestEdgeOverlap(fi, true) ->clear();
}
}
@@ -826,8 +558,8 @@ BestOverlapGraph::BestOverlapGraph(double erate,
if ((getBestEdgeOverlap(fi, false)->fragId() == 0) ||
(getBestEdgeOverlap(fi, true)->fragId() == 0)) {
- getBestEdgeOverlap(fi, false)->set(0, 0, 0, 0);
- getBestEdgeOverlap(fi, true) ->set(0, 0, 0, 0);
+ getBestEdgeOverlap(fi, false)->clear();
+ getBestEdgeOverlap(fi, true) ->clear();
}
}
#endif
@@ -839,65 +571,189 @@ BestOverlapGraph::BestOverlapGraph(double erate,
+void
+BestOverlapGraph::reportBestEdges(const char *prefix, const char *label) {
+ char N[FILENAME_MAX];
+ FILE *BCH = NULL;
+ FILE *BE = NULL, *BEH = NULL, *BEG;
+ FILE *BS = NULL;
+ FILE *SS = NULL;
+
+ sprintf(N, "%s.%s.edges", prefix, label); BE = fopen(N, "w");
+ sprintf(N, "%s.%s.singletons", prefix, label); BS = fopen(N, "w");
+ sprintf(N, "%s.%s.edges.suspicious", prefix, label); SS = fopen(N, "w");
+ sprintf(N, "%s.%s.contains.histogram", prefix, label); BCH = fopen(N, "w");
+ sprintf(N, "%s.%s.edges.histogram", prefix, label); BEH = fopen(N, "w");
+ sprintf(N, "%s.%s.edges.gfa", prefix, label); BEG = fopen(N, "w");
+ // Write the per-read reports. Edges and singletons are written only when both of their
+ // files opened. The suspicious-edges file is written when it opened and is otherwise
+ // skipped, so a failed open no longer leads to a write through a NULL stream.
+ if ((BE) && (BS)) {
+ fprintf(BE, "#fragId\tlibId\tbest5iid\tbest5end\tbest3iid\tbest3end\teRate5\teRate3\tbest5len\tbest3len\n");
+ fprintf(BS, "#fragId\tlibId\n");
+
+ for (uint32 id=1; id<FI->numFragments() + 1; id++) {
+ BestEdgeOverlap *bestedge5 = getBestEdgeOverlap(id, false);
+ BestEdgeOverlap *bestedge3 = getBestEdgeOverlap(id, true);
+ // No best edge off either end and not contained: a singleton.
+ if ((bestedge5->fragId() == 0) && (bestedge3->fragId() == 0) && (isContained(id) == false)) {
+ fprintf(BS, "%u\t%u\n", id, FI->libraryIID(id));
+ }
+
+ else if (_suspicious.count(id) > 0) {
+ if (SS)
+ fprintf(SS, "%u\t%u\t%u\t%c'\t%u\t%c'\t%6.4f\t%6.4f\t%u\t%u%s\n", id, FI->libraryIID(id),
+ bestedge5->fragId(), bestedge5->frag3p() ? '3' : '5',
+ bestedge3->fragId(), bestedge3->frag3p() ? '3' : '5',
+ AS_OVS_decodeEvalue(bestedge5->evalue()),
+ AS_OVS_decodeEvalue(bestedge3->evalue()),
+ (bestedge5->fragId() == 0 ? 0 : FI->overlapLength(id, bestedge5->fragId(), bestedge5->ahang(), bestedge5->bhang())),
+ (bestedge3->fragId() == 0 ? 0 : FI->overlapLength(id, bestedge3->fragId(), bestedge3->ahang(), bestedge3->bhang())),
+ isContained(id) ? "\tcontained" : "");
+ }
+
+ else {
+ fprintf(BE, "%u\t%u\t%u\t%c'\t%u\t%c'\t%6.4f\t%6.4f\t%u\t%u%s\n", id, FI->libraryIID(id),
+ bestedge5->fragId(), bestedge5->frag3p() ? '3' : '5',
+ bestedge3->fragId(), bestedge3->frag3p() ? '3' : '5',
+ AS_OVS_decodeEvalue(bestedge5->evalue()),
+ AS_OVS_decodeEvalue(bestedge3->evalue()),
+ (bestedge5->fragId() == 0 ? 0 : FI->overlapLength(id, bestedge5->fragId(), bestedge5->ahang(), bestedge5->bhang())),
+ (bestedge3->fragId() == 0 ? 0 : FI->overlapLength(id, bestedge3->fragId(), bestedge3->ahang(), bestedge3->bhang())),
+ isContained(id) ? "\tcontained" : "");
+ }
+ }
+ }
+
+ // Close every file that did open, even if another open failed. fclose() must not be
+ // handed a NULL stream, so each close is guarded.
+ if (BE) fclose(BE);
+ if (BS) fclose(BS);
+ if (SS) fclose(SS);
+ if (BEG) {
+ fprintf(BEG, "H\tVN:Z:bogart/edges\n");
+ // First, write the sequences used.
+ for (uint32 id=1; id<FI->numFragments() + 1; id++) {
+ BestEdgeOverlap *bestedge5 = getBestEdgeOverlap(id, false);
+ BestEdgeOverlap *bestedge3 = getBestEdgeOverlap(id, true);
+ if ((bestedge5->fragId() == 0) && (bestedge3->fragId() == 0) && (isContained(id) == false)) {
+ // Do nothing, a singleton.
+ }
+ else if (isContained(id) == true) {
+ // Do nothing, a contained read.
+ }
-void
-BestOverlapGraph::reportBestEdges(const char *prefix) {
- char N[FILENAME_MAX];
+ else if (_suspicious.count(id) > 0) {
+ // Do nothing, a suspicious read.
+ }
- sprintf(N, "%s.best.contains", prefix); FILE *BC = fopen(N, "w");
- sprintf(N, "%s.best.edges", prefix); FILE *BE = fopen(N, "w");
- sprintf(N, "%s.best.singletons", prefix); FILE *BS = fopen(N, "w");
+ else {
+ // Report the read, it has best edges - including contained reads.
+ fprintf(BEG, "S\tread%08u\t*\tLN:i:%u\n", id, FI->fragmentLength(id));
+ }
+ }
- if ((BC) && (BE) && (BS)) {
- fprintf(BC, "#fragId\tlibId\tbestCont\teRate\n");
- fprintf(BE, "#fragId\tlibId\tbest5iid\tbest5end\tbest3iid\tbest3end\teRate5\teRate3\n");
- fprintf(BS, "#fragId\tlibId\n");
+ // Now, report edges. GFA wants edges in exactly this format:
+ //
+ // -------------
+ // -------------
+ //
+ // with read orientation given by +/-. Conveniently, this is what we've saved (for the edges).
for (uint32 id=1; id<FI->numFragments() + 1; id++) {
- BestContainment *bestcont = getBestContainer(id);
BestEdgeOverlap *bestedge5 = getBestEdgeOverlap(id, false);
BestEdgeOverlap *bestedge3 = getBestEdgeOverlap(id, true);
- if (bestcont->isContained) {
- double erate = OC->findErate(id, bestcont->container);
-
- fprintf(BC, "%u\t%u\t%u\t%6.4f\n", id, FI->libraryIID(id), bestcont->container, erate);
+ if ((bestedge5->fragId() == 0) && (bestedge3->fragId() == 0) && (isContained(id) == false)) {
+ // Do nothing, a singleton.
}
- else if ((bestedge5->fragId() > 0) || (bestedge3->fragId() > 0)) {
- double erate5 = OC->findErate(id, bestedge5->fragId());
- double erate3 = OC->findErate(id, bestedge3->fragId());
+ else if (isContained(id) == true) {
+ // Do nothing, a contained read.
+ }
- fprintf(BE, "%u\t%u\t%u\t%c'\t%u\t%c'\t%6.4f\t%6.4f\n", id, FI->libraryIID(id),
- bestedge5->fragId(), bestedge5->frag3p() ? '3' : '5',
- bestedge3->fragId(), bestedge3->frag3p() ? '3' : '5',
- erate5, erate3);
+ else if (_suspicious.count(id) > 0) {
+ // Do nothing, a suspicious read.
}
else {
- fprintf(BS, "%u\t%u\n", id, FI->libraryIID(id));
+ if (bestedge5->fragId() != 0) {
+ int32 ahang = bestedge5->ahang();
+ int32 bhang = bestedge5->bhang();
+ int32 olaplen = FI->overlapLength(id, bestedge5->fragId(), bestedge5->ahang(), bestedge5->bhang());
+
+ assert((ahang <= 0) && (bhang <= 0)); // ALL 5' edges should be this.
+
+ fprintf(BEG, "L\tread%08u\t-\tread%08u\t%c\t%uM\n",
+ id,
+ bestedge5->fragId(), bestedge5->frag3p() ? '-' : '+',
+ olaplen);
+ }
+
+ if (bestedge3->fragId() != 0) {
+ int32 ahang = bestedge3->ahang();
+ int32 bhang = bestedge3->bhang();
+ int32 olaplen = FI->overlapLength(id, bestedge3->fragId(), bestedge3->ahang(), bestedge3->bhang());
+
+ assert((ahang >= 0) && (bhang >= 0)); // ALL 3' edges should be this.
+
+ fprintf(BEG, "L\tread%08u\t+\tread%08u\t%c\t%uM\n",
+ id,
+ bestedge3->fragId(), bestedge3->frag3p() ? '-' : '+',
+ FI->overlapLength(id, bestedge3->fragId(), bestedge3->ahang(), bestedge3->bhang()));
+ }
}
}
- fclose(BC);
- fclose(BE);
- fclose(BS);
+ fclose(BEG);
}
-}
+ // Dump sorted error rates, one value per line: best edges of contained reads go to the
+ // contains histogram, best edges of all other reads to the edges histogram. Each array
+ // has room for two entries (5' and 3') per read.
+ if ((BCH) && (BEH)) {
+ double *bc = new double [FI->numFragments() + 1 + FI->numFragments() + 1];
+ double *be = new double [FI->numFragments() + 1 + FI->numFragments() + 1];
+ uint32 bcl = 0;
+ uint32 bel = 0;
+
+ for (uint32 id=1; id<FI->numFragments() + 1; id++) {
+ BestEdgeOverlap *bestedge5 = getBestEdgeOverlap(id, false);
+ BestEdgeOverlap *bestedge3 = getBestEdgeOverlap(id, true);
+
+ if (isContained(id)) {
+ //bc[bcl++] = bestcont->erate();
+#warning what is the error rate of the 'best contained' overlap?
+ bc[bcl++] = bestedge5->erate();
+ bc[bcl++] = bestedge3->erate();
+ }
+ else {
+ if (bestedge5->fragId() > 0)
+ be[bel++] = bestedge5->erate();
+
+ if (bestedge3->fragId() > 0)
+ be[bel++] = bestedge3->erate();
+ }
+ }
+
+ sort(bc, bc+bcl);
+ sort(be, be+bel);
+
+ for (uint32 ii=0; ii<bcl; ii++)
+ fprintf(BCH, "%f\n", bc[ii]);
+
+ for (uint32 ii=0; ii<bel; ii++)
+ fprintf(BEH, "%f\n", be[ii]);
+
+ delete [] bc;
+ delete [] be;
+ }
+
+ // Close whichever histogram file opened, so that one failed open does not leak the other.
+ if (BCH) fclose(BCH);
+ if (BEH) fclose(BEH);
+}
@@ -923,41 +779,7 @@ BestOverlapGraph::scoreContainment(const BAToverlap& olap) {
// We only save if A is the contained fragment.
return;
- uint64 newScr = scoreOverlap(olap);
-
- assert(newScr > 0);
-
- // The previous version (1.5) saved if A contained B. This was breaking the overlap filtering,
- // because long A fragments containing short B fragments would have those containment overlaps
- // filtered out. Version 1.6 reversed what is saved here so that the containment overlap is
- // associated with the A fragment (the containee).
- //
- // The hangs will transform the container coordinates into the containee cordinates.
-
- if (newScr > bestCscore(olap.a_iid)) {
- BestContainment *c = getBestContainer(olap.a_iid);
-
-#if 0
- writeLog("set best for %d from "F_U32" score="F_U64" to "F_U32" score="F_U64"\n",
- olap.a_iid,
- c->container, bestCscore(olap.a_iid),
- olap.b_iid, newScr);
-#endif
-
- c->container = olap.b_iid;
- c->isContained = true;
- c->sameOrientation = olap.flipped ? false : true;
- c->a_hang = olap.flipped ? olap.b_hang : -olap.a_hang;
- c->b_hang = olap.flipped ? olap.a_hang : -olap.b_hang;
-
- bestCscore(olap.a_iid) = newScr;
-#if 0
- } else {
- writeLog("NOT best for %d WITH "F_U32" score="F_U64"\n",
- olap.a_iid,
- olap.b_iid, newScr);
-#endif
- }
+ setContained(olap.a_iid);
}
@@ -971,7 +793,7 @@ BestOverlapGraph::scoreEdge(const BAToverlap& olap) {
if (isOverlapBadQuality(olap)) {
// Yuck. Don't want to use this crud.
- if ((enableLog == true) && ((enableLog == true) && (logFileFlags & LOG_OVERLAP_QUALITY)))
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
writeLog("scoreEdge()-- OVERLAP BADQ: %d %d %c hangs "F_S32" "F_S32" err %.3f -- bad quality\n",
olap.a_iid, olap.b_iid, olap.flipped ? 'A' : 'N', olap.a_hang, olap.b_hang, olap.erate);
return;
@@ -979,15 +801,15 @@ BestOverlapGraph::scoreEdge(const BAToverlap& olap) {
if (isOverlapRestricted(olap)) {
// Whoops, don't want this overlap for this BOG
- if ((enableLog == true) && (logFileFlags & LOG_OVERLAP_QUALITY))
- writeLog("scoreEdge()-- OVERLAP REST: %d %d %c hangs "F_S32" "F_S32" err %.3f -- restricted\n",
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
+ writeLog("scoreEdge()-- OVERLAP RESTRICT: %d %d %c hangs "F_S32" "F_S32" err %.3f -- restricted\n",
olap.a_iid, olap.b_iid, olap.flipped ? 'A' : 'N', olap.a_hang, olap.b_hang, olap.erate);
return;
}
if (isSuspicious(olap.b_iid)) {
// Whoops, don't want this overlap for this BOG
- if ((enableLog == true) && (logFileFlags & LOG_OVERLAP_QUALITY))
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
writeLog("scoreEdge()-- OVERLAP SUSP: %d %d %c hangs "F_S32" "F_S32" err %.3f -- suspicious\n",
olap.a_iid, olap.b_iid, olap.flipped ? 'A' : 'N', olap.a_hang, olap.b_hang, olap.erate);
return;
@@ -996,7 +818,7 @@ BestOverlapGraph::scoreEdge(const BAToverlap& olap) {
if (((olap.a_hang >= 0) && (olap.b_hang <= 0)) ||
((olap.a_hang <= 0) && (olap.b_hang >= 0))) {
// Skip containment overlaps.
- if ((enableLog == true) && (logFileFlags & LOG_OVERLAP_QUALITY))
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
writeLog("scoreEdge()-- OVERLAP CONT: %d %d %c hangs "F_S32" "F_S32" err %.3f -- container read\n",
olap.a_iid, olap.b_iid, olap.flipped ? 'A' : 'N', olap.a_hang, olap.b_hang, olap.erate);
return;
@@ -1004,28 +826,32 @@ BestOverlapGraph::scoreEdge(const BAToverlap& olap) {
if (isContained(olap.b_iid) == true) {
// Skip overlaps to contained reads (allow scoring of best edges from contained reads).
- if ((enableLog == true) && (logFileFlags & LOG_OVERLAP_QUALITY))
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
writeLog("scoreEdge()-- OVERLAP CONT: %d %d %c hangs "F_S32" "F_S32" err %.3f -- contained read\n",
olap.a_iid, olap.b_iid, olap.flipped ? 'A' : 'N', olap.a_hang, olap.b_hang, olap.erate);
return;
}
uint64 newScr = scoreOverlap(olap);
- bool a3p = AS_BAT_overlapAEndIs3prime(olap);
+ bool a3p = olap.AEndIs3prime();
BestEdgeOverlap *best = getBestEdgeOverlap(olap.a_iid, a3p);
uint64 &score = (a3p) ? (best3score(olap.a_iid)) : (best5score(olap.a_iid));
assert(newScr > 0);
- if (newScr <= score)
+ if (newScr <= score) {
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
+ writeLog("scoreEdge()-- OVERLAP GOOD: %d %d %c hangs "F_S32" "F_S32" err %.3f -- no better than best\n",
+ olap.a_iid, olap.b_iid, olap.flipped ? 'A' : 'N', olap.a_hang, olap.b_hang, olap.erate);
return;
+ }
best->set(olap);
score = newScr;
- if ((enableLog == true) && (logFileFlags & LOG_OVERLAP_QUALITY))
- writeLog("OVERLAP GOOD: %d %d %c hangs "F_S32" "F_S32" err %.3f -- NOW BEST\n",
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
+ writeLog("scoreEdge()-- OVERLAP BEST: %d %d %c hangs "F_S32" "F_S32" err %.3f -- NOW BEST\n",
olap.a_iid, olap.b_iid, olap.flipped ? 'A' : 'N', olap.a_hang, olap.b_hang, olap.erate);
}
@@ -1045,11 +871,16 @@ BestOverlapGraph::isOverlapBadQuality(const BAToverlap& olap) {
// assembly, but sometimes us users want to delete fragments after overlaps are generated.
return(true);
- // The overlap is GOOD (false == not bad) if the corrected error rate is below the requested
- // erate.
+ // The overlap is GOOD (false == not bad) if the error rate is below the allowed erate.
+ // Initially, this is just the erate passed in. After the first round of finding edges,
+ // it is reset to the mean and stddev of selected best edges.
//
- if (olap.erate <= _erate) {
- if ((enableLog == true) && (logFileFlags & LOG_OVERLAP_QUALITY))
+
+ double Tstddev = _mean + _deviationGraph * _stddev;
+ double Tmad = _median + _deviationGraph * 1.4826 * _mad;
+
+ if (olap.erate <= Tmad || (Tmad == 0 && olap.erate <= Tstddev)) {
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
writeLog("isOverlapBadQuality()-- OVERLAP GOOD: %d %d %c hangs "F_S32" "F_S32" err %.3f\n",
olap.a_iid, olap.b_iid,
olap.flipped ? 'A' : 'N',
@@ -1064,7 +895,7 @@ BestOverlapGraph::isOverlapBadQuality(const BAToverlap& olap) {
// against another limit. This was to allow very short overlaps where one error would push the
// error rate above a few percent. canu doesn't do short overlaps.
- if ((enableLog == true) && (logFileFlags & LOG_OVERLAP_QUALITY))
+ if ((enableLog == true) && (logFileFlagSet(LOG_OVERLAP_SCORING)))
writeLog("isOverlapBadQuality()-- OVERLAP REJECTED: %d %d %c hangs "F_S32" "F_S32" err %.3f\n",
olap.a_iid, olap.b_iid,
olap.flipped ? 'A' : 'N',
@@ -1076,6 +907,7 @@ BestOverlapGraph::isOverlapBadQuality(const BAToverlap& olap) {
}
+
// If no restrictions are known, this overlap is useful if both fragments are not in a unitig
// already. Otherwise, we are restricted to just a specific set of fragments (usually a single
// unitig and all the mated reads). The overlap is useful if both fragments are in the set.
@@ -1096,27 +928,20 @@ BestOverlapGraph::isOverlapRestricted(const BAToverlap &olap) {
}
+
uint64
BestOverlapGraph::scoreOverlap(const BAToverlap& olap) {
-
- // BPW's newer new score. For the most part, we use the length of the overlap, but we also
- // want to break ties with the higher quality overlap:
- // The high bits are the length of the overlap.
- // The next are the corrected error rate.
- // The last are the original error rate.
- //
uint64 leng = 0;
- uint64 corr = AS_MAX_EVALUE - olap.evalue;
- uint64 orig = AS_MAX_EVALUE - 0;
+ uint64 rate = AS_MAX_EVALUE - olap.evalue;
- // Shift AFTER assigning to a 64-bit value to avoid overflows.
- corr <<= AS_MAX_EVALUE_BITS;
+ assert(olap.evalue <= AS_MAX_EVALUE);
+ assert(rate <= AS_MAX_EVALUE);
// Containments - the length of the overlaps are all the same. We return the quality.
//
if (((olap.a_hang >= 0) && (olap.b_hang <= 0)) ||
((olap.a_hang <= 0) && (olap.b_hang >= 0)))
- return(corr | orig);
+ return(rate);
// Dovetails - the length of the overlap is the score, but we bias towards lower error.
// (again, shift AFTER assigning to avoid overflows)
@@ -1127,7 +952,7 @@ BestOverlapGraph::scoreOverlap(const BAToverlap& olap) {
// takes into account both reads, or as the number of aligned bases on the A read.
#if 0
- leng = FI->overlapLength(olap.a_iid, olap.b_iid, olap.a_hang, olap.b_hang);
+ leng = FI->overlapLength(olap.a_iid, olap.b_iid, olap.a_hang, olap.b_hang);
#endif
if (olap.a_hang > 0)
@@ -1144,7 +969,7 @@ BestOverlapGraph::scoreOverlap(const BAToverlap& olap) {
// And finally shift it to the correct place in the word.
- leng <<= (2 * AS_MAX_EVALUE_BITS);
+ leng <<= AS_MAX_EVALUE_BITS;
- return(leng | corr | orig);
+ return(leng | rate);
}
diff --git a/src/bogart/AS_BAT_BestOverlapGraph.H b/src/bogart/AS_BAT_BestOverlapGraph.H
index ac53ca7..408e1d8 100644
--- a/src/bogart/AS_BAT_BestOverlapGraph.H
+++ b/src/bogart/AS_BAT_BestOverlapGraph.H
@@ -31,6 +31,10 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-11
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -38,14 +42,125 @@
#ifndef INCLUDE_AS_BAT_BESTOVERLAPGRAPH
#define INCLUDE_AS_BAT_BESTOVERLAPGRAPH
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
+#include "AS_global.H"
+#include "AS_BAT_OverlapCache.H"
+
+class FragmentEnd {
+public:
+ FragmentEnd() {
+ _id = 0;
+ _e3p = false;
+ };
+ FragmentEnd(uint32 id, bool e3p) {
+ _id = id;
+ _e3p = e3p;
+ };
+
+ uint32 fragId(void) const { return(_id); };
+ bool frag3p(void) const { return(_e3p == true); };
+ bool frag5p(void) const { return(_e3p == false); };
+
+ bool operator==(FragmentEnd const that) const {
+ return((fragId() == that.fragId()) && (frag3p() == that.frag3p()));
+ };
+
+ bool operator!=(FragmentEnd const that) const {
+ return((fragId() != that.fragId()) || (frag3p() != that.frag3p()));
+ };
+
+ bool operator<(FragmentEnd const that) const {
+ if (fragId() != that.fragId())
+ return fragId() < that.fragId();
+ else
+ return frag3p() < that.frag3p();
+ };
+
+private:
+ uint32 _id:31;
+ uint32 _e3p:1;
+};
+
+
+
+// Stores an overlap from an 'a' read (implied by the index into the array of best edges) to a 'b'
+// read. The hangs are relative to the 'a' read - just as a normal overlap would be.
+//
+class BestEdgeOverlap {
+public:
+ BestEdgeOverlap() {
+ clear();
+ };
+ BestEdgeOverlap(BAToverlap const &ovl) {
+ set(ovl);
+ };
+ ~BestEdgeOverlap() {
+ };
+
+ void clear(void) {
+ _id = 0;
+ _e3p = 0;
+ _ahang = 0;
+ _bhang = 0;
+ _evalue = 0;
+ };
+
+ void set(BAToverlap const &olap) {
+ _id = olap.b_iid;
+ if (((olap.a_hang <= 0) && (olap.b_hang >= 0)) || // If contained, _e3p just means
+ ((olap.a_hang >= 0) && (olap.b_hang <= 0))) // the other read is flipped
+ _e3p = olap.flipped;
+ else
+ _e3p = olap.BEndIs3prime(); // Otherwise, means olap is to the 3' end
+
+ _ahang = olap.a_hang;
+ _bhang = olap.b_hang;
+ _evalue = olap.evalue;
+ };
+
+ void set(uint32 id, bool e3p, int32 ahang, int32 bhang, uint32 evalue) {
+ _id = id;
+ _e3p = e3p;
+ _ahang = ahang;
+ _bhang = bhang;
+ _evalue = evalue;
+ };
+
+
+ uint32 fragId(void) const { return(_id); };
+ bool frag3p(void) const { return(_e3p == true); };
+ bool frag5p(void) const { return(_e3p == false); };
+
+ int32 ahang(void) const { return(_ahang); };
+ int32 bhang(void) const { return(_bhang); };
+
+ uint32 evalue(void) const { return(_evalue); };
+ double erate(void) const { return(AS_OVS_decodeEvalue(_evalue)); };
+
+private:
+ uint32 _id;
+ uint64 _e3p : 1; // Overlap with the 3' end of that fragment, or flipped containment
+ int64 _ahang : AS_MAX_READLEN_BITS+1;
+ int64 _bhang : AS_MAX_READLEN_BITS+1;
+ uint64 _evalue : AS_MAX_EVALUE_BITS;
+};
+
+#if (1 + AS_MAX_READLEN_BITS + 1 + AS_MAX_READLEN_BITS + 1 + AS_MAX_EVALUE_BITS > 64)
+#error not enough bits to store overlaps. decrease AS_MAX_EVALUE_BITS or AS_MAX_READLEN_BITS.
+#endif
+
+
+
+
+
+
+
+
class BestOverlaps {
public:
BestEdgeOverlap _best5;
BestEdgeOverlap _best3;
- BestContainment _bestC;
+ uint32 _isC;
};
@@ -54,12 +169,12 @@ public:
BestScores() {
_best5score = 0;
_best3score = 0;
- _bestCscore = 0;
+ _isC = 0;
};
uint64 _best5score;
uint64 _best3score;
- uint64 _bestCscore;
+ uint32 _isC;
};
@@ -67,19 +182,22 @@ public:
class BestOverlapGraph {
private:
void removeSuspicious(void);
- void examineOnlyTopN(void);
void removeSpurs(void);
- void removeFalseBest(void);
- void removeWeak(double threshold);
+ void removeLopsidedEdges(void);
+
+ void findEdges(void);
+
+ void removeHighErrorBestEdges(void);
+
+ void removeContainedDovetails(void);
public:
- BestOverlapGraph(double erate,
- const char *prefix,
- double doRemoveWeakThreshold,
- bool doRemoveSuspicious,
- bool doRemoveSpurs);
+ BestOverlapGraph(double erateGraph,
+ double deviationGraph,
+ const char *prefix);
- BestOverlapGraph(double erate,
+ BestOverlapGraph(double erateGraph,
+ double deviationGraph,
set<uint32> *restrict);
~BestOverlapGraph() {
@@ -106,32 +224,28 @@ public:
return(FragmentEnd(edge->fragId(), !edge->frag3p()));
};
+ void setContained(const uint32 fragid) {
+ if (_bestA)
+ _bestA[fragid]._isC = true;
+ else
+ _bestM[fragid]._isC = true;
+ };
+
bool isContained(const uint32 fragid) {
if (_bestA)
- return(_bestA[fragid]._bestC.isContained);
- return(_bestM[fragid]._bestC.isContained);
+ return(_bestA[fragid]._isC);
+ return(_bestM[fragid]._isC);
};
bool isSuspicious(const uint32 fragid) {
return(_suspicious.count(fragid) > 0);
};
- // Given a containee, returns pointer to BestContainment record
- BestContainment *getBestContainer(const uint32 fragid) {
- if (_bestA)
- return(&_bestA[fragid]._bestC);
- return(&_bestM[fragid]._bestC);
- };
-
- void reportBestEdges(const char *prefix);
+ void reportBestEdges(const char *prefix, const char *label);
public:
- void rebuildBestContainsWithoutSingletons(UnitigVector &unitigs,
- double erate,
- const char *prefix);
-
+ bool isOverlapBadQuality(const BAToverlap& olap); // Used in repeat detection
private:
- bool isOverlapBadQuality(const BAToverlap& olap);
bool isOverlapRestricted(const BAToverlap &olap);
uint64 scoreOverlap(const BAToverlap& olap);
@@ -140,12 +254,6 @@ private:
void scoreEdge(const BAToverlap& olap);
private:
- uint64 &bestCscore(uint32 id) {
- if (_restrictEnabled == false)
- return(_scorA[id]._bestCscore);
- return(_scorM[id]._bestCscore);
- };
-
uint64 &best5score(uint32 id) {
if (_restrictEnabled == false)
return(_scorA[id]._best5score);
@@ -162,7 +270,14 @@ private:
BestOverlaps *_bestA;
BestScores *_scorA;
+ double _mean;
+ double _stddev;
+
+ double _median;
+ double _mad;
+
set<uint32> _suspicious;
+ set<uint32> _spur;
map<uint32, BestOverlaps> _bestM;
map<uint32, BestScores> _scorM;
@@ -171,9 +286,12 @@ private:
bool _restrictEnabled;
public:
- double _erate;
+ double _erateGraph;
+ double _deviationGraph;
}; //BestOverlapGraph
-#endif //INCLUDE_AS_BAT_BESTOVERLAPGRAPH
+extern BestOverlapGraph *OG;
+
+#endif // INCLUDE_AS_BAT_BESTOVERLAPGRAPH
diff --git a/src/bogart/AS_BAT_Breaking.C b/src/bogart/AS_BAT_Breaking.C
deleted file mode 100644
index b7fce7a..0000000
--- a/src/bogart/AS_BAT_Breaking.C
+++ /dev/null
@@ -1,332 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_Breaking.C
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-NOV-23 to 2013-AUG-01
- * are Copyright 2010-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz on 2014-DEC-19
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#include "AS_BAT_Breaking.H"
-
-#include "AS_BAT_BestOverlapGraph.H"
-
-#define LOG_ADDUNITIG_BREAKING 1
-#define LOG_ADDFRAG_BREAKING 0
-
-// The four cases we must handle:
-//
-// -------------------------A
-// --
-// ---------------------------
-// ------
-// B----------------------------
-// ---
-// ------
-//
-// When at A:
-// keepContains == true -- Remember lastUnitig, and the coordinate of A.
-// Any fragment that ends before A is placed in lastUnitig.
-// When a fragment begins after A, we can forget lastUnitig.
-//
-// keepContains == false -- Remember lastUnitig. and the coordinate of A.
-// Search forward until the first fragment that starts after A,
-// search for any gaps in the layout. Move all fragments before
-// the last gap to lastUnitig. Gap is defined as being less
-// than an overlap of overlap.
-//
-// When at B:
-// keepContains == true -- Do nothing special.
-//
-// keepContains == false -- Same as "A: keepContains == true".
-//
-// UGLIES:
-//
-// 1) When A and B are on the same fragment, both breakPoints must have set keepContains
-// consistently. This could arise if the fragment is a chimera. In this case, we definitely
-// do not want to keep contains. So, instead of asserting (we'll probably do that, just to
-// see if this occurs) we'll set keepContains only if ALL breakpoints request it.
-//
-// 2) Rapid fire breakpoints. When breakpoints closely follow each other -- specificially when
-// the fragments they break on are overlapping -- the meaning of keepContains becomes horribly
-// confused. The algorithm will keep lastTig updated to the last break point seen. The
-// picture above (without trying) shows this case. The tiny fragment second from the end is
-// contained in both A and B. It will end up with B if that is keepContains=true, regardless
-// of what A said.
-
-
-static
-bool
-processBreakpoints(vector<breakPoint> &breaks,
- map<uint32, breakPoint> &breakMap) {
- bool ejectContains = false;
-
- for (uint32 bp=0; bp<breaks.size(); bp++) {
- uint32 fid = breaks[bp].fragEnd.fragId();
-
- //writeLog("found break %u/%c' eject=%d keep=%d\n",
- // breaks[bp].fragEnd.fragId(), (breaks[bp].fragEnd.frag3p() ? '3' : '5'),
- // breaks[bp].ejectContains, breaks[bp].keepContains);
-
- if (breakMap.find(fid) == breakMap.end()) {
- breakMap[fid] = breaks[bp];
-
- ejectContains |= breaks[bp].ejectContains;
- breakMap[fid].break3p = (breaks[bp].fragEnd.frag3p() == true);
- breakMap[fid].break5p = (breaks[bp].fragEnd.frag3p() == false);
-
- } else {
- ejectContains |= breaks[bp].ejectContains;
- breakMap[fid].break3p |= (breaks[bp].fragEnd.frag3p() == true);
- breakMap[fid].break5p |= (breaks[bp].fragEnd.frag3p() == false);
- breakMap[fid].keepContains &= breaks[bp].keepContains;
- }
- }
-
- return(ejectContains);
-}
-
-
-static
-bool
-isBreakpoint(ufNode &frg,
- map<uint32, breakPoint> &breakMap,
- bool &break5p, bool &break3p,
- bool &rememberLastTig,
- bool &searchDiscontinuous) {
-
- if (breakMap.find(frg.ident) == breakMap.end())
- return(false);
-
- breakPoint &bp = breakMap[frg.ident];
-
- assert(bp.fragEnd.fragId() == frg.ident);
-
- //writeLog("BREAK %u %d/%d\n",
- // bp.fragEnd.fragId(), bp.break3p, bp.break5p);
-
- break3p = bp.break3p;
- break5p = bp.break5p;
- rememberLastTig = false;
- searchDiscontinuous = false;
-
- bool frgReversed = (frg.position.end < frg.position.bgn);
- bool isFarEnd = false;
-
- if (((break3p == true) && (frgReversed == false)) ||
- ((break5p == true) && (frgReversed == true)))
- isFarEnd = true;
-
- // Remember the lastTig if we are case A (isFarEnd == true) and keepContains is true,
- // of if we are case B (isFarEnd == false) and keepContains is false.
- if (isFarEnd == bp.keepContains)
- rememberLastTig = true;
-
- // Do the painful search for disconnects if we are case A and keepContains is false.
- if ((isFarEnd == true) && (bp.keepContains == false))
- searchDiscontinuous = true;
-
- return(true);
-}
-
-
-
-
-
-
-
-bool
-breakUnitigAt(UnitigVector &unitigs,
- Unitig *tig,
- vector<breakPoint> &breaks,
- bool doDelete) {
- uint32 newTigs = 0;
-
- if (breaks.empty())
- return(false);
-
- // we cannot predict the number of new unitigs created from the number of break points. to
- // prevent infinite splitting loops, we need to count the number of new unitigs we create here,
- // and return 'no work done' if only one new unitig is created (aka, if we just copied all
- // fragments from the old unitig to the new unitig)
-
- Unitig *lastTig = NULL; // For saving contained frags in the last unitig constructed.
- int32 lastOffset = 0;
- int32 lastLength = 0;
-
- Unitig *saveTig = NULL; // The last unitig constructed.
- int32 saveOffset = 0;
-
- Unitig *currTig = NULL; // The current unitig being constructed. Not guaranteed to exist.
- int32 currOffset = 0;
-
- uint32 tigsCreated = 0;
-
- // Examine the break points. Merge multiple break points on a single fragment together. Put the
- // break points into a map -- without this, we'd need to make sure the break points are sorted in
- // the same order as the fragments in the unitig.
-
- map<uint32, breakPoint> breakMap;
-
- bool ejectContains = processBreakpoints(breaks, breakMap);
-
- for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
- ufNode frg = tig->ufpath[fi];
- bool frgReversed = (frg.position.end < frg.position.bgn);
-
- bool break5p = false;
- bool break3p = false;
- bool rememberLastTig = false;
- bool searchDiscontinuous = false;
-
- if (ejectContains && OG->isContained(frg.ident)) {
- Unitig::removeFrag(frg.ident);
- continue;
- }
-
- // Current fragment is the one we want to break on. Figure out which end(s) to break on -- we
- // might (stupidly) have requested to break on both ends -- and if we should be remembering
- // lastTig.
-
- if (isBreakpoint(frg,
- breakMap,
- break5p, break3p,
- rememberLastTig,
- searchDiscontinuous)) {
- //writeLog("NEW BREAK at frag %u 5p=%d 3p=%d remember=%d search=%d\n",
- // frg.ident, break5p, break3p, rememberLastTig, searchDiscontinuous);
-
- lastTig = NULL;
- lastOffset = 0;
- lastLength = 0;
-
- if ((rememberLastTig) && (saveTig != NULL)) {
- lastTig = saveTig;
- lastOffset = saveOffset;
- lastLength = saveOffset + saveTig->getLength();
- }
-
- // Should we clear saveTig? This is always set to the last unitig created. It is
- // possible to get two breakpoints with no unitig creation (break at the high end of one
- // fragment, and the low end of the next fragment) in which case we still want to have
- // the last unitig...and not NULL...saved. So, no, don't clear.
- //
- //saveTig = NULL;
- //saveOffset = 0;
-
- if (searchDiscontinuous) {
- }
- }
-
-
- // If both break5p and break3p, this fragment is ejected to a singleton. This is an easy case,
- // and we'll get it out of the way.
-
- if ((break5p == true) &&
- (break3p == true)) {
- newTigs++;
- saveTig = currTig = unitigs.newUnitig(false); // LOG_ADDUNITIG_BREAKING);
- saveOffset = currOffset = (frgReversed) ? -frg.position.end : -frg.position.bgn;
-
- currTig->addFrag(frg, currOffset, LOG_ADDFRAG_BREAKING);
-
- currTig = NULL;
-
- continue;
- }
-
- // If neither break and we're saving contains, add to the last unitig.
-
- if ((break5p == false) &&
- (break3p == false) &&
- (lastTig) &&
- (frg.position.bgn < lastLength) &&
- (frg.position.end < lastLength)) {
- lastTig->addFrag(frg, lastOffset, LOG_ADDFRAG_BREAKING);
- continue;
- }
-
- // If neither break, just add the fragment to the existing unitig.
-
- if ((break5p == false) &&
- (break3p == false)) {
- if (currTig == NULL) {
- newTigs++;
- saveTig = currTig = unitigs.newUnitig(false); // LOG_ADDUNITIG_BREAKING);
- saveOffset = currOffset = (frgReversed) ? -frg.position.end : -frg.position.bgn;
- }
-
- currTig->addFrag(frg, currOffset, LOG_ADDFRAG_BREAKING);
- continue;
- }
-
- // Breaking at the left end of the fragment. This fragment starts a new unitig.
-
- if ((break5p && (frgReversed == false)) ||
- (break3p && (frgReversed == true))) {
- newTigs++;
- saveTig = currTig = unitigs.newUnitig(false); // LOG_ADDUNITIG_BREAKING);
- saveOffset = currOffset = (frgReversed) ? -frg.position.end : -frg.position.bgn;
-
- currTig->addFrag(frg, currOffset, LOG_ADDFRAG_BREAKING);
-
- continue;
- }
-
- // Breaking at the right end of the fragment. This fragment ends the existing unitig (which
- // might not even exist).
-
- if ((break5p && (frgReversed == true)) ||
- (break3p && (frgReversed == false))) {
- if (currTig == NULL) {
- newTigs++;
- saveTig = currTig = unitigs.newUnitig(false); // LOG_ADDUNITIG_BREAKING);
- saveOffset = currOffset = (frgReversed) ? -frg.position.end : -frg.position.bgn;
- }
-
- currTig->addFrag(frg, currOffset, LOG_ADDFRAG_BREAKING);
-
- currTig = NULL;
-
- continue;
- }
- }
-
- if (newTigs == 0)
- return(false);
-
- if (doDelete) {
- unitigs[tig->id()] = NULL;
- delete tig;
- }
-
- return(true);
-}
diff --git a/src/bogart/AS_BAT_Breaking.H b/src/bogart/AS_BAT_Breaking.H
deleted file mode 100644
index f056dc2..0000000
--- a/src/bogart/AS_BAT_Breaking.H
+++ /dev/null
@@ -1,112 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_Breaking.H
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-DEC-06 to 2013-AUG-01
- * are Copyright 2010-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz on 2014-DEC-19
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#ifndef INCLUDE_AS_BAT_BREAKING
-#define INCLUDE_AS_BAT_BREAKING
-
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
-
-// What to do with contained fragments at the split point?
-//
-// ejectContains -- eject all contains from the unitig, client is expected to
-// place them again.
-//
-// keepContains -- contains that are associated with the fragEnd are moved to
-// the new unitig. There are five cases:
-//
-// A #-----------------*
-// 1 ------ - contained in A
-// 2 ------- - contained in A, dovetail to B
-// B -----------------------
-// 3 ------ - contained in A, contained in B
-// 4 ------- - dovetail to A, contained in B
-// 5 ------- - contained in B
-//
-// If the break point is at #:
-// keepContains == true -- 1, 2, 3 will move with A.
-// keepContains == false -- 1, 2 will move with A.
-//
-// If the break point is at *:
-// keepContains == true -- 1, 2, 3 will move with A.
-// keepContains == false -- 1, 2 will move with A.
-//
-
-class breakPoint {
-public:
- breakPoint() {
- fragEnd = FragmentEnd();
- ejectContains = false;
- keepContains = false;
- break5p = false;
- break3p = false;
- rememberLastTig = false;
- searchDiscontinuous = false;
- };
-
- breakPoint(uint32 isectFrg, bool isect3p, bool eject, bool keep) {
- fragEnd = FragmentEnd(isectFrg, isect3p);
- ejectContains = eject;
- keepContains = keep;
- break5p = false;
- break3p = false;
- rememberLastTig = false;
- searchDiscontinuous = false;
- };
-
- bool operator<(breakPoint const that) const {
- return(fragEnd < that.fragEnd);
- };
-
-public:
- FragmentEnd fragEnd;
-
- bool ejectContains;
- bool keepContains;
-
- bool break5p;
- bool break3p;
- bool rememberLastTig;
- bool searchDiscontinuous;
-};
-
-
-
-bool
-breakUnitigAt(UnitigVector &unitigs, Unitig *tig, vector<breakPoint> &breaks, bool doDelete);
-
-#endif // INCLUDE_AS_BAT_BREAKING
diff --git a/src/bogart/AS_BAT_ChunkGraph.C b/src/bogart/AS_BAT_ChunkGraph.C
index 742cf35..66c67ac 100644
--- a/src/bogart/AS_BAT_ChunkGraph.C
+++ b/src/bogart/AS_BAT_ChunkGraph.C
@@ -35,9 +35,11 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_ChunkGraph.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_ChunkGraph.H"
+
+#include "AS_BAT_Logging.H"
ChunkGraph::ChunkGraph(const char *output_prefix) {
@@ -55,17 +57,23 @@ ChunkGraph::ChunkGraph(const char *output_prefix) {
memset(_chunkLength, 0, sizeof(ChunkLength) * (_maxFragment));
for (uint32 fid=1; fid <= _maxFragment; fid++) {
- if (OG->isContained(fid))
+ if (OG->isContained(fid)) {
+ if (logFileFlagSet(LOG_CHUNK_GRAPH))
+ writeLog("read %u contained\n", fid);
continue;
+ }
- if (OG->isSuspicious(fid))
- // Fragment is suspicious. We won't seed a BOG from it, and populateUnitig will make only a
- // singleton.
+ if (OG->isSuspicious(fid)) {
+ if (logFileFlagSet(LOG_CHUNK_GRAPH))
+ writeLog("read %u suspicious\n", fid);
continue;
+ }
+
+ uint32 l5 = countFullWidth(FragmentEnd(fid, false));
+ uint32 l3 = countFullWidth(FragmentEnd(fid, true));
_chunkLength[fid-1].fragId = fid;
- _chunkLength[fid-1].cnt = (countFullWidth(FragmentEnd(fid, false)) +
- countFullWidth(FragmentEnd(fid, true)));
+ _chunkLength[fid-1].cnt = l5 + l3;
}
delete [] _pathLen;
@@ -200,13 +208,15 @@ ChunkGraph::countFullWidth(FragmentEnd firstEnd) {
currIdx = getIndex(currEnd);
}
+
+
if (logFileFlagSet(LOG_CHUNK_GRAPH)) {
seen.clear();
currEnd = firstEnd;
currIdx = firstIdx;
- writeLog("PATH from %d,%d length %d:",
+ writeLog("path from %d,%d length %d:",
firstEnd.fragId(),
(firstEnd.frag3p()) ? 3 : 5,
_pathLen[firstIdx]);
diff --git a/src/bogart/AS_BAT_ChunkGraph.H b/src/bogart/AS_BAT_ChunkGraph.H
index 30aab29..f2e7a52 100644
--- a/src/bogart/AS_BAT_ChunkGraph.H
+++ b/src/bogart/AS_BAT_ChunkGraph.H
@@ -38,7 +38,13 @@
#ifndef INCLUDE_AS_BAT_CHUNKGRAPH
#define INCLUDE_AS_BAT_CHUNKGRAPH
-#include "AS_BAT_Datatypes.H"
+#include "AS_global.H"
+
+#include <set>
+#include <map>
+
+using namespace std;
+
class BestOverlapGraph;
@@ -86,4 +92,8 @@ private:
set<uint32> *_restrict;
};
-#endif
+
+
+extern ChunkGraph *CG;
+
+#endif // INCLUDE_AS_BAT_CHUNKGRAPH
diff --git a/src/bogart/AS_BAT_Datatypes.H b/src/bogart/AS_BAT_Datatypes.H
deleted file mode 100644
index 4d62145..0000000
--- a/src/bogart/AS_BAT_Datatypes.H
+++ /dev/null
@@ -1,339 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_Datatypes.H
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-NOV-23 to 2014-JAN-28
- * are Copyright 2010-2014 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2014-AUG-11 to 2015-JUN-16
- * are Copyright 2014-2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#ifndef INCLUDE_AS_BAT_DATATYPES
-#define INCLUDE_AS_BAT_DATATYPES
-
-#include "AS_global.H"
-#include "ovStore.H"
-#include "gkStore.H"
-
-#include <map>
-#include <set>
-#include <list>
-#include <vector>
-#include <algorithm>
-
-#ifndef BROKEN_CLANG_OpenMP
-#include <omp.h>
-#endif
-
-using namespace std;
-
-#include "AS_BAT_Logging.H"
-#include "AS_BAT_OverlapCache.H"
-
-const uint32 noUnitig = 0xffffffff;
-
-////////////////////////////////////////
-
-class FragmentInfo;
-class OverlapCache;
-class BestOverlapGraph;
-class ChunkGraph;
-
-extern FragmentInfo *FI;
-extern OverlapCache *OC;
-extern BestOverlapGraph *OG;
-extern ChunkGraph *CG;
-
-////////////////////////////////////////
-
-////////////////////////////////////////
-
-// These MUST be signed integers. Read placement -- Unitig::placeFrag() for sure -- depends on
-// being able to set coordinates to negative values to indicate they extend the unitig off the
-// beginning.
-//
-struct SeqInterval {
- int32 bgn;
- int32 end;
-};
-
-static const SeqInterval NULL_SEQ_LOC = {0,0};
-
-inline
-bool
-isReverse(SeqInterval pos) {
- return(pos.bgn > pos.end);
-}
-
-inline
-bool
-operator==(SeqInterval a, SeqInterval b) {
- return(((a.bgn == b.bgn) && (a.end == b.end)) ||
- ((a.bgn == b.end) && (a.end == b.bgn)));
-}
-
-inline
-bool
-operator!=(SeqInterval a, SeqInterval b) {
- return(((a.bgn != b.bgn) || (a.end != b.end)) &&
- ((a.bgn != b.end) || (a.end != b.bgn)));
-}
-
-inline
-bool
-operator<(SeqInterval a, SeqInterval b) {
- if (isReverse(a)) {
- if (isReverse(b)) return a.end < b.end;
- else return a.end < b.bgn;
- } else {
- if (isReverse(b)) return a.bgn < b.end;
- else return a.bgn < b.bgn;
- }
-}
-
-////////////////////////////////////////
-
-class FragmentEnd {
-public:
- FragmentEnd() {
- _id = 0;
- _e3p = false;
- };
- FragmentEnd(uint32 id, bool e3p) {
- _id = id;
- _e3p = e3p;
- };
-
- uint32 fragId(void) const { return(_id); };
- bool frag3p(void) const { return(_e3p == true); };
- bool frag5p(void) const { return(_e3p == false); };
-
- bool operator==(FragmentEnd const that) const {
- return((fragId() == that.fragId()) && (frag3p() == that.frag3p()));
- };
-
- bool operator!=(FragmentEnd const that) const {
- return((fragId() != that.fragId()) || (frag3p() != that.frag3p()));
- };
-
- bool operator<(FragmentEnd const that) const {
- if (fragId() != that.fragId())
- return fragId() < that.fragId();
- else
- return frag3p() < that.frag3p();
- };
-
-private:
- uint32 _id:31;
- uint32 _e3p:1;
-};
-
-
-// Swiped from AS_OVS_overlap.h, modified to take a BAToverlap instead of an OVSoverlap.
-
-static
-uint32
-AS_BAT_overlapAEndIs5prime(const BAToverlap& olap) {
- return((olap.a_hang < 0) && (olap.b_hang < 0));
-}
-
-static
-uint32
-AS_BAT_overlapAEndIs3prime(const BAToverlap& olap) {
- return((olap.a_hang > 0) && (olap.b_hang > 0));
-}
-
-static
-uint32
-AS_BAT_overlapBEndIs3prime(const BAToverlap& olap) {
- return((AS_BAT_overlapAEndIs5prime(olap) && (olap.flipped == false)) ||
- (AS_BAT_overlapAEndIs3prime(olap) && (olap.flipped == true)));
-}
-
-
-class BestEdgeOverlap {
-public:
- BestEdgeOverlap() {
- clear();
- };
- ~BestEdgeOverlap() {
- };
-
- void clear(void) {
- _id = 0;
- _e3p = 0;
- _ahang = 0;
- _bhang = 0;
- };
-
- void set(BAToverlap const &olap) {
- _id = olap.b_iid;
- _e3p = AS_BAT_overlapBEndIs3prime(olap);
- _ahang = olap.a_hang;
- _bhang = olap.b_hang;
- };
-
- void set(uint32 id, bool e3p, int32 ahang, int32 bhang) {
- _id = id;
- _e3p = e3p;
- _ahang = ahang;
- _bhang = bhang;
- };
-
-
- uint32 fragId(void) const { return(_id); };
- bool frag3p(void) const { return(_e3p == true); };
- bool frag5p(void) const { return(_e3p == false); };
-
- int32 ahang(void) const { return(_ahang); };
- int32 bhang(void) const { return(_bhang); };
-
-private:
- uint32 _id;
- uint64 _e3p : 1; // Overlap with the 3' end of that fragment
- int64 _ahang : AS_MAX_READLEN_BITS+1;
- int64 _bhang : AS_MAX_READLEN_BITS+1;
-};
-
-
-// Contains what kind of containment relationship exists between fragment a and fragment b
-//
-class BestContainment{
-public:
- BestContainment() {
- clear();
- };
- ~BestContainment() {
- };
-
- void clear(void) {
- container = 0;
- isContained = false;
- sameOrientation = false;
- a_hang = 0;
- b_hang = 0;
- };
-
- uint32 container;
- uint64 isContained : 1;
- uint64 sameOrientation : 1;
- int64 a_hang : AS_MAX_READLEN_BITS+1;
- int64 b_hang : AS_MAX_READLEN_BITS+1;
-};
-
-
-
-
-class FragmentInfo {
-public:
- FragmentInfo(gkStore *gkp, const char *prefix, uint32 minReadLen);
- ~FragmentInfo();
-
- uint64 memoryUsage(void) {
- return((3 * sizeof(uint32) * _numFragments) +
- (2 * sizeof(double) * _numLibraries) +
- (2 * sizeof(uint32) * _numLibraries));
- };
-
- uint32 numFragments(void) { return(_numFragments); };
- uint32 numLibraries(void) { return(_numLibraries); };
-
- uint32 fragmentLength(uint32 iid) { return(_fragLength[iid]); };
- uint32 libraryIID(uint32 iid) { return(_libIID[iid]); };
-
- uint32 overlapLength(uint32 a_iid, uint32 b_iid, int32 a_hang, int32 b_hang) {
- int32 alen = fragmentLength(a_iid);
- int32 blen = fragmentLength(b_iid);
- int32 aovl = 0;
- int32 bovl = 0;
-
- assert(alen > 0);
- assert(blen > 0);
-
- if (a_hang < 0) {
- // b_hang < 0 ? ---------- : ----
- // ? ---------- : ----------
- //
- aovl = (b_hang < 0) ? (alen + b_hang) : (alen);
- bovl = (b_hang < 0) ? (blen + a_hang) : (blen + a_hang - b_hang);
- } else {
- // b_hang < 0 ? ---------- : ----------
- // ? ---- : ----------
- //
- aovl = (b_hang < 0) ? (alen - a_hang + b_hang) : (alen - a_hang);
- bovl = (b_hang < 0) ? (blen) : (blen - b_hang);
- }
-
- if ((aovl <= 0) || (bovl <= 0) || (aovl > alen) || (bovl > blen)) {
- fprintf(stderr, "WARNING: bogus overlap found for A="F_U32" B="F_U32"\n", a_iid, b_iid);
- fprintf(stderr, "WARNING: A len="F_S32" hang="F_S32" ovl="F_S32"\n", alen, a_hang, aovl);
- fprintf(stderr, "WARNING: B len="F_S32" hang="F_S32" ovl="F_S32"\n", blen, b_hang, bovl);
- }
-
- if (aovl < 0) aovl = 0;
- if (bovl < 0) bovl = 0;
-
- if (aovl > alen) aovl = alen;
- if (bovl > blen) bovl = blen;
-
- assert(aovl > 0);
- assert(bovl > 0);
- assert(aovl <= alen);
- assert(bovl <= blen);
-
- // AVE does not work. return((uint32)((aovl, bovl)/2));
- // MAX does not work. return((uint32)MAX(aovl, bovl));
-
- return(aovl);
- };
-
- // For DIAGNOSTIC ONLY. Mark this fragment as 'deleted' so checkUnitigMembership() will pass
- // when we ignore fragments. The resuting assembly will fail in CGW.
- void markAsIgnore(uint32 iid) {
- _fragLength[iid] = 0;
- };
-
-private:
- void save(const char *prefix);
- bool load(const char *prefix);
-
- uint32 _numFragments;
- uint32 _numLibraries;
-
- uint32 *_fragLength;
- uint32 *_libIID;
-
- uint32 *_numFragsInLib;
-};
-
-#endif
-
-
-
diff --git a/src/bogart/AS_BAT_FragmentInfo.C b/src/bogart/AS_BAT_FragmentInfo.C
index 6ad76d5..58f4347 100644
--- a/src/bogart/AS_BAT_FragmentInfo.C
+++ b/src/bogart/AS_BAT_FragmentInfo.C
@@ -35,7 +35,8 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
+#include "AS_BAT_FragmentInfo.H"
+#include "AS_BAT_Logging.H"
const uint64 fiMagicNumber = 0x6f666e4967617266llu; // 'fragInfo' until it gets messed up by endianess.
const uint64 fiVersionNumber = 2;
@@ -60,16 +61,11 @@ FragmentInfo::FragmentInfo(gkStore *gkp,
_fragLength = new uint32 [_numFragments + 1];
_libIID = new uint32 [_numFragments + 1];
- _numFragsInLib = new uint32 [_numLibraries + 1];
-
for (uint32 i=0; i<_numFragments + 1; i++) {
_fragLength[i] = 0;
_libIID[i] = 0;
}
- for (uint32 i=1; i<_numLibraries + 1; i++)
- _numFragsInLib[i] = 0;
-
uint32 numSkipped = 0;
uint32 numLoaded = 0;
@@ -86,8 +82,6 @@ FragmentInfo::FragmentInfo(gkStore *gkp,
_fragLength[iid] = read->gkRead_sequenceLength();
_libIID[iid] = lib;
- _numFragsInLib[lib]++;
-
numLoaded++;
}
@@ -107,8 +101,6 @@ FragmentInfo::FragmentInfo(gkStore *gkp,
FragmentInfo::~FragmentInfo() {
delete [] _fragLength;
delete [] _libIID;
-
- delete [] _numFragsInLib;
}
@@ -137,8 +129,6 @@ FragmentInfo::save(const char *prefix) {
AS_UTL_safeWrite(file, _fragLength, "fragmentInformationFragLen", sizeof(uint32), _numFragments + 1);
AS_UTL_safeWrite(file, _libIID, "fragmentInformationLibIID", sizeof(uint32), _numFragments + 1);
- AS_UTL_safeWrite(file, _numFragsInLib, "fragmentInformationNumFrgsInLib", sizeof(uint32), _numLibraries + 1);
-
fclose(file);
}
@@ -180,13 +170,9 @@ FragmentInfo::load(const char *prefix) {
_fragLength = new uint32 [_numFragments + 1];
_libIID = new uint32 [_numFragments + 1];
- _numFragsInLib = new uint32 [_numLibraries + 1];
-
AS_UTL_safeRead(file, _fragLength, "fragmentInformationFragLen", sizeof(uint32), _numFragments + 1);
AS_UTL_safeRead(file, _libIID, "fragmentInformationLibIID", sizeof(uint32), _numFragments + 1);
- AS_UTL_safeRead(file, _numFragsInLib, "fragmentInformationNumFrgsInLib", sizeof(uint32), _numLibraries + 1);
-
fclose(file);
return(true);
diff --git a/src/bogart/AS_BAT_FragmentInfo.H b/src/bogart/AS_BAT_FragmentInfo.H
new file mode 100644
index 0000000..cba2d08
--- /dev/null
+++ b/src/bogart/AS_BAT_FragmentInfo.H
@@ -0,0 +1,123 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * This file is derived from:
+ *
+ * src/bogart/AS_BAT_Datatypes.H
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-APR-28
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#ifndef INCLUDE_AS_BAT_FRAGMENT_INFO
+#define INCLUDE_AS_BAT_FRAGMENT_INFO
+
+#include "AS_global.H"
+#include "ovStore.H"
+#include "gkStore.H"
+
+#include <map>
+#include <set>
+#include <list>
+#include <vector>
+#include <algorithm>
+
+
+
+// Per-read metadata: the sequence length and the library id of every read,
+// both indexed by read id (iid).
+class FragmentInfo {
+public:
+  FragmentInfo(gkStore *gkp, const char *prefix, uint32 minReadLen);
+  ~FragmentInfo();
+
+  // Bytes held by the two per-read arrays, _fragLength and _libIID.  Each is
+  // allocated with _numFragments + 1 entries.
+  uint64 memoryUsage(void) {
+    return(2 * sizeof(uint32) * ((uint64)_numFragments + 1));
+  };
+
+  uint32 numFragments(void) { return(_numFragments); };
+  uint32 numLibraries(void) { return(_numLibraries); };
+
+  // No bounds checking is done on iid.
+  uint32 fragmentLength(uint32 iid) { return(_fragLength[iid]); };
+  uint32 libraryIID(uint32 iid) { return(_libIID[iid]); };
+
+  // Returns the number of bases of read a_iid covered by an overlap to read
+  // b_iid with the given hangs.  The span on B is computed too, but only to
+  // validate the overlap; it is not returned.
+  //
+  // Both reads must have non-zero length.  A bogus overlap (a span that is
+  // non-positive or longer than its read) is reported on stderr and clamped
+  // to [0, length]; a span that is still zero after clamping trips the
+  // asserts below.
+  uint32 overlapLength(uint32 a_iid, uint32 b_iid, int32 a_hang, int32 b_hang) {
+    int32 alen = fragmentLength(a_iid);
+    int32 blen = fragmentLength(b_iid);
+    int32 aovl = 0;
+    int32 bovl = 0;
+
+    assert(alen > 0);
+    assert(blen > 0);
+
+    if (a_hang < 0) {
+      // Negative a_hang, negative b_hang: A loses b_hang bases, B loses a_hang bases.
+      // Negative a_hang, other b_hang:    all of A is covered; B loses both hangs.
+      aovl = (b_hang < 0) ? (alen + b_hang) : (alen);
+      bovl = (b_hang < 0) ? (blen + a_hang) : (blen + a_hang - b_hang);
+    } else {
+      // Other a_hang, negative b_hang:    all of B is covered; A loses both hangs.
+      // Other a_hang, other b_hang:       A loses a_hang bases, B loses b_hang bases.
+      aovl = (b_hang < 0) ? (alen - a_hang + b_hang) : (alen - a_hang);
+      bovl = (b_hang < 0) ? (blen) : (blen - b_hang);
+    }
+
+    if ((aovl <= 0) || (bovl <= 0) || (aovl > alen) || (bovl > blen)) {
+      fprintf(stderr, "WARNING: bogus overlap found for A="F_U32" B="F_U32"\n", a_iid, b_iid);
+      fprintf(stderr, "WARNING: A len="F_S32" hang="F_S32" ovl="F_S32"\n", alen, a_hang, aovl);
+      fprintf(stderr, "WARNING: B len="F_S32" hang="F_S32" ovl="F_S32"\n", blen, b_hang, bovl);
+    }
+
+    if (aovl < 0) aovl = 0;
+    if (bovl < 0) bovl = 0;
+
+    if (aovl > alen) aovl = alen;
+    if (bovl > blen) bovl = blen;
+
+    assert(aovl > 0);
+    assert(bovl > 0);
+    assert(aovl <= alen);
+    assert(bovl <= blen);
+
+    // AVE does not work. return((uint32)((aovl, bovl)/2));
+    // MAX does not work. return((uint32)MAX(aovl, bovl));
+
+    return(aovl);
+  };
+
+private:
+  // Write the arrays to, or restore them from, a file.
+  // NOTE(review): presumably 'prefix' names that file -- confirm in AS_BAT_FragmentInfo.C.
+  void save(const char *prefix);
+  bool load(const char *prefix);
+
+  uint32 _numFragments;
+  uint32 _numLibraries;
+
+  uint32 *_fragLength; // Length of each read, indexed by iid.
+  uint32 *_libIID;     // Library of each read, indexed by iid.
+};
+
+
+
+extern FragmentInfo *FI;
+
+#endif // INCLUDE_AS_BAT_FRAGMENT_INFO
diff --git a/src/bogart/AS_BAT_Instrumentation.C b/src/bogart/AS_BAT_Instrumentation.C
index 32b563d..41775ed 100644
--- a/src/bogart/AS_BAT_Instrumentation.C
+++ b/src/bogart/AS_BAT_Instrumentation.C
@@ -35,19 +35,23 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Unitig.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
+
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
#include "AS_BAT_SetParentAndHang.H"
#include "AS_BAT_Outputs.H"
#include "intervalList.H"
// Will fail if a read is in unitig 0, or if a read isn't in a unitig.
-// Used to also compute some simple size statistics.
void
checkUnitigMembership(UnitigVector &unitigs) {
uint32 *inUnitig = new uint32 [FI->numFragments()+1];
+ uint32 noUnitig = 0xffffffff;
// All reads start of not placed in a unitig.
@@ -66,7 +70,16 @@ checkUnitigMembership(UnitigVector &unitigs) {
for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
ufNode *frg = &tig->ufpath[fi];
- assert(frg->ident <= FI->numFragments());
+ if (frg->ident > FI->numFragments())
+ fprintf(stderr, "tig %u ufpath[%d] ident %u more than number of reads %u\n",
+ tig->id(), fi, frg->ident, FI->numFragments());
+
+ if (inUnitig[frg->ident] != noUnitig)
+ fprintf(stderr, "tig %u ufpath[%d] ident %u placed multiple times\n",
+ tig->id(), fi, frg->ident);
+
+ assert(frg->ident <= FI->numFragments()); // Can't be out of range.
+ assert(inUnitig[frg->ident] == noUnitig); // Read must be not placed yet.
inUnitig[frg->ident] = ti;
}
@@ -86,80 +99,6 @@ checkUnitigMembership(UnitigVector &unitigs) {
}
-// For every unitig, report the best overlaps contained in the
-// unitig, and all overlaps contained in the unitig.
-void
-reportOverlapsUsed(UnitigVector &unitigs, const char *prefix, const char *name) {
-
- if (logFileFlagSet(LOG_OVERLAPS_USED) == 0)
- return;
-
- char ovlPath[FILENAME_MAX];
- sprintf(ovlPath, "%s.%03u.%s.overlaps", prefix, logFileOrder, name);
-
- FILE *F = fopen(ovlPath, "w");
-
- if (F == NULL)
- return;
-
- for (uint32 ti=0; ti<unitigs.size(); ti++) {
- Unitig *utg = unitigs[ti];
-
- if (utg == NULL)
- continue;
-
- for (uint32 fi=0; fi<utg->ufpath.size(); fi++) {
- ufNode *frg = &utg->ufpath[fi];
-
- // Where is our best overlap? Contained or dovetail?
-
- BestEdgeOverlap *bestedge5 = OG->getBestEdgeOverlap(frg->ident, false);
- BestEdgeOverlap *bestedge3 = OG->getBestEdgeOverlap(frg->ident, true);
-
- uint32 bestident5 = 0;
- uint32 bestident3 = 0;
-
- if (bestedge5)
- bestident5 = bestedge5->fragId();
-
- if (bestedge3)
- bestident3 = bestedge3->fragId();
-
- // Now search ahead, reporting any overlap to any fragment.
- //
- for (uint32 oi=fi+1; oi<utg->ufpath.size(); oi++) {
- ufNode *ooo = &utg->ufpath[oi];
-
- int frgbgn = MIN(frg->position.bgn, frg->position.end);
- int frgend = MAX(frg->position.bgn, frg->position.end);
-
- int ooobgn = MIN(ooo->position.bgn, ooo->position.end);
- int oooend = MAX(ooo->position.bgn, ooo->position.end);
-
- if ((frgbgn <= ooobgn) && (ooobgn + 40 < frgend)) {
- BestContainment *bestcont = OG->getBestContainer(ooo->ident);
-
- uint32 bestident = 0;
- if (bestcont->isContained)
- bestident = bestcont->container;
-
- bool isBest = ((frg->ident == bestident) ||
- (ooo->ident == bestident5) ||
- (ooo->ident == bestident3));
-
- fprintf(F, "%d\t%d%s\n", frg->ident, ooo->ident, (isBest) ? ((bestident) ? "\tbc" : "\tbe") : "");
- }
-
- if (frgend < ooobgn)
- break;
- }
- }
- }
-
- fclose(F);
-}
-
-
// Decides if a unitig is unassembled. The other classifications (isBubble, isCircular, isRepeat)
// are made when the type is processed (e.g., when bubbles are popped).
//
@@ -316,10 +255,11 @@ reportN50(vector<uint32> &data, char const *label, uint64 genomeSize) {
sum += data[i];
while (siz * nnn / 100 < sum) {
- writeLog("ng%03"F_U64P" %6"F_U64P" lg%03"F_U64P" %6"F_U64P" sum %10"F_U64P"\n",
+ writeLog("ng%03"F_U64P" %9"F_U64P" lg%03"F_U64P" %8"F_U64P" sum %11"F_U64P" (%s)\n",
nnn, data[i],
nnn, i+1,
- sum);
+ sum,
+ label);
nnn += 10;
}
@@ -372,16 +312,22 @@ reportUnitigs(UnitigVector &unitigs, const char *prefix, const char *name, uint6
reportN50(circularLength, "CIRCULAR", genomeSize);
reportN50(contigLength, "CONTIGS", genomeSize);
- // Dump to an intermediate store.
-
if (logFileFlagSet(LOG_INTERMEDIATE_UNITIGS) == 0)
return;
+ // Dump to an intermediate store.
+
+ char tigStorePath[FILENAME_MAX];
+ sprintf(tigStorePath, "%s.%03u.%s.tigStore", prefix, logFileOrder, name);
+
+ fprintf(stderr, "Creating intermediate tigStore '%s'\n", tigStorePath);
+
uint32 numFragsT = 0;
uint32 numFragsP = 0;
uint64 utgLen = 0;
// Compute average frags per partition.
+
for (uint32 ti=0; ti<unitigs.size(); ti++) {
Unitig *utg = unitigs[ti];
@@ -403,11 +349,391 @@ reportUnitigs(UnitigVector &unitigs, const char *prefix, const char *name, uint6
// Dump the unitigs to an intermediate store.
- char tigStorePath[FILENAME_MAX];
- sprintf(tigStorePath, "%s.%03u.%s.tigStore", prefix, logFileOrder, name);
-
setParentAndHang(unitigs);
writeUnitigsToStore(unitigs, tigStorePath, tigStorePath, numFragsP, false);
}
+
+
+
+// Tig-type categories.  getTigType() returns one of tCTG..tUNU, and all six
+// values are used as indices into the 6-element counter arrays of olapsUsed.
+#define tCTG 0 // To a read in a normal tig
+#define tRPT 1 // To a read in a repeat tig
+#define tBUB 2 // To a read in a bubble tig
+#define tUNA 3 // To a read in an 'unassembled' leftover tig
+#define tUNU 4 // To a read not placed in a tig
+#define tNOP 5 // To no read (for best edges)
+
+// Tallies for one class of overlap.  reportOverlaps() keeps one instance per
+// class, zeroes it with memset(), and indexes every array with the tCTG..tNOP
+// tig-type constants.
+struct olapsUsed {
+
+ // Number of overlaps (or best edges) tallied in this instance.
+ uint64 total;
+ // By definition, satisfied overlaps are in the same tig.
+
+ uint64 doveSatSame[6];
+ uint64 contSatSame[6];
+
+ // Unsatisfied overlaps can be in the same tig...
+ uint64 doveUnsatSame[6];
+ uint64 contUnsatSame[6];
+
+ // ...or can be between tigs.
+
+ // Indexed [type of A's tig][type of B's tig].
+ uint64 doveUnsatDiff[6][6];
+ uint64 contUnsatDiff[6][6];
+};
+
+
+
+// Map a tig to its tXXX category.  A NULL tig means the read is not placed
+// (tUNU).  Otherwise the flags are tested in the order unassembled, bubble,
+// repeat, and a tig with none of them set is a normal contig (tCTG).
+uint32
+getTigType(Unitig *tg) {
+  if (tg == NULL)
+    return(tUNU);
+
+  return((tg->_isUnassembled) ? tUNA :
+         (tg->_isBubble)      ? tBUB :
+         (tg->_isRepeat)      ? tRPT :
+                                tCTG);
+}
+
+
+// Despite its name, returns true when the overlap between reads A and B is
+// NOT satisfied by their placement, and false when it is.
+//
+//   rdAlo, rdAhi, rdBlo, rdBhi - low and high coordinate of each read
+//   rdAfwd, rdBfwd             - true if the read is placed forward
+//   flipped                    - true if the overlap requires the two reads to
+//                                have different orientations
+//
+// The no-overlap test used to compare rdBhi against rdBlo, which cannot detect
+// B lying entirely before A and left rdAlo unused; it now compares against
+// rdAlo.
+//
+// NOTE(review): reportOverlaps() counts a true result as unsatisfied in its
+// all-overlaps loop but as satisfied for best edges -- confirm which is meant.
+bool
+satisfiedOverlap(uint32 rdAlo, uint32 rdAhi, bool rdAfwd, uint32 rdBlo, uint32 rdBhi, bool rdBfwd, bool flipped) {
+  return(((rdAhi < rdBlo) || (rdBhi < rdAlo)) ||       // Not satisfied, no overlap
+         ((rdAfwd == rdBfwd) && (flipped == true)) ||  // Not satisfied, same orient, but flipped overlap
+         ((rdAfwd != rdBfwd) && (flipped == false)));  // Not satisfied, diff orient, but normal overlap
+}
+
+
+// Classify every overlap by whether the current tig layout satisfies it, and
+// write an "OVERLAP FATE" summary with writeLog().
+//
+// The only overlap interface we have is by iterating over all reads, so each
+// read with a non-zero length is visited and:
+//  - if the read is not contained, its 5' and 3' best edges are tallied in 'bb';
+//  - every overlap returned by OC->getOverlaps() is tallied in 'cc' (not a
+//    dovetail), 'dc' (dovetail, at least one read contained) or 'dd' (dovetail,
+//    neither read contained).
+//
+// 'prefix' and 'name' are not used.
+//
+// NOTE(review): satisfiedOverlap() returns true for the cases its own comments
+// label "Not satisfied".  The all-overlaps loop counts true as unsatisfied,
+// but the best-edge code counts true as satisfied (and derives 'flipped' from
+// frag3p()); confirm the best-edge tallies are as intended.
+void
+reportOverlaps(UnitigVector &unitigs, const char *prefix, const char *name) {
+  olapsUsed *dd = new olapsUsed; // Dovetail overlaps to non-contained reads
+  olapsUsed *dc = new olapsUsed; // Dovetail overlaps to contained reads
+  olapsUsed *cc = new olapsUsed; // Containment overlaps
+  olapsUsed *bb = new olapsUsed; // Best overlaps
+
+  memset(dd, 0, sizeof(olapsUsed));
+  memset(dc, 0, sizeof(olapsUsed));
+  memset(cc, 0, sizeof(olapsUsed));
+  memset(bb, 0, sizeof(olapsUsed));
+
+
+  for (uint32 fi=0; fi<FI->numFragments()+1; fi++) {
+    if (FI->fragmentLength(fi) == 0)
+      continue;
+
+    uint32 rdAid = fi;
+    uint32 tgAid = Unitig::fragIn(rdAid);
+    Unitig *tgA = unitigs[tgAid];
+    uint32 tgAtype = getTigType(tgA);
+
+    // Best overlaps exist if the read isn't contained.
+
+    if (OG->isContained(rdAid) == false) {
+      BestEdgeOverlap *b5 = OG->getBestEdgeOverlap(fi, false);
+      uint32 rd5id = b5->fragId();
+      uint32 tg5id = Unitig::fragIn(rd5id);
+      Unitig *tg5 = unitigs[tg5id];
+      uint32 tg5type = getTigType(tg5);
+
+      BestEdgeOverlap *b3 = OG->getBestEdgeOverlap(fi, true);
+      uint32 rd3id = b3->fragId();
+      uint32 tg3id = Unitig::fragIn(rd3id);
+      Unitig *tg3 = unitigs[tg3id];
+      uint32 tg3type = getTigType(tg3);
+
+      bb->total += 2;
+
+      // If this read isn't even in a tig, add to the unused categories.
+
+      if (tgAid == 0) {
+        if (rd5id == 0)
+          bb->doveUnsatDiff[tUNU][tNOP]++;
+        else
+          bb->doveUnsatDiff[tUNU][tg5type]++;
+
+        if (rd3id == 0)
+          bb->doveUnsatDiff[tUNU][tNOP]++;
+        else
+          bb->doveUnsatDiff[tUNU][tg3type]++;
+      }
+
+      // Otherwise, its in a tig, and we need to compare positions.
+
+      else {
+        uint32 rdApos = unitigs[tgAid]->pathPosition(rdAid);
+        ufNode *rdA = &unitigs[tgAid]->ufpath[rdApos];
+        bool rdAfwd = (rdA->position.bgn < rdA->position.end);
+        int32 rdAlo = (rdAfwd) ? rdA->position.bgn : rdA->position.end;
+        int32 rdAhi = (rdAfwd) ? rdA->position.end : rdA->position.bgn;
+
+        // Different tigs? Unsatisfied. Same tig? Grab the reads and check for overlap.
+
+        if (tgA != tg5) {
+          bb->doveUnsatDiff[tgAtype][tg5type]++;
+
+        } else if (rd5id == 0) {
+          bb->doveUnsatDiff[tgAtype][tNOP]++;
+
+        } else {
+          uint32 rd5pos = unitigs[tg5id]->pathPosition(rd5id);
+          ufNode *rd5 = &unitigs[tg5id]->ufpath[rd5pos];
+          bool rd5fwd = (rd5->position.bgn < rd5->position.end);
+          int32 rd5lo = (rd5fwd) ? rd5->position.bgn : rd5->position.end;
+          int32 rd5hi = (rd5fwd) ? rd5->position.end : rd5->position.bgn;
+
+          if (satisfiedOverlap(rdAlo, rdAhi, rdAfwd, rd5lo, rd5hi, rd5fwd, (b5->frag3p() == true))) {
+            bb->doveSatSame[tgAtype]++;
+          } else {
+            bb->doveUnsatSame[tgAtype]++;
+          }
+        }
+
+
+        if (tgA != tg3) {
+          bb->doveUnsatDiff[tgAtype][tg3type]++;
+
+        } else if (rd3id == 0) {
+          bb->doveUnsatDiff[tgAtype][tNOP]++;
+
+        } else {
+          uint32 rd3pos = unitigs[tg3id]->pathPosition(rd3id);
+          ufNode *rd3 = &unitigs[tg3id]->ufpath[rd3pos];
+          bool rd3fwd = (rd3->position.bgn < rd3->position.end);
+          int32 rd3lo = (rd3fwd) ? rd3->position.bgn : rd3->position.end;
+          int32 rd3hi = (rd3fwd) ? rd3->position.end : rd3->position.bgn;
+
+          if (satisfiedOverlap(rdAlo, rdAhi, rdAfwd, rd3lo, rd3hi, rd3fwd, (b3->frag3p() == false))) {
+            bb->doveSatSame[tgAtype]++;
+          } else {
+            bb->doveUnsatSame[tgAtype]++;
+          }
+        }
+      }
+    }
+
+
+    // For all overlaps.
+
+    uint32 ovlLen = 0;
+    BAToverlap *ovl = OC->getOverlaps(fi, AS_MAX_ERATE, ovlLen);
+
+
+    for (uint32 oi=0; oi<ovlLen; oi++) {
+      uint32 rdAid = ovl[oi].a_iid;
+      uint32 tgAid = Unitig::fragIn(rdAid);
+      Unitig *tgA = unitigs[tgAid];
+      uint32 tgAtype = getTigType(tgA);
+
+      uint32 rdBid = ovl[oi].b_iid;
+      uint32 tgBid = Unitig::fragIn(rdBid);
+      Unitig *tgB = unitigs[tgBid];
+      uint32 tgBtype = getTigType(tgB);
+
+      bool isDove = ovl[oi].isDovetail();
+      bool contReads = OG->isContained(rdAid) || OG->isContained(rdBid);
+
+      // Figure out what class of overlap we're counting.
+
+      olapsUsed *used = NULL;
+
+      if (isDove == false)
+        used = cc;
+      else
+        if (contReads == true)
+          used = dc;
+        else
+          used = dd;
+
+      used->total++;
+
+      // If to reads not in a tig, unsatisfied.
+
+      if ((tgAid == 0) || (tgBid == 0)) {
+        if (isDove)
+          used->doveUnsatDiff[tgAtype][tgBtype]++;
+        else
+          used->contUnsatDiff[tgAtype][tgBtype]++;
+        continue;
+      }
+
+      // If in different tigs, unsatisfied.
+
+      if (tgAid != tgBid) {
+        if (isDove)
+          used->doveUnsatDiff[tgAtype][tgBtype]++;
+        else
+          used->contUnsatDiff[tgAtype][tgBtype]++;
+        continue;
+      }
+
+      // Else, possibly satisfied. We need to check positions.
+
+      uint32 rdApos = unitigs[tgAid]->pathPosition(rdAid);
+      ufNode *rdA = &unitigs[tgAid]->ufpath[rdApos];
+      bool rdAfwd = (rdA->position.bgn < rdA->position.end);
+      int32 rdAlo = (rdAfwd) ? rdA->position.bgn : rdA->position.end;
+      int32 rdAhi = (rdAfwd) ? rdA->position.end : rdA->position.bgn;
+
+      uint32 rdBpos = unitigs[tgBid]->pathPosition(rdBid);
+      ufNode *rdB = &unitigs[tgBid]->ufpath[rdBpos];
+      bool rdBfwd = (rdB->position.bgn < rdB->position.end);
+      int32 rdBlo = (rdBfwd) ? rdB->position.bgn : rdB->position.end;
+      int32 rdBhi = (rdBfwd) ? rdB->position.end : rdB->position.bgn;
+
+      // If overlapping and correctly oriented, good enough for now. Do we want to care about
+      // overlap length? Nah, there's enough fudging (still, I think) in placement that it'd be
+      // tough to get that usefully precise.
+      //
+      // satisfiedOverlap() returns true when the overlap is NOT satisfied.
+
+      if (satisfiedOverlap(rdAlo, rdAhi, rdAfwd, rdBlo, rdBhi, rdBfwd, ovl[oi].flipped)) {
+        if (isDove)
+          used->doveUnsatSame[tgAtype]++;
+        else
+          used->contUnsatSame[tgAtype]++;
+
+      } else {
+        if (isDove)
+          used->doveSatSame[tgAtype]++;
+        else
+          used->contSatSame[tgAtype]++;
+      }
+    }
+  }
+
+  // Merge the symmetrical counts: fold [jj][ii] into [ii][jj] for ii < jj, then
+  // poison the folded cell so an accidental use stands out.
+
+  for (uint32 ii=0; ii<6; ii++) {
+    for (uint32 jj=ii+1; jj<6; jj++) {
+      bb->doveUnsatDiff[ii][jj] += bb->doveUnsatDiff[jj][ii]; bb->doveUnsatDiff[jj][ii] = UINT64_MAX;
+      dd->doveUnsatDiff[ii][jj] += dd->doveUnsatDiff[jj][ii]; dd->doveUnsatDiff[jj][ii] = UINT64_MAX;
+      dc->doveUnsatDiff[ii][jj] += dc->doveUnsatDiff[jj][ii]; dc->doveUnsatDiff[jj][ii] = UINT64_MAX;
+      cc->doveUnsatDiff[ii][jj] += cc->doveUnsatDiff[jj][ii]; cc->doveUnsatDiff[jj][ii] = UINT64_MAX;
+
+      bb->contUnsatDiff[ii][jj] += bb->contUnsatDiff[jj][ii]; bb->contUnsatDiff[jj][ii] = UINT64_MAX;
+      dd->contUnsatDiff[ii][jj] += dd->contUnsatDiff[jj][ii]; dd->contUnsatDiff[jj][ii] = UINT64_MAX;
+      dc->contUnsatDiff[ii][jj] += dc->contUnsatDiff[jj][ii]; dc->contUnsatDiff[jj][ii] = UINT64_MAX;
+      cc->contUnsatDiff[ii][jj] += cc->contUnsatDiff[jj][ii]; cc->contUnsatDiff[jj][ii] = UINT64_MAX;
+    }
+  }
+
+
+  // Emit a nicely formatted report.  Only cells [ii][jj] with ii <= jj are
+  // printed; the others were folded into them above.
+
+  // Percentage of X relative to the total of the bb, dd, dc or cc class.
+#define B(X) (100.0 * (X) / (bb->total))
+#define P(X) (100.0 * (X) / (dd->total))
+#define Q(X) (100.0 * (X) / (dc->total))
+#define R(X) (100.0 * (X) / (cc->total))
+
+  writeLog("--------------------------------------------------------------------------------\n");
+  writeLog("OVERLAP FATE\n");
+  writeLog("\n");
+  writeLog("dovetail overlaps (best) "F_U64"\n", bb->total);
+  writeLog("dovetail overlaps "F_U64"\n", dd->total);
+  writeLog("dovetail overlaps to contained reads "F_U64"\n", dc->total);
+  writeLog("containment overlaps "F_U64"\n", cc->total);
+  writeLog("\n");
+  writeLog("SATISFIED best edges DOVETAIL\n");
+  writeLog("--------- ------------ -------\n");
+  writeLog("contig %12"F_U64P" %6.2f%%\n", bb->doveSatSame[tCTG], B(bb->doveSatSame[tCTG]));
+  writeLog("repeat contig %12"F_U64P" %6.2f%%\n", bb->doveSatSame[tRPT], B(bb->doveSatSame[tRPT]));
+  writeLog("bubble %12"F_U64P" %6.2f%%\n", bb->doveSatSame[tBUB], B(bb->doveSatSame[tBUB]));
+  writeLog("\n");
+  writeLog("UNSATISFIED best edges DOVETAIL\n");
+  writeLog("----------- ------------ -------\n");
+  writeLog("contig %12"F_U64P" %6.2f%%\n", bb->doveUnsatSame[tCTG], B(bb->doveUnsatSame[tCTG]));
+  writeLog("repeat %12"F_U64P" %6.2f%%\n", bb->doveUnsatSame[tRPT], B(bb->doveUnsatSame[tRPT]));
+  writeLog("bubble %12"F_U64P" %6.2f%%\n", bb->doveUnsatSame[tBUB], B(bb->doveUnsatSame[tBUB]));
+  writeLog("unassembled %12"F_U64P" %6.2f%%\n", bb->doveUnsatSame[tUNA], B(bb->doveUnsatSame[tUNA]));
+  writeLog("unused %12"F_U64P" %6.2f%%\n", bb->doveUnsatSame[tUNU], B(bb->doveUnsatSame[tUNU]));
+  writeLog("\n");
+  writeLog("UNSATISFIED best edges DOVETAIL\n");
+  writeLog("----------- ------------ -------\n");
+  writeLog("contig-contig %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tCTG][tCTG], B(bb->doveUnsatDiff[tCTG][tCTG]));
+  writeLog("contig-repeat %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tCTG][tRPT], B(bb->doveUnsatDiff[tCTG][tRPT]));
+  writeLog("contig-bubble %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tCTG][tBUB], B(bb->doveUnsatDiff[tCTG][tBUB]));
+  writeLog("contig-unassembled %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tCTG][tUNA], B(bb->doveUnsatDiff[tCTG][tUNA]));
+  writeLog("contig-unused %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tCTG][tUNU], B(bb->doveUnsatDiff[tCTG][tUNU]));
+  writeLog("contig-none %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tCTG][tNOP], B(bb->doveUnsatDiff[tCTG][tNOP]));
+  writeLog("\n");
+  writeLog("repeat-repeat %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tRPT][tRPT], B(bb->doveUnsatDiff[tRPT][tRPT]));
+  writeLog("repeat-bubble %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tRPT][tBUB], B(bb->doveUnsatDiff[tRPT][tBUB]));
+  writeLog("repeat-unassembled %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tRPT][tUNA], B(bb->doveUnsatDiff[tRPT][tUNA]));
+  writeLog("repeat-unused %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tRPT][tUNU], B(bb->doveUnsatDiff[tRPT][tUNU]));
+  writeLog("repeat-none %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tRPT][tNOP], B(bb->doveUnsatDiff[tRPT][tNOP]));
+  writeLog("\n");
+  writeLog("bubble-bubble %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tBUB][tBUB], B(bb->doveUnsatDiff[tBUB][tBUB]));
+  writeLog("bubble-unassembled %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tBUB][tUNA], B(bb->doveUnsatDiff[tBUB][tUNA]));
+  writeLog("bubble-unused %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tBUB][tUNU], B(bb->doveUnsatDiff[tBUB][tUNU]));
+  writeLog("bubble-none %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tBUB][tNOP], B(bb->doveUnsatDiff[tBUB][tNOP]));
+  writeLog("\n");
+  writeLog("unassembled-unassembled %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tUNA][tUNA], B(bb->doveUnsatDiff[tUNA][tUNA]));
+  writeLog("unassembled-unused %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tUNA][tUNU], B(bb->doveUnsatDiff[tUNA][tUNU]));
+  writeLog("unassembled-none %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tUNA][tNOP], B(bb->doveUnsatDiff[tUNA][tNOP]));
+  writeLog("\n");
+  writeLog("unused-unused %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tUNU][tUNU], B(bb->doveUnsatDiff[tUNU][tUNU]));
+  writeLog("unused-none %12"F_U64P" %6.2f%%\n", bb->doveUnsatDiff[tUNU][tNOP], B(bb->doveUnsatDiff[tUNU][tNOP]));
+  writeLog("\n");
+  writeLog("\n");
+  writeLog("\n");
+  writeLog("SATISFIED all overlaps DOVETAIL DOVECONT CONTAINMENT\n");
+  writeLog("--------- ------------ ------- ------------ ------- ------------ -------\n");
+  writeLog("contig %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveSatSame[tCTG], P(dd->doveSatSame[tCTG]), dc->doveSatSame[tCTG], Q(dc->doveSatSame[tCTG]), cc->contSatSame[tCTG], R(cc->contSatSame[tCTG]));
+  writeLog("repeat contig %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveSatSame[tRPT], P(dd->doveSatSame[tRPT]), dc->doveSatSame[tRPT], Q(dc->doveSatSame[tRPT]), cc->contSatSame[tRPT], R(cc->contSatSame[tRPT]));
+  writeLog("bubble %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveSatSame[tBUB], P(dd->doveSatSame[tBUB]), dc->doveSatSame[tBUB], Q(dc->doveSatSame[tBUB]), cc->contSatSame[tBUB], R(cc->contSatSame[tBUB]));
+  writeLog("\n");
+  writeLog("UNSATISFIED all overlaps DOVETAIL DOVECONT CONTAINMENT\n");
+  writeLog("----------- ------------ ------- ------------ ------- ------------ -------\n");
+  writeLog("contig %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatSame[tCTG], P(dd->doveUnsatSame[tCTG]), dc->doveUnsatSame[tCTG], Q(dc->doveUnsatSame[tCTG]), cc->contUnsatSame[tCTG], R(cc->contUnsatSame[tCTG]));
+  writeLog("repeat %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatSame[tRPT], P(dd->doveUnsatSame[tRPT]), dc->doveUnsatSame[tRPT], Q(dc->doveUnsatSame[tRPT]), cc->contUnsatSame[tRPT], R(cc->contUnsatSame[tRPT]));
+  writeLog("bubble %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatSame[tBUB], P(dd->doveUnsatSame[tBUB]), dc->doveUnsatSame[tBUB], Q(dc->doveUnsatSame[tBUB]), cc->contUnsatSame[tBUB], R(cc->contUnsatSame[tBUB]));
+  writeLog("unassembled %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatSame[tUNA], P(dd->doveUnsatSame[tUNA]), dc->doveUnsatSame[tUNA], Q(dc->doveUnsatSame[tUNA]), cc->contUnsatSame[tUNA], R(cc->contUnsatSame[tUNA]));
+  writeLog("unused %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatSame[tUNU], P(dd->doveUnsatSame[tUNU]), dc->doveUnsatSame[tUNU], Q(dc->doveUnsatSame[tUNU]), cc->contUnsatSame[tUNU], R(cc->contUnsatSame[tUNU]));
+  writeLog("\n");
+  writeLog("UNSATISFIED all overlaps DOVETAIL DOVECONT CONTAINMENT\n");
+  writeLog("----------- ------------ ------- ------------ ------- ------------ -------\n");
+  writeLog("contig-contig %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tCTG][tCTG], P(dd->doveUnsatDiff[tCTG][tCTG]), dc->doveUnsatDiff[tCTG][tCTG], Q(dc->doveUnsatDiff[tCTG][tCTG]), cc->contUnsatDiff[tCTG][tCTG], R(cc->contUnsatDiff[tCTG][tCTG]));
+  writeLog("contig-repeat %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tCTG][tRPT], P(dd->doveUnsatDiff[tCTG][tRPT]), dc->doveUnsatDiff[tCTG][tRPT], Q(dc->doveUnsatDiff[tCTG][tRPT]), cc->contUnsatDiff[tCTG][tRPT], R(cc->contUnsatDiff[tCTG][tRPT]));
+  writeLog("contig-bubble %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tCTG][tBUB], P(dd->doveUnsatDiff[tCTG][tBUB]), dc->doveUnsatDiff[tCTG][tBUB], Q(dc->doveUnsatDiff[tCTG][tBUB]), cc->contUnsatDiff[tCTG][tBUB], R(cc->contUnsatDiff[tCTG][tBUB]));
+  writeLog("contig-unassembled %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tCTG][tUNA], P(dd->doveUnsatDiff[tCTG][tUNA]), dc->doveUnsatDiff[tCTG][tUNA], Q(dc->doveUnsatDiff[tCTG][tUNA]), cc->contUnsatDiff[tCTG][tUNA], R(cc->contUnsatDiff[tCTG][tUNA]));
+  writeLog("contig-unused %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tCTG][tUNU], P(dd->doveUnsatDiff[tCTG][tUNU]), dc->doveUnsatDiff[tCTG][tUNU], Q(dc->doveUnsatDiff[tCTG][tUNU]), cc->contUnsatDiff[tCTG][tUNU], R(cc->contUnsatDiff[tCTG][tUNU]));
+  writeLog("\n");
+  writeLog("repeat-repeat %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tRPT][tRPT], P(dd->doveUnsatDiff[tRPT][tRPT]), dc->doveUnsatDiff[tRPT][tRPT], Q(dc->doveUnsatDiff[tRPT][tRPT]), cc->contUnsatDiff[tRPT][tRPT], R(cc->contUnsatDiff[tRPT][tRPT]));
+  writeLog("repeat-bubble %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tRPT][tBUB], P(dd->doveUnsatDiff[tRPT][tBUB]), dc->doveUnsatDiff[tRPT][tBUB], Q(dc->doveUnsatDiff[tRPT][tBUB]), cc->contUnsatDiff[tRPT][tBUB], R(cc->contUnsatDiff[tRPT][tBUB]));
+  writeLog("repeat-unassembled %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tRPT][tUNA], P(dd->doveUnsatDiff[tRPT][tUNA]), dc->doveUnsatDiff[tRPT][tUNA], Q(dc->doveUnsatDiff[tRPT][tUNA]), cc->contUnsatDiff[tRPT][tUNA], R(cc->contUnsatDiff[tRPT][tUNA]));
+  writeLog("repeat-unused %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tRPT][tUNU], P(dd->doveUnsatDiff[tRPT][tUNU]), dc->doveUnsatDiff[tRPT][tUNU], Q(dc->doveUnsatDiff[tRPT][tUNU]), cc->contUnsatDiff[tRPT][tUNU], R(cc->contUnsatDiff[tRPT][tUNU]));
+  writeLog("\n");
+  writeLog("bubble-bubble %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tBUB][tBUB], P(dd->doveUnsatDiff[tBUB][tBUB]), dc->doveUnsatDiff[tBUB][tBUB], Q(dc->doveUnsatDiff[tBUB][tBUB]), cc->contUnsatDiff[tBUB][tBUB], R(cc->contUnsatDiff[tBUB][tBUB]));
+  writeLog("bubble-unassembled %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tBUB][tUNA], P(dd->doveUnsatDiff[tBUB][tUNA]), dc->doveUnsatDiff[tBUB][tUNA], Q(dc->doveUnsatDiff[tBUB][tUNA]), cc->contUnsatDiff[tBUB][tUNA], R(cc->contUnsatDiff[tBUB][tUNA]));
+  writeLog("bubble-unused %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tBUB][tUNU], P(dd->doveUnsatDiff[tBUB][tUNU]), dc->doveUnsatDiff[tBUB][tUNU], Q(dc->doveUnsatDiff[tBUB][tUNU]), cc->contUnsatDiff[tBUB][tUNU], R(cc->contUnsatDiff[tBUB][tUNU]));
+  writeLog("\n");
+  writeLog("unassembled-unassembled %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tUNA][tUNA], P(dd->doveUnsatDiff[tUNA][tUNA]), dc->doveUnsatDiff[tUNA][tUNA], Q(dc->doveUnsatDiff[tUNA][tUNA]), cc->contUnsatDiff[tUNA][tUNA], R(cc->contUnsatDiff[tUNA][tUNA]));
+  writeLog("unassembled-unused %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tUNA][tUNU], P(dd->doveUnsatDiff[tUNA][tUNU]), dc->doveUnsatDiff[tUNA][tUNU], Q(dc->doveUnsatDiff[tUNA][tUNU]), cc->contUnsatDiff[tUNA][tUNU], R(cc->contUnsatDiff[tUNA][tUNU]));
+  writeLog("\n");
+  writeLog("unused-unused %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%% %12"F_U64P" %6.2f%%\n", dd->doveUnsatDiff[tUNU][tUNU], P(dd->doveUnsatDiff[tUNU][tUNU]), dc->doveUnsatDiff[tUNU][tUNU], Q(dc->doveUnsatDiff[tUNU][tUNU]), cc->contUnsatDiff[tUNU][tUNU], R(cc->contUnsatDiff[tUNU][tUNU]));
+  writeLog("\n");
+  writeLog("\n");
+
+  // The percentage helpers are local to this report.
+#undef B
+#undef P
+#undef Q
+#undef R
+
+  delete dd;
+  delete dc;
+  delete cc;
+  delete bb; // Was allocated above but never released.
+}
+
diff --git a/src/bogart/AS_BAT_Instrumentation.H b/src/bogart/AS_BAT_Instrumentation.H
index c690a23..0db8578 100644
--- a/src/bogart/AS_BAT_Instrumentation.H
+++ b/src/bogart/AS_BAT_Instrumentation.H
@@ -40,7 +40,7 @@
void checkUnitigMembership(UnitigVector &unitigs);
-void reportOverlapsUsed(UnitigVector &unitigs, const char *prefix, const char *name);
+void reportOverlaps(UnitigVector &unitigs, const char *prefix, const char *name);
void reportUnitigs(UnitigVector &unitigs, const char *prefix, const char *name, uint64 genomeSize);
void classifyUnitigsAsUnassembled(UnitigVector &unitigs,
@@ -49,5 +49,4 @@ void classifyUnitigsAsUnassembled(UnitigVector &unitigs,
double spanFraction,
double lowcovFraction, uint32 lowcovDepth);
-
#endif // INCLUDE_AS_BAT_INSTRUMENTATION
diff --git a/src/bogart/AS_BAT_IntersectBubble.C b/src/bogart/AS_BAT_IntersectBubble.C
deleted file mode 100644
index bbad6cf..0000000
--- a/src/bogart/AS_BAT_IntersectBubble.C
+++ /dev/null
@@ -1,660 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_IntersectBubble.C
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-NOV-23 to 2013-AUG-01
- * are Copyright 2010-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2014-DEC-19 to 2015-JUN-03
- * are Copyright 2014-2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_BestOverlapGraph.H"
-#include "AS_BAT_Unitig.H"
-#include "AS_BAT_PlaceFragUsingOverlaps.H"
-
-#include "AS_BAT_OverlapCache.H"
-
-#include "AS_BAT_IntersectBubble.H"
-
-//#include "AS_BAT_BestOverlapGraph.H"
-//#include "MultiAlignStore.H"
-
-#define MAX_OVERLAPS_PER_FRAG (16 * 1024 * 1024)
-
-// IN THE FIRST PART: Test the size of the bubble in the larger unitig using edges at the end.
-//
-// Function is immediately true if there is only one edge from the bubble to the large unitig.
-// Otherwise, continue.
-//
-// Function is immediately false if the edges are to different larger unitigs. Function
-// fails if the edges are to the same unitig, but that unitig is not 'larger'.
-//
-// ---bubble---
-// | |
-// v v
-// -----------------------------------
-//
-// Function is immediately false if the sizes are wildly different, if the edges are to different
-// unitigs, etc. Otherwise, continue.
-//
-// IN THE SECOND PART: Test the orientation of the fragments at the ends of the bubble. The FIRST
-// PART is unlikely to succeed if the SECOND PART is false. The order and orientation of these two
-// fragments should be the same when placed in the larger unitig. The bubble can be merged into the
-// larger unitig in the same order&orientation it is currently represented as, or the entire bubble
-// unitig can be reverse complemented - in which case the order reversed and the orientations flip.
-//
-// lFrg and rFrg are UPDATED with the position of those fragments in the larger unitig.
-//
-
-
-////////////////////////////////////////
-//
-// This is suboptimal. It is possible to have two bubbles, A and B, that both really belong in
-// larger unitig C, but A -> B -> C. If A is merged into B first, then there is a chance that
-// we'll lose the edges at the ends of B that place it in C (via ties; two fragments end at exactly
-// the end of unitig B).
-//
-////////////////////////////////////////
-
-
-
-static
-bool
-validateBubbleWithEdges(UnitigVector &unitigs,
- double erateBubble,
- Unitig *bubble,
- ufNode &lFrg, BestEdgeOverlap *lEnd,
- ufNode &rFrg, BestEdgeOverlap *rEnd,
- Unitig *larger) {
-
- assert(0);
-
- // Compute placement of the two fragments. Compare the size against the bubble.
-
- ufNode lFrgN = lFrg;
- ufNode rFrgN = rFrg;
-
-#if 0
- int32 lFrg5idx = -1;
- int32 lFrg3idx = -1;
-
- bool lPlaced = larger->placeFrag(lFrgN, lFrg5idx, (isReverse(lFrg.position) == true) ? NULL : lEnd,
- lFrgN, lFrg3idx, (isReverse(lFrg.position) == false) ? lEnd : NULL);
-
- int32 rFrg5idx = -1;
- int32 rFrg3idx = -1;
-
- bool rPlaced = larger->placeFrag(rFrgN, rFrg5idx, (isReverse(rFrg.position) == true) ? NULL : rEnd,
- rFrgN, rFrg3idx, (isReverse(rFrg.position) == false) ? rEnd : NULL);
-
- if (lPlaced == false) {
- // Huh? Didn't place? Emit diagnostics.
- writeLog("popBubbles()-- Failed to place lFrg.\n");
- return(false);
- }
-
- if (rPlaced == false) {
- // Huh? Didn't place? Emit diagnostics.
- writeLog("popBubbles()-- Failed to place rFrg.\n");
- return(false);
- }
-
-#else
-
- overlapPlacement lFrgPlacement;
- overlapPlacement rFrgPlacement;
-
- lFrgPlacement.errors = 4.0e9;
- lFrgPlacement.aligned = 1;
-
- rFrgPlacement.errors = 4.0e9;
- rFrgPlacement.aligned = 1;
-
- vector<overlapPlacement> placements;
-
- placements.clear();
-
- placeFragUsingOverlaps(unitigs, erateBubble, larger, lFrg.ident, placements);
- for (uint32 i=0; i<placements.size(); i++) {
- assert(placements[i].tigID == larger->id());
- if (placements[i].tigID != larger->id()) continue;
-
- if (placements[i].fCoverage < 0.99)
- continue;
-
- if (placements[i].errors / placements[i].aligned < lFrgPlacement.errors / lFrgPlacement.aligned)
- lFrgPlacement = placements[i];
- }
-
- lFrgN.ident = lFrgPlacement.frgID;
- lFrgN.contained = 0;
- lFrgN.parent = 0;
- lFrgN.ahang = 0;
- lFrgN.bhang = 0;
- lFrgN.position = lFrgPlacement.position;
- lFrgN.containment_depth = 0;
-
- if ((lFrgN.position.bgn == 0) &&
- (lFrgN.position.end == 0)) {
- writeLog("popBubbles()-- Failed to place lFrg.\n");
- return(false);
- }
-
-
- placements.clear();
-
- placeFragUsingOverlaps(unitigs, erateBubble, larger, rFrg.ident, placements);
- for (uint32 i=0; i<placements.size(); i++) {
- assert(placements[i].tigID == larger->id());
- if (placements[i].tigID != larger->id()) continue;
-
- if (placements[i].fCoverage < 0.99)
- continue;
-
- if (placements[i].errors / placements[i].aligned < rFrgPlacement.errors / rFrgPlacement.aligned)
- rFrgPlacement = placements[i];
- }
-
- rFrgN.ident = rFrgPlacement.frgID;
- rFrgN.contained = 0;
- rFrgN.parent = 0;
- rFrgN.ahang = 0;
- rFrgN.bhang = 0;
- rFrgN.position = rFrgPlacement.position;
- rFrgN.containment_depth = 0;
-
- if ((rFrgN.position.bgn == 0) &&
- (rFrgN.position.end == 0)) {
- writeLog("popBubbles()-- Failed to place rFrg.\n");
- return(false);
- }
-#endif
-
-
- int32 minL = MIN(lFrg.position.bgn, lFrg.position.end);
- int32 maxL = MAX(lFrg.position.bgn, lFrg.position.end);
-
- int32 minR = MIN(rFrg.position.bgn, rFrg.position.end);
- int32 maxR = MAX(rFrg.position.bgn, rFrg.position.end);
-
- int32 placedLen = MAX(maxL, maxR) - MIN(minL, minR);
-
- if (2 * placedLen < bubble->getLength()) {
- // Too short.
- writeLog("popBubbles()-- Too short. lFrg %d,%d rFrg %d,%d. L %d,%d R %d,%d len %d\n",
- lFrg.position.bgn, lFrg.position.end,
- rFrg.position.bgn, rFrg.position.end,
- minL, maxL, minR, maxR, placedLen);
- return(false);
- }
-
- if (2 * bubble->getLength() < placedLen) {
- // Too long.
- writeLog("popBubbles()-- Too long. lFrg %d,%d rFrg %d,%d. L %d,%d R %d,%d len %d\n",
- lFrg.position.bgn, lFrg.position.end,
- rFrg.position.bgn, rFrg.position.end,
- minL, maxL, minR, maxR, placedLen);
- return(false);
- }
-
- ////////////////////
- //
- // Check orientations
- //
- ////////////////////
-
- // If lFrg and rFrg are the same fragment (bubble is one uncontained fragment) then we're done.
-
- if (lFrg.ident == rFrg.ident) {
- lFrg = lFrgN;
- rFrg = rFrgN;
- return(true);
- }
-
- // Otherwise, check that the orientation and positioning of the before and after fragments is the
- // same.
-
- bool bL = (isReverse(lFrg.position));
- bool bR = (isReverse(rFrg.position));
- bool bOrd = (MIN(lFrg.position.bgn, lFrg.position.end) < MIN(rFrg.position.bgn, rFrg.position.end));
-
- bool nL = (isReverse(lFrgN.position));
- bool nR = (isReverse(rFrgN.position));
- bool nOrd = (MIN(lFrgN.position.bgn, lFrgN.position.end) < MIN(rFrgN.position.bgn, rFrgN.position.end));
-
- if (((bL == nL) && (bR == nR) && (bOrd == nOrd)) ||
- ((bL != nL) && (bR != nR) && (bOrd != nOrd))) {
- // Yup, looks good!
- lFrg = lFrgN;
- rFrg = rFrgN;
- return(true);
- }
-
- // Nope, something got screwed up in alignment.
-
- writeLog("popBubbles()-- Order/Orientation problem. bL %d bR %d bOrd %d nL %d nR %d nOrd %d\n",
- bL, bR, bOrd,
- nL, nR, nOrd);
-
- return(false);
-}
-
-
-
-
-// False if any of the fragments in 'bubble' are not fully covered by overlaps to fragments in
-// 'larger'. Such uncovered fragments would indicate a large bubble -- large enough that we failed
-// to find an overlap -- and would cause problems in consensus.
-//
-// False if any of the fragments in 'bubble' cannot be placed between lFrg and rFrg. This would
-// indicate the bubble contains a significant rearrangement and would cause problems in consensus.
-//
-// If the above tests pass, 'bubble' is inserted into 'larger' and 'bubble' is deleted.
-//
-static
-bool
-validateBubbleFragmentsWithOverlaps(UnitigVector &unitigs,
- double erateBubble,
- Unitig *bubble,
- ufNode &lFrg,
- ufNode &rFrg,
- Unitig *larger) {
-
- assert(0);
-
- // Method:
- //
- // * Call placeFragUsingOverlaps() for every fragment. Save the placements returned.
- // * Count the number of placements that are outside the lFrg/rFrg range.
- // * Isolate down to one 'best' placement for each fragment.
- // * Must be within lFrg/rFrg.
- // * Resolve ties with
- // * Placement in the original unitig
- // * Error rates on overlaps
-
- bool success = false;
-
- vector<overlapPlacement> *placements = new vector<overlapPlacement> [bubble->ufpath.size()];
- overlapPlacement *correctPlace = new overlapPlacement [bubble->ufpath.size()];
-
- for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
- ufNode *frg = &bubble->ufpath[fi];
-
- placeFragUsingOverlaps(unitigs, erateBubble, larger, frg->ident, placements[fi]);
-
- // Initialize the final placement to be bad, so we can pick the best.
- correctPlace[fi].fCoverage = 0.0;
- correctPlace[fi].errors = 4.0e9;
- correctPlace[fi].aligned = 1;
- }
-
- // Some bizarre cases -- possibly only from bad data -- confound any logical attempt at finding the min/max extents. Yes, even though
- // this should work, it doesn't. Or maybe it's just broken and I haven't seen how.
- //
- //int32 minE = (lFrg.position.bgn < rFrg.position.bgn) ? MIN(lFrg.position.bgn, lFrg.position.end) : MIN(rFrg.position.bgn, rFrg.position.end);
- //int32 maxE = (lFrg.position.bgn < rFrg.position.bgn) ? MAX(rFrg.position.bgn, rFrg.position.end) : MAX(lFrg.position.bgn, lFrg.position.end);
- //
- // The one case that breaks it is a bubble unitig with a single chimeric fragment.
- // lFrg ident = 367563, contained = 0, parent = 254673, ahang = 144, bhang = 24, bgn = 33406, end = 33238}
- // rFrg ident = 367563, contained = 0, parent = 147697, ahang = -58, bhang = -157, bgn = 33406, end = 33574}
- //
- int32 minE = MIN(MIN(lFrg.position.bgn, lFrg.position.end), MIN(rFrg.position.bgn, rFrg.position.end));
- int32 maxE = MAX(MAX(lFrg.position.bgn, lFrg.position.end), MAX(rFrg.position.bgn, rFrg.position.end));
- int32 diff = maxE - minE;
-
- assert(minE < maxE);
-
- minE -= diff / 2; if (minE < 0) minE = 0;
- maxE += diff / 2;
-
- uint32 nCorrect = 0;
-
- for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
- uint32 nNotPlaced = 0;
- uint32 nNotPlacedInCorrectPosition = 0;
- uint32 nNotPlacedFully = 0;
- uint32 nNotOriented = 0;
-
- if (placements[fi].size() == 0)
- nNotPlaced++;
-
- // If we're contained, and our container is actually in the bubble, we can (or should be able
- // to) safely allow almost any placement.
-
- bool requireFullAlignment = true;
-
- if ((OG->isContained(bubble->ufpath[fi].ident) == true) &&
- (bubble->fragIn(OG->getBestContainer(bubble->ufpath[fi].ident)->container) == bubble->id()))
- requireFullAlignment = false;
-
- for (uint32 pl=0; pl<placements[fi].size(); pl++) {
- assert(placements[fi][pl].tigID == larger->id());
- if (placements[fi][pl].tigID != larger->id()) continue;
-
- int32 minP = MIN(placements[fi][pl].position.bgn, placements[fi][pl].position.end);
- int32 maxP = MAX(placements[fi][pl].position.bgn, placements[fi][pl].position.end);
-
- if ((maxP < minE) || (maxE < minP)) {
- nNotPlacedInCorrectPosition++;
- continue;
- }
-
- if ((requireFullAlignment == true) && (placements[fi][pl].fCoverage < 0.99)) {
- nNotPlacedFully++;
- continue;
- }
-
- //if ((placements[fi][pl].nForward > 0) &&
- // (placements[fi][pl].nReverse > 0)) {
- // nNotOriented++;
- // continue;
- //}
-
- // The current placement seems like a good one. Should we keep it?
-
- // The length requirement was added to solve a problem during testing on hydra. We tried to
- // place a contained fragment -- so skipped the fCoverage test above. This fragment had two
- // placements in the correct location on the target unitig. One placement was fCoverage=1.00,
- // the other was fCoverage=0.15. Clearly the first was better, but the second had less
- // error. Without the length filter, we'd incorrectly pick the second placement.
-
- bool keepIt = false;
-
- if (placements[fi][pl].fCoverage > correctPlace[fi].fCoverage)
- // Yes! The current placement has more coverage than the saved one.
- keepIt = true;
-
- if ((placements[fi][pl].fCoverage >= correctPlace[fi].fCoverage) &&
- (placements[fi][pl].errors / placements[fi][pl].aligned < correctPlace[fi].errors / correctPlace[fi].aligned))
- // Yes! The current placement is just as long, and lower error.
- keepIt = true;
-
- // Yup, looks like a better placement.
- if (keepIt)
- correctPlace[fi] = placements[fi][pl];
- } // over all placements
-
- if (correctPlace[fi].fCoverage > 0)
- nCorrect++;
- else
- writeLog("popBubbles()-- Failed to place frag %d notPlaced %d notPlacedInCorrectPosition %d notPlacedFully %d notOriented %d\n",
- bubble->ufpath[fi].ident, nNotPlaced, nNotPlacedInCorrectPosition, nNotPlacedFully, nNotOriented);
- }
-
- if (nCorrect != bubble->ufpath.size())
- goto finished;
-
- // Now just move the fragments into the larger unitig and delete the bubble unitig.
- //
- // Explicitly DO NOT propagate the contained, parent, ahang or bhang from the bubble here. We
- // could figure all this stuff out, but it definitely is NOT just a simple copy from the bubble
- // unitig (for example, we could add the bubble unitig reversed).
- //
- //
- for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
- ufNode nFrg;
-
- nFrg.ident = correctPlace[fi].frgID;
- nFrg.contained = 0;
- nFrg.parent = 0;
- nFrg.ahang = 0;
- nFrg.bhang = 0;
- nFrg.position = correctPlace[fi].position;
- nFrg.containment_depth = 0;
-
- larger->addFrag(nFrg, 0, logFileFlagSet(LOG_INTERSECTION_BUBBLES_DEBUG));
- }
-
- larger->sort();
-
- success = true;
-
- writeLog("popBubbles()-- merged bubble unitig %d into unitig %d\n",
- bubble->id(), larger->id());
-
- finished:
- delete [] placements;
- delete [] correctPlace;
-
- return(success);
-}
-
-
-
-static
-bool
-popIntersectionBubble(UnitigVector &unitigs,
- double erateBubble,
- Unitig *shortTig) {
-
- // Search for edges. For a bubble to exist, either the first or last non-contained fragment
- // must have an edge to the 'merge' unitig it is a bubble of. Ideally, both the first and
- // last will have edges to the same unitig, but we'll test and allow only a single edge.
-
- uint32 fIdx = 0;
- uint32 lIdx = shortTig->ufpath.size() - 1;
-
- // We'd like to claim that all unitigs begin with a non-contained fragment, but zombie fragments
- // (contained fragments that are in a circular containment relationship) violate this. So, we
- // could then claim that unitigs with more than one fragment begin with a non-contained fragment.
- // But any zombie that has a bubble popped into it violates this.
- //
- // We hope that any unitig that doesn't start with a non-contained fragment won't be a bubble, in
- // particular, that there won't be non-contained fragments somewhere in that unitig.
-
- if (OG->isContained(shortTig->ufpath[fIdx].ident) == true) {
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments STARTS WITH A CONTAINED FRAGMENT %d\n",
- shortTig->id(), shortTig->getLength(), shortTig->ufpath.size(),
- shortTig->ufpath[fIdx].ident);
- return(false);
- }
-
- // Now, find the last non-contained fragment.
-
- while ((lIdx > 0) && (OG->isContained(shortTig->ufpath[lIdx].ident) == true))
- lIdx--;
-
- ufNode fFrg = shortTig->ufpath[fIdx]; // NOTE: A COPY, not a pointer or reference.
- ufNode lFrg = shortTig->ufpath[lIdx]; // These get modified.
-
- // Grab the best edges outside the unitig. If the first fragment is reversed, we want
- // to grab the edge off of the 3' end; opposite for the last fragment.
-
- bool f3p = (isReverse(fFrg.position) == true);
- BestEdgeOverlap *fEdge = OG->getBestEdgeOverlap(fFrg.ident, f3p);
-
- bool l3p = (isReverse(lFrg.position) == false);
- BestEdgeOverlap *lEdge = OG->getBestEdgeOverlap(lFrg.ident, l3p);
-
- // Just make sure...those edges should NOT be to ourself. But if they are, we'll just ignore
- // them -- these can be from circular unitigs (in which case we can't really merge ourself into
- // ourself at the correct spot) OR from a bubble that was already merged and just happened to tie
- // for a fragment at the end.
- //
- // aaaaaaaaaa
- // aaaaaaaaaaa
- // bbbbbbbbb
- // bbbbbbbbbb
- //
- // The second b fragment now becomes the last non-contained fragment in the merged unitig, and it has
- // a best edge to ourself.
- //
- // We are no longer using fEdge or rEdge to place these fragments in the larger unitig; we're
- // using all overlaps. Just to be sure, we'll get rid of them.
- //
- uint32 fUtg = Unitig::fragIn(fEdge->fragId());
- uint32 lUtg = Unitig::fragIn(lEdge->fragId());
-
- if (fUtg == shortTig->id()) {
- fEdge = NULL;
- fUtg = 0;
- }
- if (lUtg == shortTig->id()) {
- lEdge = NULL;
- lUtg = 0;
- }
-
- if ((fUtg != 0) && (lUtg != 0))
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments. Edges (%d/%d') from frag %d/%d' and (%d/%d') from frag %d/%d'\n",
- shortTig->id(), shortTig->getLength(), shortTig->ufpath.size(),
- fEdge->fragId(), (fEdge->frag3p() ? 3 : 5), fFrg.ident, (f3p ? 3 : 5),
- lEdge->fragId(), (lEdge->frag3p() ? 3 : 5), lFrg.ident, (l3p ? 3 : 5));
- else if (fUtg != 0)
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments. Edge (%d/%d') from frag %d/%d'\n",
- shortTig->id(), shortTig->getLength(), shortTig->ufpath.size(),
- fEdge->fragId(), (fEdge->frag3p() ? 3 : 5), fFrg.ident, (f3p ? 3 : 5));
- else if (lUtg != 0)
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments. Edge (%d/%d') from frag %d/%d'\n",
- shortTig->id(), shortTig->getLength(), shortTig->ufpath.size(),
- lEdge->fragId(), (lEdge->frag3p() ? 3 : 5), lFrg.ident, (l3p ? 3 : 5));
- else {
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments. NO EDGES, no bubble.\n",
- shortTig->id(), shortTig->getLength(), shortTig->ufpath.size());
- return(false);
- }
-
- // The only interesting case here is if we have both edges and they point to different unitigs. We could try
- // placing in both, but for now, we just give up.
-
- if ((fUtg != 0) && (lUtg != 0) && (fUtg != lUtg)) {
- writeLog("popBubbles()-- bubble unitig %d has edges to both unitig %d and unitig %d, cannot place (yet)\n",
- shortTig->id(), fUtg, lUtg);
- return(true);
- }
-
- Unitig *mergeTig = (fUtg == 0) ? unitigs[lUtg] : unitigs[fUtg];
-
- if (validateBubbleWithEdges(unitigs, erateBubble, shortTig, fFrg, fEdge, lFrg, lEdge, mergeTig) == false) {
- writeLog("popBubbles()-- failed to validate edges for bubble unitig %d into larger unitig %d\n",
- shortTig->id(), mergeTig->id());
- return(false);
- }
-
- if (validateBubbleFragmentsWithOverlaps(unitigs, erateBubble, shortTig, fFrg, lFrg, mergeTig) == false) {
- writeLog("popBubbles()-- failed to validate fragments for bubble unitig %d into larger unitig %d\n",
- shortTig->id(), mergeTig->id());
- return(false);
- }
-
- // Merged successfully!
-
- unitigs[shortTig->id()] = NULL;
- delete shortTig;
-
- return(true);
-}
-
-
-void
-popIntersectionBubbles(UnitigVector &unitigs, double erateBubble) {
- uint32 nFrgToMerge = 1;
- uint32 nFrgToMergeMax = 500;
-
- uint32 nBubblePopped = 0;
-
- logFileFlags |= LOG_PLACE_FRAG;
-
- while (1) {
- bool keepPopping = false;
- uint32 nBubbleFixed = 1;
- vector<uint32> tryAgain;
-
- // Step 1: Iterate over all possible merge sizes, popping whatever.
-
- for (nFrgToMerge=1; nFrgToMerge < nFrgToMergeMax; nFrgToMerge++) {
- writeLog("==> SEARCHING FOR BUBBLES of size %u fragments.\n", nFrgToMerge);
-
- for (uint32 ti=0; ti<unitigs.size(); ti++) {
- Unitig *shortTig = unitigs[ti];
-
- if (shortTig == NULL)
- // Ain't no tig here!
- continue;
-
- if (shortTig->ufpath.size() != nFrgToMerge)
- // Wrong size. We've either done it, or will do it, or it's just too big.
- continue;
-
- // popIntersectionBubble() returns false if the shortTig cannot be merged. It returns true
- // if the shortTig was merged, or might be merged after some other merge.
- //
- if (popIntersectionBubble(unitigs, erateBubble, shortTig)) {
- if (unitigs[ti]) {
- tryAgain.push_back(ti);
- } else {
- nBubblePopped++;
- keepPopping = true;
- }
- }
- } // Over all unitigs
- } // Over all merge sizes
-
- // Step 2: If nothing changed, get out of here.
-
- if (keepPopping == false)
- break;
-
- // Step 3: Attempt to merge bubbles that were across two unitigs. Regardless of the order we
- // merge in, it is possible for some bubble unitig B to have edges to A (a large unitig) and C
- // (another bubble). If B is examined before C, B will not be merged. This is noted above
- // ('tryAgain' will store B), and here we see if C was merged into A, thus allowing B to merge.
-
- while (nBubbleFixed > 0) {
- nBubbleFixed = 0;
-
- writeLog("==> SEARCHING FOR BUBBLES that spanned unitigs.\n");
-
- for (uint32 ta=0; ta<tryAgain.size(); ta++) {
- Unitig *shortTig = unitigs[tryAgain[ta]];
-
- if (shortTig == NULL)
- continue;
-
- if (popIntersectionBubble(unitigs, erateBubble, shortTig)) {
- if (unitigs[tryAgain[ta]] == NULL) {
- nBubblePopped++;
- nBubbleFixed++;
- keepPopping = true;
- }
- }
- }
- }
-
- // Step 4: If nothing changed, get out of here.
-
- if (keepPopping == false)
- break;
- } // Until we break.
-
- logFileFlags &= ~LOG_PLACE_FRAG;
-
- writeLog("Popped %u bubbles.\n", nBubblePopped);
-}
diff --git a/src/bogart/AS_BAT_IntersectSplit.C b/src/bogart/AS_BAT_IntersectSplit.C
deleted file mode 100644
index 0585e40..0000000
--- a/src/bogart/AS_BAT_IntersectSplit.C
+++ /dev/null
@@ -1,334 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_IntersectSplit.C
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-NOV-23 to 2013-AUG-01
- * are Copyright 2010-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz on 2014-DEC-19
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
-#include "AS_BAT_Breaking.H"
-#include "AS_BAT_BestOverlapGraph.H"
-
-#include "AS_BAT_IntersectSplit.H"
-
-
-// The original version was filtering breakpoints. It was accepting any break point with more than
-// MIN_BREAK_FRAGS fragments and longer than MIN_BREAK_LENGTH. The shorter ones in between two
-// large break points were (I suspect) analyzed to see if many short break points were piling up in
-// one region. If so, one was selected and accepted into the list of final break points.
-//
-// This filtering was implemented as the first step in breakUnitigAt(). This turned
-// a (supposedly) general purpose unitig breaker into a very special case. And added a ton
-// of complexity to the UnitigBreakPoint structure -- it needed to keep all the info needed
-// for filtering.
-
-static const int MIN_BREAK_FRAGS = 1;
-static const int MIN_BREAK_LENGTH = 500;
-
-
-
-intersectionList::intersectionList(UnitigVector &unitigs) {
-
- for (uint32 ti=0; ti<unitigs.size(); ti++) {
- Unitig *tig = unitigs[ti];
-
- if (tig == NULL)
- continue;
-
- intersectionEvidence *evidence = new intersectionEvidence [tig->ufpath.size()];
-
- for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
- ufNode *frg = &tig->ufpath[fi];
-
- if (OG->isContained(frg->ident))
- continue;
-
- // For my best overlap, the ID of the unitig that the overlapping fragment is in.
-
- evidence[fi].edge5 = *OG->getBestEdgeOverlap(frg->ident, false);
- evidence[fi].edge3 = *OG->getBestEdgeOverlap(frg->ident, true);
-
- evidence[fi].frag5tig = tig->fragIn(evidence[fi].edge5.fragId());
- evidence[fi].frag3tig = tig->fragIn(evidence[fi].edge3.fragId());
-
- // Do NOT initialize these! An earlier fragment could have already confirmed an end.
- // Properly, only the 5' end of a forward fragment (or 3' end of a reverse fragment) can be
- // confirmed already (otherwise the tig is nonsense), but we don't yet check that.
- //
- //evidence[fi].frag5confirmed = false;
- //evidence[fi].frag3confirmed = false;
-
- // But, because the path could be promiscuous, not every overlap to a different tig is bad.
- //
- // If my best overlap is to a different tig, but there is an overlapping fragment (in the
- // unitig placement) with a best edge to me, I'm still good. The BOG build this unitig using
- // the edge from the other fragment to me.
- //
- // If the fragments do not overlap in the layout (yet the best edge still exists) that is a
- // self-intersection.
- //
- // The two blocks are identical, except for 'edge3' and 'edge5'.
-
- if (evidence[fi].frag5tig == tig->id()) {
- uint32 ti = tig->pathPosition(evidence[fi].edge5.fragId());
- ufNode *trg = &tig->ufpath[ti];
-
- uint32 minf = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
- uint32 maxf = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
-
- uint32 mint = (trg->position.bgn < trg->position.end) ? trg->position.bgn : trg->position.end;
- uint32 maxt = (trg->position.bgn < trg->position.end) ? trg->position.end : trg->position.bgn;
-
- // If they overlap, mark as confirmed, else remember an intersection.
-
- if (((minf < mint) && (mint < maxf)) || // t begins inside f
- ((mint < minf) && (minf < maxt))) { // f begins inside t
- if (evidence[fi].edge5.frag3p())
- evidence[ti].frag3confirmed = true;
- else
- evidence[ti].frag5confirmed = true;
-
- } else {
- evidence[fi].frag5self = true;
-
- // Not the correct place to report this. Some of these get confirmed by later fragments.
- //writeLog("BUG1 F: %d,%d T %d,%d\n", minf, maxf, mint, maxt);
- //writeLog("INTERSECT from unitig %d frag %d end %d TO unitig %d frag %d end %d (SELF)\n",
- // tig->id(), frg->ident, 5, evidence[fi].frag5tig, evidence[fi].edge5.fragId(), evidence[fi].edge5.frag3p() ? 3 : 5);
- }
- }
-
-
-
- if (evidence[fi].frag3tig == tig->id()) {
- uint32 ti = tig->pathPosition(evidence[fi].edge3.fragId());
- ufNode *trg = &tig->ufpath[ti];
-
- uint32 minf = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
- uint32 maxf = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
-
- uint32 mint = (trg->position.bgn < trg->position.end) ? trg->position.bgn : trg->position.end;
- uint32 maxt = (trg->position.bgn < trg->position.end) ? trg->position.end : trg->position.bgn;
-
- if (((minf < mint) && (mint < maxf)) || // t begins inside f
- ((mint < minf) && (minf < maxt))) { // f begins inside t
- if (evidence[fi].edge3.frag3p())
- evidence[ti].frag3confirmed = true;
- else
- evidence[ti].frag5confirmed = true;
-
- } else {
- evidence[fi].frag3self = true;
-
- // Not the correct place to report this. Some of these get confirmed by later fragments.
- //writeLog("BUG2 F: %d,%d T %d,%d\n", minf, maxf, mint, maxt);
- //writeLog("INTERSECT from unitig %d frag %d end %d TO unitig %d frag %d end %d (SELF)\n",
- // tig->id(), frg->ident, 3, evidence[fi].frag3tig, evidence[fi].edge3.fragId(), evidence[fi].edge3.frag3p() ? 3 : 5);
- }
- }
- }
-
- //
- // Build the list.
- //
-
- for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
- ufNode *frg = &tig->ufpath[fi];
-
- if ((evidence[fi].frag5tig != 0) &&
- (evidence[fi].frag5tig != tig->id()) &&
- (evidence[fi].frag5confirmed == false))
- isects.push_back(intersectionPoint(evidence[fi].edge5, frg->ident, false, false));
-
- if ((evidence[fi].frag5tig == tig->id()) &&
- (evidence[fi].frag5self == true) &&
- (evidence[fi].frag5confirmed == false))
- isects.push_back(intersectionPoint(evidence[fi].edge5, frg->ident, false, true));
-
- if ((evidence[fi].frag3tig != 0) &&
- (evidence[fi].frag3tig != tig->id()) &&
- (evidence[fi].frag3confirmed == false))
- isects.push_back(intersectionPoint(evidence[fi].edge3, frg->ident, true, false));
-
- if ((evidence[fi].frag3tig == tig->id()) &&
- (evidence[fi].frag3self == true) &&
- (evidence[fi].frag3confirmed == false))
- isects.push_back(intersectionPoint(evidence[fi].edge3, frg->ident, true, true));
- }
-
- delete [] evidence;
- }
-
-
- // Sort the intersections by the ID of the intersected fragment, then build an index into the array.
-
- std::sort(isects.begin(), isects.end());
-
- // Terminate the intersection list with a sentinel intersection. This is CRITICAL
- // to the way we iterate over intersections.
-
- isects.push_back(intersectionPoint(BestEdgeOverlap(), 0, true, true));
-
- // Build a map from fragment id to the first intersection in the list.
-
- for (uint32 i=0; i<isects.size(); i++) {
- isectsNum[isects[i].isectFrg]++;
-
- if (isectsMap.find(isects[i].isectFrg) == isectsMap.end())
- isectsMap[isects[i].isectFrg] = i;
- }
-}
-
-
-intersectionList::~intersectionList() {
-}
-
-
-void
-intersectionList::logIntersections(void) {
-
- for (uint32 ii=0; ii<isects.size(); ii++) {
- intersectionPoint *isect = &isects[ii];
-
- writeLog("INTERSECT from unitig %d frag %d end %d TO unitig %d frag %d end %d\n",
- Unitig::fragIn(isect->isectFrg), isect->isectFrg, isect->isect3p ? 3 : 5,
- Unitig::fragIn(isect->invadFrg), isect->invadFrg, isect->invad3p ? 3 : 5);
- }
-}
-
-
-
-
-
-void
-breakUnitigs(UnitigVector &unitigs,
- char *output_prefix,
- bool enableIntersectionBreaking) {
-
- writeLog("==> BREAKING UNITIGS.\n");
-
- intersectionList *ilist = new intersectionList(unitigs);
-
- // Stop when we've seen all current unitigs. Replace tiMax
- // in the for loop below with unitigs.size() to recursively
- // split unitigs.
-
- uint32 tiMax = unitigs.size();
-
- for (uint32 ti=0; ti<tiMax; ti++) {
- Unitig *tig = unitigs[ti];
-
- if (tig == NULL)
- continue;
-
- vector<breakPoint> breaks;
-
- for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
- ufNode *frg = &tig->ufpath[fi];
- intersectionPoint *isect = ilist->getIntersection(frg->ident, 0);
-
- if (isect == NULL)
- continue;
-
- for (; isect->isectFrg == frg->ident; isect++) {
- assert(tig->id() == Unitig::fragIn(isect->isectFrg));
-
- // Grab the invading unitig
-
- Unitig *inv = unitigs[Unitig::fragIn(isect->invadFrg)];
- assert(inv->id() == Unitig::fragIn(isect->invadFrg));
-
- // Grab the best edges off the invading fragment.
-
- BestEdgeOverlap *best5 = OG->getBestEdgeOverlap(isect->invadFrg, false);
- BestEdgeOverlap *best3 = OG->getBestEdgeOverlap(isect->invadFrg, true);
-
- // Check if the incoming tig is a spur, and we should just ignore it immediately
-
- if ((inv->ufpath.size() == 1) &&
- ((best5->fragId() == 0) ||
- (best3->fragId() == 0))) {
- if (logFileFlagSet(LOG_INTERSECTION_BREAKING))
- writeLog("unitig %d frag %d end %c' into unitig %d frag %d end %c' -- IS A SPUR, skip it\n",
- inv->id(), isect->invadFrg, isect->invad3p ? '3' : '5',
- tig->id(), isect->isectFrg, isect->isect3p ? '3' : '5');
- continue;
- }
-
- // Keep only significant intersections
-
- if ((inv->getLength() > MIN_BREAK_LENGTH) &&
- (inv->ufpath.size() > MIN_BREAK_FRAGS)) {
- if (logFileFlagSet(LOG_INTERSECTION_BREAKING))
- writeLog("unitig %d frag %d end %c' into unitig %d frag %d end %c'\n",
- inv->id(), isect->invadFrg, isect->invad3p ? '3' : '5',
- tig->id(), isect->isectFrg, isect->isect3p ? '3' : '5');
- breaks.push_back(breakPoint(isect->isectFrg, isect->isect3p, true, false));
- }
- } // Over all incoming fragments
-
- // If this is the last fragment, terminate the break point list with a 'fakeEnd' (in AS_BAT_Breaking.cc) break point
- // at the end of the unitig.
-
- if ((fi+1 == tig->ufpath.size()) &&
- (breaks.size() > 0)) {
- breaks.push_back(breakPoint(frg->ident, (frg->position.bgn < frg->position.end), true, false));
- }
- } // Over all fragments in the unitig
-
-
- if (breaks.size() == 0)
- continue;
-
- // Report where breaks occur. 'breaks' is a list, not a vector.
-#if 0
- // We've lost the fields in breaks[i] -- but the reports above aren't updated yet.
- if (logFileFlagSet(LOG_INTERSECTION_BREAKING) ||
- logFileFlagSet(LOG_MATE_SPLIT_COVERAGE_PLOT))
- for (uint32 i=0; i<breaks.size(); i++)
- writeLog("BREAK unitig %d at position %d,%d from inSize %d inFrags %d.\n",
- tig->id(),
- breaks[i].fragPos.bgn,
- breaks[i].fragPos.end,
- breaks[i].inSize,
- breaks[i].inFrags);
-#endif
-
- // Actually do the breaking.
- if (enableIntersectionBreaking)
- breakUnitigAt(unitigs, tig, breaks, true);
-
- breaks.clear();
- } // Over all unitigs
-}
diff --git a/src/bogart/AS_BAT_IntersectSplit.H b/src/bogart/AS_BAT_IntersectSplit.H
deleted file mode 100644
index 16911e8..0000000
--- a/src/bogart/AS_BAT_IntersectSplit.H
+++ /dev/null
@@ -1,151 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_IntersectSplit.H
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-DEC-06 to 2013-AUG-01
- * are Copyright 2010-2011,2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz on 2014-DEC-19
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#ifndef INCLUDE_AS_BAT_INTERSECTSPLIT
-#define INCLUDE_AS_BAT_INTERSECTSPLIT
-
-
-
-
-// Evidence collected for one fragment: the best edge off each of its ends, the unitig
-// the fragment on the far side of each edge lives in, and two per-end flags.
-class intersectionEvidence {
-public:
-  // Everything starts out zero / false; the two edges are default constructed.
-  intersectionEvidence() :
-    frag5tig(0),
-    frag3tig(0),
-    frag5confirmed(false),
-    frag3confirmed(false),
-    frag5self(false),
-    frag3self(false) {
-  }
-
-  ~intersectionEvidence() {
-  }
-
-
-  BestEdgeOverlap   edge5;             //  Best overlap off our 5' end
-  BestEdgeOverlap   edge3;             //  Best overlap off our 3' end
-
-  uint32            frag5tig;          //  ID of the tig holding the fragment our 5' best overlap points to
-  uint32            frag3tig;          //  Same, for the 3' best overlap
-
-  uint32            frag5confirmed:1;  //  Set when a best overlap in the same unitig confirms our 5' end
-  uint32            frag3confirmed:1;  //  Same, for the 3' end
-
-  uint32            frag5self:1;       //  Set when our 5' end intersects the unitig we are already in
-  uint32            frag3self:1;       //  Same, for the 3' end
-};
-
-
-
-
-// One intersection: the end of fragment 'invadFrg' has a best edge onto an end of fragment
-// 'isectFrg'.  Intersection points order by the ID of the fragment that is intersected.
-class intersectionPoint {
-public:
-  // An empty intersection; all IDs zero, all flags false.
-  intersectionPoint() :
-    isectFrg(0),
-    isect3p(false),
-    invadFrg(0),
-    invad3p(false),
-    isSelf(false) {
-  }
-
-  // Build from the best edge 'edge' leaving end 's3p' of fragment 'sId'.  The intersected
-  // fragment and end are taken from the edge itself; 'self' is stored as supplied.
-  intersectionPoint(BestEdgeOverlap edge, uint32 sId, bool s3p, bool self) :
-    isectFrg(edge.fragId()),
-    isect3p(edge.frag3p()),
-    invadFrg(sId),
-    invad3p(s3p),
-    isSelf(self) {
-  }
-
-  ~intersectionPoint() {
-  }
-
-  // Order by intersected fragment only.  The argument is taken by const reference so that
-  // sorting a list of these does not copy an object for every comparison.
-  bool operator<(const intersectionPoint &that) const {
-    return(isectFrg < that.isectFrg);
-  }
-
-  uint32   isectFrg;  //  Fragment that is intersected into, we split on this.
-  bool     isect3p;   //  True if we intersected onto the 3' end of the fragment.
-
-  uint32   invadFrg;  //  Fragment whose edge makes the intersection.
-  bool     invad3p;   //  True if the edge leaves the 3' end of invadFrg.
-
-  bool     isSelf;    //  Value of 'self' given at construction.
-};
-
-
-
-
-// All intersection points, stored in one flat vector and indexed by fragment ID through
-// two maps: one giving the number of intersections for a fragment, the other giving the
-// index in the vector where that fragment's intersections start.
-class intersectionList {
-public:
-  intersectionList();
-  intersectionList(UnitigVector &unitigs);
-  ~intersectionList();
-
-  void               logIntersections(void);
-
-
-  // Number of intersections recorded for fragment 'fid'; zero if the fragment is unknown.
-  uint32             numIntersections(uint32 fid) {
-    map<uint32,uint32>::iterator  it = isectsNum.find(fid);
-
-    return((it == isectsNum.end()) ? 0 : it->second);
-  }
-
-  // The i'th intersection recorded for fragment 'fid', or NULL if the fragment is unknown.
-  // 'i' is not checked against numIntersections().
-  intersectionPoint *getIntersection(uint32 fid, uint32 i) {
-    map<uint32,uint32>::iterator  it = isectsMap.find(fid);
-
-    if (it == isectsMap.end())
-      return(NULL);
-
-    return(&isects[it->second + i]);
-  }
-
-private:
-  vector<intersectionPoint>   isects;
-  map<uint32,uint32>          isectsNum;  //  Number of intersections for this fragment
-  map<uint32,uint32>          isectsMap;  //  Start index in isects for this fragment
-};
-
-void breakUnitigs(UnitigVector &unitigs, char *output_prefix, bool enableIntersectionBreaking);
-
-#endif // INCLUDE_AS_BAT_INTERSECTSPLIT
diff --git a/src/bogart/AS_BAT_Joining.C b/src/bogart/AS_BAT_Joining.C
deleted file mode 100644
index 33d4688..0000000
--- a/src/bogart/AS_BAT_Joining.C
+++ /dev/null
@@ -1,405 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_Joining.C
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-NOV-23 to 2013-AUG-01
- * are Copyright 2010,2012-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz on 2014-DEC-19
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
-#include "AS_BAT_BestOverlapGraph.H"
-
-#include "AS_BAT_Joining.H"
-
-// A candidate join between two unitigs, described by the read on each side of the joining
-// edge.  Entries compare by the length of the unitig the join would create.
-class joinEntry {
-public:
-  // An empty entry.  Every member is initialized, including toFlip, which was previously
-  // left indeterminate by this constructor.
-  joinEntry() :
-    frFragID(0),
-    toFragID(0),
-    frFirst(false),
-    toFlip(false),
-    joinLen(0) {
-  }
-
-  joinEntry(uint32 frFragID_,
-            bool   frFirst_,
-            uint32 toFragID_,
-            bool   toFlip_,
-            uint32 joinLen_) :
-    frFragID(frFragID_),
-    toFragID(toFragID_),
-    frFirst(frFirst_),
-    toFlip(toFlip_),
-    joinLen(joinLen_) {
-  }
-
-  ~joinEntry() {
-  }
-
-  uint32    frFragID;  //  Read in the 'from' unitig that has the joining edge
-  uint32    toFragID;  //  Read in the 'to' unitig that the edge lands on
-
-  bool      frFirst;   //  True if frFragID was taken from the start of its unitig
-  bool      toFlip;    //  True if the 'to' unitig must be reverse-complemented before joining
-
-  uint32    joinLen;   //  Length of the joined unitig
-
-  bool operator<(const joinEntry &that) const { return(joinLen < that.joinLen); };
-  bool operator>(const joinEntry &that) const { return(joinLen > that.joinLen); };
-};
-
-
-
-
-
-// Quick bubble check.  Take the best edge leaving the outward-facing end of the first read
-// and of the last read in the unitig; if both edges land in the same unitig, report that
-// this unitig is probably a bubble in that one.
-static
-bool
-joinUnitigs_looksLikeBubble(Unitig *fr) {
-  ufNode  *first = &fr->ufpath[0];
-  ufNode  *last  = &fr->ufpath[fr->ufpath.size() - 1];
-
-  //  The second argument asks for the 3' edge when true.  For the first read that is
-  //  wanted when the read is reversed; for the last read, when the read is forward.
-
-  bool     first3p = (first->position.end < first->position.bgn);
-  bool     last3p  = (last->position.bgn  < last->position.end);
-
-  BestEdgeOverlap  *edgeFirst = OG->getBestEdgeOverlap(first->ident, first3p);
-  BestEdgeOverlap  *edgeLast  = OG->getBestEdgeOverlap(last->ident,  last3p);
-
-  uint32   tigFirst = fr->fragIn(edgeFirst->fragId());
-  uint32   tigLast  = fr->fragIn(edgeLast->fragId());
-
-  return(tigFirst == tigLast);
-}
-
-
-
-// Look at one read near an end of unitig 'fr' and decide whether its best edge justifies
-// joining 'fr' with the unitig that edge lands in.
-//
-//   idx        - how far in from the end the read is (0 is the end read itself)
-//   frFirstEnd - true to examine the start of 'fr', false to examine its end
-//   joins      - accepted joins are appended here
-//
-// Returns true if a join was appended to 'joins', false if the candidate was rejected.
-static
-bool
-joinUnitigs_examineEnd(UnitigVector      &unitigs,
-                       Unitig            *fr,
-                       uint32             idx,
-                       bool               frFirstEnd,
-                       vector<joinEntry> &joins) {
-  uint32   srcIdx = (frFirstEnd) ? (idx) : (fr->ufpath.size() - 1 - idx);
-  ufNode  *src    = &fr->ufpath[srcIdx];
-  bool     srcRev = (src->position.end < src->position.bgn);
-
-  //  Pick the edge pointing out of the unitig.  getBestEdgeOverlap() returns the 3' edge
-  //  when its last argument is true.
-  //
-  //    first read:  5' if the read is forward,  3' if the read is reverse
-  //    last read:   5' if the read is reverse,  3' if the read is forward
-  //
-  //  which is the 3' edge exactly when (srcRev == frFirstEnd).
-
-  BestEdgeOverlap  *edge = OG->getBestEdgeOverlap(src->ident, (srcRev == frFirstEnd));
-
-  uint32   tgtId = edge->fragId();
-  bool     tgt3p = edge->frag3p();
-
-  //  No best edge, nothing to join to.
-
-  if (tgtId == 0)
-    return(false);
-
-  //  Find the unitig on the other side of the edge.  Don't join to a single-read unitig,
-  //  and don't join a unitig to itself.
-
-  uint32   toID = fr->fragIn(tgtId);
-  Unitig  *to   = unitigs[toID];
-
-  if ((to->ufpath.size() == 1) ||
-      (to->id() == fr->id()))
-    return(false);
-
-  //  Find the read the edge lands on, and decide if the target unitig needs to be flipped.
-  //  Working from the start of 'fr', a flip is needed when the overlapped end and the
-  //  read orientation agree (5'/forward or 3'/reverse); working from the end of 'fr', a
-  //  flip is needed when they disagree (5'/reverse or 3'/forward).
-
-  ufNode  *tgt    = &to->ufpath[to->pathPosition(tgtId)];
-  bool     tgtRev = (tgt->position.end < tgt->position.bgn);
-
-  bool     toFlip = (frFirstEnd) ? (tgt3p == tgtRev) : (tgt3p != tgtRev);
-
-  //  Coordinates of the target read in the (possibly flipped) target unitig.
-
-  uint32   tgtLo = MIN(tgt->position.bgn, tgt->position.end);
-  uint32   tgtHi = MAX(tgt->position.bgn, tgt->position.end);
-  uint32   toLen = to->getLength();
-  uint32   frLen = fr->getLength();
-
-  uint32   toMin = (toFlip) ? (toLen - tgtHi) : (tgtLo);
-  uint32   toMax = (toFlip) ? (toLen - tgtLo) : (tgtHi);
-
-  assert(toMin < toMax);
-
-  //  Part of 'to' gets attached to 'fr', the rest is discarded.  From the start of 'fr' we
-  //  prepend the start of 'to'; from the end of 'fr' we append the end of 'to'.  The
-  //  discarded length errs on the large side.
-
-  uint32   joinLen = (frFirstEnd) ? (toMin + frLen) : (frLen + toLen - toMax);
-  uint32   discLen = (frFirstEnd) ? (toLen - toMin) : (toMax);
-
-  //  Throwing away more than the size of 'fr' does more harm than good.
-
-  if (discLen > frLen)
-    return(false);
-
-  //  The join should be noticeably bigger than the larger unitig, and the discard should be
-  //  noticeably smaller than the smaller unitig.
-
-  uint32   maxLen = MAX(frLen, toLen);
-  uint32   minLen = MIN(frLen, toLen);
-
-  double   joinChange = (double)joinLen / maxLen;
-  double   discChange = (double)discLen / minLen;
-
-  //  Bad if the sizes didn't really change, unless the discard is tiny.  The exception
-  //  covers merging a small unitig into a big one: the join grows only a little (1.05,
-  //  say) yet most of the small unitig is used.
-
-  bool     barelyChanged = ((joinChange < 1.10) || (0.75 < discChange));
-  bool     tinyDiscard   = ((1.0 < joinChange) && (discChange < 0.5));
-  bool     isBad         = (barelyChanged && !tinyDiscard);
-
-  if (isBad) {
-    writeLog("joinUnitigs_examineEnd()-- join unitig %6u (%7ubp) frag %6u %s <-> unitig %6u (%7ubp) frag %6u %s <-> length %5.2f %7u and %5.2f %7u BAD\n",
-             fr->id(), fr->getLength(), src->ident, (srcRev) ? "rev" : "fwd",
-             to->id(), to->getLength(), tgt->ident, (tgtRev) ? "rev" : "fwd",
-             joinChange, joinLen,
-             discChange, discLen);
-    return(false);
-  }
-
-  //  Accepted.  Log it and remember it.
-
-  writeLog("joinUnitigs_examineEnd()-- join unitig %6u (%7ubp) frag %6u %s <-> unitig %6u (%7ubp) frag %6u %s <-> length %5.2f %7u and %5.2f %7u\n",
-           fr->id(), fr->getLength(), src->ident, (srcRev) ? "rev" : "fwd",
-           to->id(), to->getLength(), tgt->ident, (tgtRev) ? "rev" : "fwd",
-           joinChange, joinLen,
-           discChange, discLen);
-
-  joins.push_back(joinEntry(src->ident, frFirstEnd, tgt->ident, toFlip, joinLen));
-
-  return(true);
-}
-
-
-
-
-
-
-
-
-
-// Perform one join.  Two new unitigs are created: 'joinUnitig' receives every read of the
-// 'fr' unitig plus the part of the 'to' unitig on the joined side of the target read, and
-// 'discUnitig' receives the remaining reads of 'to'.  Both donor unitigs are deleted and
-// their slots in 'unitigs' set to NULL.
-//
-//   unitigs - the unitig collection; modified
-//   join    - the join to perform; frFragID and toFragID identify the two donor unitigs
-//
-// The 'fr' unitig is assumed to be forward, and is the one we join onto.
-static
-void
-joinUnitigs_append(UnitigVector &unitigs, joinEntry *join) {
-  uint32   frId = Unitig::fragIn(join->frFragID);
-  uint32   toId = Unitig::fragIn(join->toFragID);
-
-  Unitig  *fr = unitigs[frId];
-  Unitig  *to = unitigs[toId];
-
-  Unitig  *joinUnitig = unitigs.newUnitig(false);
-  Unitig  *discUnitig = unitigs.newUnitig(false);
-
-  //  Reverse the 'to' unitig if needed.
-
-  if (join->toFlip)
-    to->reverseComplement(true);
-
-  //  Find where the target read sits in 'to'.  This is done after the flip, since the flip
-  //  reorders to->ufpath, and before any read is moved into a new unitig.
-  //  NOTE(review): assumes reverseComplement() refreshes the table pathPosition() reads -- confirm.
-
-  uint32   toIdx = Unitig::pathPosition(join->toFragID);
-
-  //  If we're joining off the 5' end of the fr unitig, add the 'to' reads first: those
-  //  before the target read are joined, the rest are discarded.
-
-  if (join->frFirst == true) {
-    uint32 ii = 0;
-
-    for (; ii < toIdx; ii++)
-      joinUnitig->addFrag(to->ufpath[ii], 0, false);
-
-    for (; ii < to->ufpath.size(); ii++)
-      discUnitig->addFrag(to->ufpath[ii], 0, false);
-  }
-
-  //  Now add all the fr unitig reads.  These must come from 'fr'; the loop used to index
-  //  'to' here, which copied the wrong reads and could run off the end of to->ufpath.
-
-  for (uint32 ii=0; ii < fr->ufpath.size(); ii++)
-    joinUnitig->addFrag(fr->ufpath[ii], 0, false);
-
-  //  If we're not joining off the 5' end, add the 'to' reads last: those before the target
-  //  read are discarded, the rest are joined.
-
-  if (join->frFirst == false) {
-    uint32 ii = 0;
-
-    for (; ii < toIdx; ii++)
-      discUnitig->addFrag(to->ufpath[ii], 0, false);
-
-    for (; ii < to->ufpath.size(); ii++)
-      joinUnitig->addFrag(to->ufpath[ii], 0, false);
-  }
-
-  //  Delete the donor unitigs.
-
-  delete fr;
-  delete to;
-
-  unitigs[frId] = NULL;
-  unitigs[toId] = NULL;
-
-  //  And make sure the new unitigs are consistent.
-
-  joinUnitig->sort();
-  discUnitig->sort();
-}
-
-
-
-
-
-// Find candidate joins between unitigs and sort them, largest joined length first.  Does
-// nothing unless 'enableJoining' is true.
-//
-// The joins are only found, logged and sorted; the function returns before the loop that
-// would apply them, and the call that applies a join is commented out.
-void
-joinUnitigs(UnitigVector &unitigs, bool enableJoining) {
-
-  if (!enableJoining)
-    return;
-
-  writeLog("==> JOINING SPLIT UNITIGS\n");
-
-  vector<joinEntry>  joins;
-
-  //  How many reads in from each end to try.  Reads other than the end read itself are
-  //  not supported by the joining code, so only one is tried.
-
-  const uint32  maxDepth = 1;
-
-  //  Evaluate both ends of every real unitig as a candidate for merging onto something.
-
-  for (uint32 frID=0; frID<unitigs.size(); frID++) {
-    Unitig  *fr = unitigs[frID];
-
-    //  Skip empty slots, unitigs with fewer than two reads, and probable bubbles.
-
-    if ((fr == NULL) ||
-        (fr->ufpath.size() < 2) ||
-        (joinUnitigs_looksLikeBubble(fr)))
-      continue;
-
-    //  Start of the unitig; stop at the first read that yields a join.
-
-    for (uint32 depth=0; (depth < maxDepth) && (depth < fr->ufpath.size()); depth++)
-      if (joinUnitigs_examineEnd(unitigs, fr, depth, true, joins))
-        break;
-
-    //  End of the unitig, likewise.
-
-    for (uint32 depth=0; (depth < maxDepth) && (depth < fr->ufpath.size()); depth++)
-      if (joinUnitigs_examineEnd(unitigs, fr, depth, false, joins))
-        break;
-  }
-
-  writeLog("Found %d pairs of unitigs to join.\n", (int)joins.size());
-
-  //  Largest joined length first.
-
-  std::sort(joins.begin(), joins.end(), greater<joinEntry>());
-
-  //  Applying the joins is disabled; nothing below this return runs.
-
-  return;
-
-  for (uint32 j=0; j<joins.size(); j++) {
-    joinEntry *join = &joins[j];
-
-    //joinUnitigs_append(unitigs, join);
-  }
-}
-
diff --git a/src/bogart/AS_BAT_Logging.C b/src/bogart/AS_BAT_Logging.C
index 879aace..08aaff4 100644
--- a/src/bogart/AS_BAT_Logging.C
+++ b/src/bogart/AS_BAT_Logging.C
@@ -40,7 +40,7 @@
class logFileInstance {
public:
logFileInstance() {
- file = NULL;
+ file = stderr;
name[0] = 0;
part = 0;
length = 0;
@@ -65,6 +65,9 @@ public:
};
void rotate(void) {
+
+ assert(name[0] != 0);
+
fclose(file);
file = NULL;
@@ -115,38 +118,26 @@ logFileInstance *logFileThread = NULL; // For writes during threaded portion
uint32 logFileOrder = 0;
uint64 logFileFlags = 0;
-uint64 LOG_OVERLAP_QUALITY = 0x0000000000000001; // Debug, scoring of overlaps
-uint64 LOG_OVERLAPS_USED = 0x0000000000000002; // Report overlaps used/not used
+uint64 LOG_OVERLAP_SCORING = 0x0000000000000001; // Debug, scoring of overlaps
+uint64 LOG_ALL_BEST_EDGES = 0x0000000000000002;
uint64 LOG_CHUNK_GRAPH = 0x0000000000000004; // Report the chunk graph as we build it
-uint64 LOG_INTERSECTIONS = 0x0000000000000008; // Report intersections found when building initial unitigs
-uint64 LOG_POPULATE_UNITIG = 0x0000000000000010; // Report building of initial unitigs (both unitig creation and fragment placement)
-uint64 LOG_INTERSECTION_BREAKING = 0x0000000000000020; //
-uint64 LOG_INTERSECTION_BUBBLES = 0x0000000000000040; //
-uint64 LOG_INTERSECTION_BUBBLES_DEBUG = 0x0000000000000080; //
-uint64 LOG_INTERSECTION_JOINING = 0x0000000000000100; //
-uint64 LOG_INTERSECTION_JOINING_DEBUG = 0x0000000000000200; //
-uint64 LOG_SPLIT_DISCONTINUOUS = 0x0000000000000400; //
-uint64 LOG_INITIAL_CONTAINED_PLACEMENT = 0x0000000000000800; //
-uint64 LOG_HAPPINESS = 0x0000000000001000; //
-uint64 LOG_INTERMEDIATE_UNITIGS = 0x0000000000002000; // At various spots, dump the current unitigs
-uint64 LOG_SET_PARENT_AND_HANG = 0x0000000000004000; //
-uint64 LOG_STDERR = 0x0000000000008000; // Write ALL logging to stderr, not the files.
+uint64 LOG_BUILD_UNITIG = 0x0000000000000008; // Report building of initial unitigs (both unitig creation and fragment placement)
+uint64 LOG_PLACE_UNPLACED = 0x0000000000000010; // Report placing of unplaced reads
+uint64 LOG_BUBBLE_DETAIL = 0x0000000000000020;
+uint64 LOG_SPLIT_DISCONTINUOUS = 0x0000000000000040; //
+uint64 LOG_INTERMEDIATE_UNITIGS = 0x0000000000000080; // At various spots, dump the current unitigs
+uint64 LOG_SET_PARENT_AND_HANG = 0x0000000000000100; //
+uint64 LOG_STDERR = 0x0000000000000200; // Write ALL logging to stderr, not the files.
uint64 LOG_PLACE_FRAG = 0x8000000000000000; // Internal use only.
-char const *logFileFlagNames[64] = { "overlapQuality",
- "overlapsUsed",
+char const *logFileFlagNames[64] = { "overlapScoring",
+ "allBestEdges",
"chunkGraph",
- "intersections",
- "populate",
- "intersectionBreaking",
- "intersectionBubbles",
- "intersectionBubblesDebug",
- "intersectionJoining",
- "intersectionJoiningDebug",
- "splitDiscontinuous",
- "containedPlacement",
- "happiness",
+ "buildUnitig",
+ "placeUnplaced",
+ "bubbles",
+ "splitDiscontinuous", // Update made it to here, need repeats
"intermediateUnitigs",
"setParentAndHang",
"stderr",
@@ -160,15 +151,16 @@ setLogFile(char const *prefix, char const *label) {
assert(prefix != NULL);
- if (logFileFlagSet(LOG_STDERR))
- // Write everything to stderr
- return;
-
// Allocate space.
if (logFileThread == NULL)
logFileThread = new logFileInstance [omp_get_max_threads()];
+ // If writing to stderr, that's all we needed to do.
+
+ if (logFileFlagSet(LOG_STDERR))
+ return;
+
// Close out the old.
logFileMain.close();
@@ -176,7 +168,6 @@ setLogFile(char const *prefix, char const *label) {
for (int32 tn=0; tn<omp_get_max_threads(); tn++)
logFileThread[tn].close();
-
// Move to the next iteration.
logFileOrder++;
@@ -205,7 +196,6 @@ writeLog(char const *fmt, ...) {
logFileInstance *lf = (nt == 1) ? (&logFileMain) : (&logFileThread[tn]);
// Rotate the log file please, HAL.
- // AS_UTL_sizeOfFile(lf->name) > 512 * 1024 * 1024)
uint64 maxLength = 512 * 1024 * 1024;
@@ -216,11 +206,6 @@ writeLog(char const *fmt, ...) {
lf->rotate();
}
- // Default to stderr if no name set.
-
- if (lf->name[0] == 0)
- lf->file = stderr;
-
// Open the file if needed.
if (lf->file == NULL)
@@ -234,3 +219,16 @@ writeLog(char const *fmt, ...) {
va_end(ap);
}
+
+
+
+// Flush the log file this thread writes to: the main log when OpenMP reports a single
+// thread, otherwise the calling thread's entry in logFileThread.  Does nothing if that
+// log has no open file.
+void
+flushLog(void) {
+  int32  threadCount = omp_get_num_threads();
+  int32  threadIndex = omp_get_thread_num();
+
+  logFileInstance  *lf = &logFileMain;
+
+  if (threadCount != 1)
+    lf = &logFileThread[threadIndex];
+
+  if (lf->file == NULL)
+    return;
+
+  fflush(lf->file);
+}
diff --git a/src/bogart/AS_BAT_Logging.H b/src/bogart/AS_BAT_Logging.H
index 43aa675..246e873 100644
--- a/src/bogart/AS_BAT_Logging.H
+++ b/src/bogart/AS_BAT_Logging.H
@@ -56,25 +56,20 @@
void setLogFile(char const *prefix, char const *name);
void writeLog(char const *fmt, ...);
+void flushLog(void);
#define logFileFlagSet(L) ((logFileFlags & L) == L)
extern uint64 logFileFlags;
extern uint32 logFileOrder; // Used debug tigStore dumps, etc
-extern uint64 LOG_OVERLAP_QUALITY;
-extern uint64 LOG_OVERLAPS_USED;
+extern uint64 LOG_OVERLAP_SCORING;
+extern uint64 LOG_ALL_BEST_EDGES;
extern uint64 LOG_CHUNK_GRAPH;
-extern uint64 LOG_INTERSECTIONS;
-extern uint64 LOG_POPULATE_UNITIG;
-extern uint64 LOG_INTERSECTION_BREAKING;
-extern uint64 LOG_INTERSECTION_BUBBLES;
-extern uint64 LOG_INTERSECTION_BUBBLES_DEBUG;
-extern uint64 LOG_INTERSECTION_JOINING;
-extern uint64 LOG_INTERSECTION_JOINING_DEBUG;
+extern uint64 LOG_BUILD_UNITIG;
+extern uint64 LOG_PLACE_UNPLACED;
+extern uint64 LOG_BUBBLE_DETAIL;
extern uint64 LOG_SPLIT_DISCONTINUOUS;
-extern uint64 LOG_INITIAL_CONTAINED_PLACEMENT;
-extern uint64 LOG_HAPPINESS;
extern uint64 LOG_INTERMEDIATE_UNITIGS;
extern uint64 LOG_SET_PARENT_AND_HANG;
extern uint64 LOG_STDERR;
diff --git a/src/bogart/AS_BAT_MarkRepeatReads.C b/src/bogart/AS_BAT_MarkRepeatReads.C
new file mode 100644
index 0000000..7dc5abe
--- /dev/null
+++ b/src/bogart/AS_BAT_MarkRepeatReads.C
@@ -0,0 +1,1133 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-MAR-11
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#include "AS_BAT_FragmentInfo.H"
+#include "AS_BAT_OverlapCache.H"
+#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
+
+#include "intervalList.H"
+#include "stddev.H"
+
+#include <vector>
+
+using namespace std;
+
+
+
+// Hack.  Tuning values for repeat detection, hard-coded at file scope.
+uint32 MIN_ANCHOR_HANG = 500; // Require reads to be anchored by this many bases at boundaries of repeats.
+// NOTE(review): presumably a minimum overlap length, in bases -- confirm where it is used.
+int32 REPEAT_OVERLAP_MIN = 50;
+
+// annotateRepeatsOnRead() drops an overlap when overlapConsistentWithTig() returns less than this.
+#define REPEAT_FRACTION 0.5
+
+// Debug switches, disabled by default.  Change an #undef to a #define to enable.
+#undef SHOW_ANNOTATION_RAW // Show all overlaps used to annotate reads
+#undef SHOW_ANNOTATION_RAW_FILTERED // Show all overlaps filtered by high error rate
+
+// When defined, findUnitigCoverage() writes each tig's coverage intervals to '<tigID>.coverage'.
+#undef DUMP_READ_COVERAGE
+
+// Each evidence read picks its single best overlap to tig (based on overlaps to reads in the tig).
+// Filter out evidence that aligns at erate higher than expected.
+// Collapse to intervals on tig.
+// If still not significant and not spanned, break.
+
+
+
+// One overlap promoted to tig coordinates: the span of the tig it covers, and the read
+// (and that read's tig) supplying the evidence.  The default ordering is by start position.
+class olapDat {
+public:
+  olapDat(uint32 b, uint32 e, uint32 t, uint32 r) :
+    tigbgn(b),
+    tigend(e),
+    eviTid(t),
+    eviRid(r) {
+  }
+
+  bool operator<(const olapDat &that) const {
+    return(tigbgn < that.tigbgn);
+  }
+
+  uint32  tigbgn;   //  Where the overlap starts on this tig
+  uint32  tigend;   //  Where the overlap ends on this tig
+
+  uint32  eviTid;   //  Tig the evidence read came from
+  uint32  eviRid;   //  The evidence read
+};
+
+
+// Sort predicate: order by evidence read ID, breaking ties by start position on the tig.
+bool
+olapDatByEviRid(const olapDat &A, const olapDat &B) {
+  if (A.eviRid != B.eviRid)
+    return(A.eviRid < B.eviRid);
+
+  return(A.tigbgn < B.tigbgn);
+}
+
+
+
+
+// A region of a tig, bgn to end, flagged as repeat or not.  Regions order by start coordinate.
+class breakPointCoords {
+public:
+  breakPointCoords(uint32 tigID, int32 bgn, int32 end, bool rpt=false) :
+    _tigID(tigID),
+    _bgn(bgn),
+    _end(end),
+    _isRepeat(rpt) {
+  }
+
+  ~breakPointCoords() {
+  }
+
+  bool operator<(breakPointCoords const &that) const {
+    return(_bgn < that._bgn);
+  }
+
+  uint32  _tigID;     //  Tig the region is on
+  int32   _bgn;       //  Start of the region
+  int32   _end;       //  End of the region
+  bool    _isRepeat;  //  True if the region is marked as a repeat
+};
+
+
+
+
+
+// Compute the interval of the A read (frg) that an overlap with the given hangs covers,
+// returned in 'lo' and 'hi'.
+//
+//             lo   hi
+//             v    v
+//       ------------          A read
+//             ----------      B read
+//
+// A positive ahang moves 'lo' in from the start of the read; a negative bhang moves 'hi'
+// in from the end of the read.  Any other hang leaves that side at the read boundary.
+void
+olapToReadCoords(ufNode *frg,
+                 int32   ahang, int32  bhang,
+                 int32  &lo,    int32 &hi) {
+
+  lo = (ahang > 0) ? ahang : 0;
+
+  hi = FI->fragmentLength(frg->ident);
+
+  if (bhang < 0)
+    hi += bhang;
+
+  //  The interval must be well formed and inside the read.
+
+  assert(0  <= lo);
+  assert(0  <= hi);
+  assert(lo <= hi);
+  assert(lo <= FI->fragmentLength(frg->ident));
+  assert(hi <= FI->fragmentLength(frg->ident));
+}
+
+
+
+// Compute read depth along a tig.  The span of every read in the tig is added to a
+// scratch interval list, and 'coverage' is cleared then filled with the depth of that list.
+//
+// If DUMP_READ_COVERAGE is defined, the result is also written, one 'lo hi depth' line
+// per interval, to a file named after the tig ID.
+void
+findUnitigCoverage(Unitig               *tig,
+                   intervalList<uint32> &coverage) {
+  intervalList<uint32>  spans;
+
+  for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
+    ufNode  *frg = &tig->ufpath[fi];
+    bool     fwd = (frg->position.bgn < frg->position.end);
+
+    //  Add (start, length), whichever way the read is oriented.
+
+    if (fwd)
+      spans.add(frg->position.bgn, frg->position.end - frg->position.bgn);
+    else
+      spans.add(frg->position.end, frg->position.bgn - frg->position.end);
+  }
+
+  coverage.clear();
+  coverage.depth(spans);
+
+#ifdef DUMP_READ_COVERAGE
+  char   fn[FILENAME_MAX];
+  sprintf(fn, "%08u.coverage", tig->id());
+  FILE  *F = fopen(fn, "w");
+
+  for (uint32 ii=0; ii<coverage.numberOfIntervals(); ii++)
+    fprintf(F, "%u %u %u\n", coverage.lo(ii), coverage.hi(ii), coverage.depth(ii));
+
+  fclose(F);
+#endif
+}
+
+
+
+
+
+
+
+
+// Assign every read of 'tig' to one of the regions in 'BP', then either count or move it.
+//
+// A read goes to the first region in BP that accepts it: a repeat region accepts a read
+// that lies entirely inside it, a non-repeat region accepts a read that intersects it.
+// A read that no region accepts is reported on stderr and fails an assert.
+//
+//   doMove == false:  nRepeat[] and nUnique[] are cleared, then for each region count the
+//                     reads it took as a repeat / non-repeat region.
+//
+//   doMove == true:   newTigs[] and lowCoord[] are cleared, then each read is added to a
+//                     new unitig for its region, created on first use and offset so the
+//                     first read added defines its origin.  nRepeat[] and nUnique[] are
+//                     read but not cleared here; a new unitig is flagged as a repeat when
+//                     its region has more repeat than unique reads.
+//                     NOTE(review): presumably a doMove == false call is made first to
+//                     fill the counts -- confirm against the caller.
+//
+// All four arrays must have at least BP.size() elements.
+//
+// Returns the number of regions whose nRepeat + nUnique count is non-zero.
+uint32
+splitUnitigs(UnitigVector             &unitigs,
+             Unitig                   *tig,
+             vector<breakPointCoords> &BP,
+             Unitig                  **newTigs,
+             int32                    *lowCoord,
+             uint32                   *nRepeat,
+             uint32                   *nUnique,
+             bool                      doMove) {
+
+  if (doMove) {
+    memset(newTigs,  0, sizeof(Unitig *) * BP.size());
+    memset(lowCoord, 0, sizeof(int32)    * BP.size());
+  } else {
+    memset(nRepeat,  0, sizeof(uint32)   * BP.size());
+    memset(nUnique,  0, sizeof(uint32)   * BP.size());
+  }
+
+  for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
+    ufNode  &frg    = tig->ufpath[fi];
+    int32    frgbgn = min(frg.position.bgn, frg.position.end);
+    int32    frgend = max(frg.position.bgn, frg.position.end);
+
+    //  Find the first region that accepts the read.  BP is sorted by increasing start,
+    //  which probably doesn't matter, but it makes the logging easier to read.
+
+    uint32   rid = UINT32_MAX;
+    bool     rpt = false;
+
+    for (uint32 bi=0; (bi < BP.size()) && (rid == UINT32_MAX); bi++) {
+      breakPointCoords const &rgn = BP[bi];
+
+      bool  accepts = (rgn._isRepeat) ? ((rgn._bgn <= frgbgn) && (frgend <= rgn._end))   //  Contained
+                                      : ((rgn._bgn <  frgend) && (frgbgn <  rgn._end));  //  Intersecting
+
+      if (accepts) {
+        rid = bi;
+        rpt = rgn._isRepeat;
+      }
+    }
+
+    //  Every read must land somewhere.  If one didn't, dump the regions before failing.
+
+    if (rid == UINT32_MAX) {
+      fprintf(stderr, "Failed to place read %u at %u-%u\n", frg.ident, frgbgn, frgend);
+      for (uint32 ii=0; ii<BP.size(); ii++)
+        fprintf(stderr, "Breakpoints %2u %8u-%8u repeat %u\n", ii, BP[ii]._bgn, BP[ii]._end, BP[ii]._isRepeat);
+    }
+    assert(rid != UINT32_MAX);
+
+    //  Counting only: tally the read and move on.
+
+    if (!doMove) {
+      if (rpt)
+        nRepeat[rid]++;
+      else
+        nUnique[rid]++;
+
+      continue;
+    }
+
+    //  Moving: make the unitig for this region if this is its first read, then add the read.
+
+    if (newTigs[rid] == NULL) {
+      lowCoord[rid] = frgbgn;
+
+      newTigs[rid]  = unitigs.newUnitig(true);  // LOG_ADDUNITIG_BREAKING
+
+      if (nRepeat[rid] > nUnique[rid])
+        newTigs[rid]->_isRepeat = true;
+    }
+
+    newTigs[rid]->addFrag(frg, -lowCoord[rid], false);  //LOG_ADDFRAG_BREAKING);
+  }
+
+  //  Count the regions that ended up with reads.
+
+  uint32  nTigsCreated = 0;
+
+  for (uint32 ii=0; ii<BP.size(); ii++)
+    nTigsCreated += (nRepeat[ii] + nUnique[ii] > 0) ? 1 : 0;
+
+  return(nTigsCreated);
+}
+
+
+
+
+
+
+// Collect the overlaps of read 'rdA' (placed in tig 'tgA') that could indicate a repeat,
+// and append them, in tig coordinates, to 'repeats'.
+//
+// An overlap is kept when the other read is in a different tig, or is in this tig but
+// placed where it does not overlap rdA.  An overlap to a read that overlaps rdA in this
+// tig is already represented by the tig and is ignored.  Also ignored are overlaps to
+// reads that are unplaced or alone in their tig, overlaps to reads in tigs flagged as
+// bubbles, containment overlaps, and overlaps for which overlapConsistentWithTig()
+// returns less than REPEAT_FRACTION.
+//
+// The append to 'repeats' is done inside a named OpenMP critical section.
+void
+annotateRepeatsOnRead(UnitigVector          &unitigs,
+                      Unitig                *tgA,
+                      ufNode                *rdA,
+                      double                 deviationRepeat,
+                      vector<olapDat>       &repeats) {
+  uint32       ovlLen = 0;
+  BAToverlap  *ovl    = OC->getOverlaps(rdA->ident, AS_MAX_ERATE, ovlLen);
+
+  vector<olapDat>  kept;   //  Overlaps from this read that survive all the filters
+
+  uint32       tgAid  = tgA->id();
+
+  bool         rdAfwd = (rdA->position.bgn < rdA->position.end);
+  int32        rdAlo  = (rdAfwd) ? rdA->position.bgn : rdA->position.end;
+  int32        rdAhi  = (rdAfwd) ? rdA->position.end : rdA->position.bgn;
+
+  assert(rdAlo < rdAhi);
+
+  //  The placed length of the read need not equal its true length, so read coordinates
+  //  are scaled by this factor when mapped onto the tig.
+
+  double       sc = (rdAhi - rdAlo) / (double)FI->fragmentLength(rdA->ident);
+
+  //  Bookkeeping, by what the overlap is to.
+
+  uint32       nOlaps = ovlLen;
+  uint32       nDiff  = 0;   //  A read in a different tig
+  uint32       nSelf  = 0;   //  A read in this tig, placed somewhere else
+  uint32       nConf  = 0;   //  A read in this tig, overlapping us there too
+
+  for (uint32 oi=0; oi<ovlLen; oi++) {
+    BAToverlap  &olap  = ovl[oi];
+    uint32       rdBid = olap.b_iid;
+    uint32       tgBid = Unitig::fragIn(rdBid);
+
+    //  Reads not in a tig, or alone in a tig, are unassembled crud.
+
+    if (tgBid == 0)
+      continue;
+
+    Unitig      *tgB = unitigs[tgBid];
+
+    if ((tgB == NULL) ||
+        (tgB->ufpath.size() == 1))
+      continue;
+
+    //  Reads in annotated bubbles don't count.
+
+    if (tgB->_isBubble)
+      continue;
+
+    //  Neither do containment overlaps: first to a container read, then to a contained read.
+
+    if ((olap.a_hang < 0) && (olap.b_hang > 0))
+      continue;
+
+    if ((olap.a_hang > 0) && (olap.b_hang < 0))
+      continue;
+
+    //  Find where the other read is placed.
+
+    ufNode      *rdB   = &tgB->ufpath[tgB->pathPosition(rdBid)];
+
+    bool         rdBfwd = (rdB->position.bgn < rdB->position.end);
+    int32        rdBlo  = (rdBfwd) ? rdB->position.bgn : rdB->position.end;
+    int32        rdBhi  = (rdBfwd) ? rdB->position.end : rdB->position.bgn;
+
+    assert(rdBlo < rdBhi);
+
+    //  If both reads are in this tig and their placements touch, the overlap is already
+    //  present in the tig and can't indicate a repeat.
+
+    bool         sameTig = (tgBid == tgAid);
+    bool         apart   = ((rdAhi < rdBlo) || (rdBhi < rdAlo));
+
+    if (sameTig && !apart) {
+      nConf++;
+      continue;
+    }
+
+    if (sameTig)
+      nSelf++;
+    else
+      nDiff++;
+
+    //  Find the part of the read the overlap covers, then map that onto the tig, clipped
+    //  to the extent of the tig.
+
+    int32        bgn = 0;
+    int32        end = 0;
+
+    olapToReadCoords(rdA, olap.a_hang, olap.b_hang, bgn, end);
+
+    int32        tigbgn = (rdAfwd) ? (rdAlo + sc * bgn) : (rdAhi - sc * end);
+    int32        tigend = (rdAfwd) ? (rdAlo + sc * end) : (rdAhi - sc * bgn);
+
+    assert(tigbgn < tigend);
+
+    if (tigbgn < 0)                  tigbgn = 0;
+    if (tigend > tgA->getLength())   tigend = tgA->getLength();
+
+    //  Drop overlaps with more error than this region of the tig supports.
+
+    double       consistent = tgA->overlapConsistentWithTig(deviationRepeat, tigbgn, tigend, olap.erate);
+
+    if (consistent < REPEAT_FRACTION) {
+#ifdef SHOW_ANNOTATION_RAW_FILTERED
+      writeLog("tig %6u read %7u %8u-%8u OVERLAP from tig %6u read %7u %8u-%8u at tigpos %8u-%8u erate %.6f consistent %.4f FILTERED\n",
+               tgAid, rdA->ident, rdAlo, rdAhi,
+               tgBid, rdBid, rdBlo, rdBhi,
+               tigbgn, tigend, olap.erate, consistent);
+#endif
+      continue;
+    }
+
+#ifdef SHOW_ANNOTATION_RAW
+    writeLog("tig %6u read %7u %8u-%8u OVERLAP from tig %6u read %7u %8u-%8u at tigpos %8u-%8u erate %.6f consistent %.4f\n",
+             tgAid, rdA->ident, rdAlo, rdAhi,
+             tgBid, rdBid, rdBlo, rdBhi,
+             tigbgn, tigend, olap.erate, consistent);
+#endif
+
+    kept.push_back(olapDat(tigbgn, tigend, tgBid, rdBid));
+  }
+
+  //  Hand everything we kept to the shared list, one thread at a time.
+
+#pragma omp critical (repeatsPushBack)
+  repeats.insert(repeats.end(), kept.begin(), kept.end());
+}
+
+
+
+
+//  Find repeat regions in every multi-read tig and split the tig at their boundaries.
+//
+//  For each tig: overlaps that are not realized in the tig layout are collected by
+//  annotateRepeatsOnRead() and merged into repeat intervals.  Intervals spanned by a single
+//  read, or with no confused best edges, are dropped; intervals whose junction is spanned by a
+//  read are joined.  The surviving intervals (and their inverse, the unique intervals) become
+//  the break points handed to splitUnitigs().  If that reports more than one tig, the original
+//  tig is deleted and its slot in 'unitigs' is set to NULL.  Uses the globals OG, OC and FI.
+//
+//    unitigs          - the tigs to examine; only the tigs present on entry are examined.
+//    deviationRepeat  - passed unchanged to annotateRepeatsOnRead().
+//    confusedAbsolute - an alternate edge is NOT confusing if its score differs from the
+//                       best edge score by at least this much,
+//    confusedPercent  - or if the percent difference of the two scores exceeds this.
+//
+void
+markRepeatReads(UnitigVector &unitigs,
+                double        deviationRepeat,
+                uint32        confusedAbsolute,
+                double        confusedPercent) {
+  uint32  tiLimit    = unitigs.size();
+  uint32  numThreads = omp_get_max_threads();
+
+  writeLog("repeatDetect()-- working on "F_U32" unitigs, with "F_U32" threads.\n", tiLimit, numThreads);
+
+  vector<olapDat>      repeatOlaps;   // Overlaps to reads promoted to tig coords
+
+  intervalList<int32>  tigMarksR;     // Marked repeats based on reads, filtered by spanning reads
+  intervalList<int32>  tigMarksU;     // Non-repeat intervals, just the inversion of tigMarksR
+
+  for (uint32 ti=0; ti<tiLimit; ti++) {
+    Unitig  *tig = unitigs[ti];
+
+    if (tig == NULL)
+      continue;
+
+    if (tig->ufpath.size() == 1)
+      continue;
+
+    writeLog("Annotating repeats in reads for tig %u/%u.\n", ti, tiLimit);
+
+    // Analyze overlaps for each read.  For each overlap to a read not in this tig, or not
+    // overlapping in this tig, and of acceptable error rate, add the overlap to repeatOlaps.
+    // The list is cleared first; whatever it holds from the previous tig is not for this tig.
+
+    repeatOlaps.clear();
+
+    uint32  fiLimit   = tig->ufpath.size();
+    uint32  blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
+
+#pragma omp parallel for if(fiLimit > 100) schedule(dynamic, blockSize)
+    for (uint32 fi=0; fi<fiLimit; fi++)
+      annotateRepeatsOnRead(unitigs, tig, &tig->ufpath[fi], deviationRepeat, repeatOlaps);
+
+    writeLog("Annotated with %lu overlaps.\n", repeatOlaps.size());
+
+    // Merge marks for the same read into the largest possible.
+
+    sort(repeatOlaps.begin(), repeatOlaps.end(), olapDatByEviRid);
+
+#ifdef SHOW_ANNOTATE
+    for (uint32 ii=0; ii<repeatOlaps.size(); ii++)
+      if (repeatOlaps[ii].tigbgn < 1000000)
+        writeLog("repeatOlaps[%u] %u-%u from tig %u read %u RAW\n",
+                 ii,
+                 repeatOlaps[ii].tigbgn, repeatOlaps[ii].tigend,
+                 repeatOlaps[ii].eviTid, repeatOlaps[ii].eviRid);
+
+    flushLog();
+#endif
+
+    for (uint32 dd=0, ss=1; ss<repeatOlaps.size(); ss++) {
+      assert(repeatOlaps[dd].eviRid <= repeatOlaps[ss].eviRid);
+
+      // If different evidence reads, close the destination olap, set up
+      // for a new destination.
+
+      if (repeatOlaps[dd].eviRid != repeatOlaps[ss].eviRid) {
+        dd = ss;
+        continue;
+      }
+
+      // If the destination ends before the source begins, there is no overlap between the
+      // two regions.  Close dd, set up for a new dd.
+
+      if (repeatOlaps[dd].tigend <= repeatOlaps[ss].tigbgn) {
+        dd = ss;
+        continue;
+      }
+
+      // Otherwise, there must be an overlap.  Extend the destination region, erase the source
+      // region.
+
+      repeatOlaps[dd].tigbgn = min(repeatOlaps[ss].tigbgn, repeatOlaps[dd].tigbgn);
+      repeatOlaps[dd].tigend = max(repeatOlaps[ss].tigend, repeatOlaps[dd].tigend);
+
+      repeatOlaps[ss].tigbgn = UINT32_MAX;
+      repeatOlaps[ss].tigend = UINT32_MAX;
+      repeatOlaps[ss].eviTid = UINT32_MAX;
+      repeatOlaps[ss].eviRid = UINT32_MAX;
+    }
+
+    // Sort overlaps again.  This pushes all those 'erased' regions to the end of the list, which
+    // we can then just pop off.
+
+    sort(repeatOlaps.begin(), repeatOlaps.end(), olapDatByEviRid);
+
+    for (uint32 ii=repeatOlaps.size(); ii--; )
+      if (repeatOlaps[ii].eviTid == UINT32_MAX)
+        repeatOlaps.pop_back();
+
+    // For logging, sort by coordinate
+
+    sort(repeatOlaps.begin(), repeatOlaps.end());
+
+#ifdef SHOW_ANNOTATE
+    for (uint32 ii=0; ii<repeatOlaps.size(); ii++)
+      writeLog("repeatOlaps[%d] %u-%u from tig %u read %u MERGED\n",
+               ii,
+               repeatOlaps[ii].tigbgn, repeatOlaps[ii].tigend,
+               repeatOlaps[ii].eviTid, repeatOlaps[ii].eviRid);
+#endif
+
+    // Make a new set of intervals based on all the detected repeats.
+
+    tigMarksR.clear();
+
+    for (uint32 ii=0; ii<repeatOlaps.size(); ii++)
+      tigMarksR.add(repeatOlaps[ii].tigbgn, repeatOlaps[ii].tigend - repeatOlaps[ii].tigbgn);
+
+    // Collapse all the read markings to intervals on the unitig, merging those that overlap
+    // significantly.
+
+    writeLog("Merge marks.\n");
+
+    tigMarksR.merge(REPEAT_OVERLAP_MIN);
+
+    // Scan reads, discard any mark that is contained in a read
+    //
+    // We don't need to filterShort() after every one is removed, but it's simpler to do it Right Now than
+    // to track if it is needed.
+
+    writeLog("Scan reads to discard spanned repeats.\n");
+
+    for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
+      ufNode  *frg       = &tig->ufpath[fi];
+      bool     frgfwd    = (frg->position.bgn < frg->position.end);
+      int32    frglo     = (frgfwd) ? frg->position.bgn : frg->position.end;
+      int32    frghi     = (frgfwd) ? frg->position.end : frg->position.bgn;
+      bool     discarded = false;
+
+      for (uint32 ri=0; ri<tigMarksR.numberOfIntervals(); ri++) {
+        bool  spanLo = false;
+        bool  spanHi = false;
+
+        // The decision of 'spanned by a read' is broken into two pieces: does the read span the
+        // lower (higher) boundary of the region.  To be spanned, the boundary needs to be spanned
+        // by at least MIN_ANCHOR_HANG additional bases (to anchor the read to non-repeat
+        // sequence).
+        //
+        // This is a problem at the start/end of the tig, because no read will extend past the
+        // start/end of the tig.  Instead, if the repeat is contained within the first (last) read
+        // with no extension at the respective end, it is spanned.
+
+        if ((frglo == 0) &&                                   // Read at start of tig, spans off the high end
+            (tigMarksR.hi(ri) + MIN_ANCHOR_HANG <= frghi))
+          spanLo = spanHi = true;
+
+        if ((frghi == tig->getLength()) &&                    // Read at end of tig, spans off the low end
+            (frglo + MIN_ANCHOR_HANG <= tigMarksR.lo(ri)))
+          spanLo = spanHi = true;
+
+        if (frglo + MIN_ANCHOR_HANG <= tigMarksR.lo(ri))      // Read spanned off the low end
+          spanLo = true;
+
+        if (tigMarksR.hi(ri) + MIN_ANCHOR_HANG <= frghi)      // Read spanned off the high end
+          spanHi = true;
+
+        if (spanLo && spanHi) {
+          writeLog("discard region %8d:%-8d - contained in read %6u %8d-%8d\n",
+                   tigMarksR.lo(ri), tigMarksR.hi(ri), frg->ident, frglo, frghi);
+
+          tigMarksR.lo(ri) = 0;
+          tigMarksR.hi(ri) = 0;
+
+          discarded = true;
+        }
+      }
+
+      if (discarded)
+        tigMarksR.filterShort(1);
+    }
+
+    // Run through again, looking for the thickest overlap(s) to the remaining regions.
+    // This isn't caring about the end effect noted above.
+
+#if 1
+    writeLog("thickest edges to the repeat regions:\n");
+
+    for (uint32 ri=0; ri<tigMarksR.numberOfIntervals(); ri++) {
+      uint32  t5 = UINT32_MAX, l5 = 0, t5bgn = 0, t5end = 0;
+      uint32  t3 = UINT32_MAX, l3 = 0, t3bgn = 0, t3end = 0;
+
+      for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
+        ufNode  *frg    = &tig->ufpath[fi];
+        bool     frgfwd = (frg->position.bgn < frg->position.end);
+        int32    frglo  = (frgfwd) ? frg->position.bgn : frg->position.end;
+        int32    frghi  = (frgfwd) ? frg->position.end : frg->position.bgn;
+
+        // Overlap off the 5' end of the region.
+        if (frglo <= tigMarksR.lo(ri) && (tigMarksR.lo(ri) <= frghi)) {
+          uint32 olap = frghi - tigMarksR.lo(ri);
+          if (l5 < olap) {
+            l5    = olap;
+            t5    = fi;
+            t5bgn = frglo;  // Easier than recomputing it later on...
+            t5end = frghi;
+          }
+        }
+
+        // Overlap off the 3' end of the region.
+        if (frglo <= tigMarksR.hi(ri) && (tigMarksR.hi(ri) <= frghi)) {
+          uint32 olap = tigMarksR.hi(ri) - frglo;
+          if (l3 < olap) {
+            l3    = olap;
+            t3    = fi;
+            t3bgn = frglo;
+            t3end = frghi;
+          }
+        }
+
+        if (frglo <= tigMarksR.lo(ri) && (tigMarksR.hi(ri) <= frghi)) {
+          writeLog("saved region %8d:%-8d - closest read %6u (%+6d) %8d:%-8d (%+6d) (contained)\n",
+                   tigMarksR.lo(ri), tigMarksR.hi(ri),
+                   frg->ident,
+                   tigMarksR.lo(ri) - frglo, frglo,
+                   frghi, frghi - tigMarksR.hi(ri));
+        }
+      }
+
+      if (t5 != UINT32_MAX)
+        writeLog("saved region %8d:%-8d - closest 5' read %6u (%+6d) %8d:%-8d (%+6d)\n",
+                 tigMarksR.lo(ri), tigMarksR.hi(ri),
+                 tig->ufpath[t5].ident,
+                 tigMarksR.lo(ri) - t5bgn, t5bgn,
+                 t5end, t5end - tigMarksR.hi(ri));
+
+      if (t3 != UINT32_MAX)
+        writeLog("saved region %8d:%-8d - closest 3' read %6u (%+6d) %8d:%-8d (%+6d)\n",
+                 tigMarksR.lo(ri), tigMarksR.hi(ri),
+                 tig->ufpath[t3].ident,
+                 tigMarksR.lo(ri) - t3bgn, t3bgn,
+                 t3end, t3end - tigMarksR.hi(ri));
+    }
+#endif
+
+    // Scan reads.  If a read intersects a repeat interval, and the best edge for that read
+    // is entirely in the repeat region, decide if there is a near-best edge to something
+    // not in this tig.
+    //
+    // A region with no such near-best edges is _probably_ correct.
+
+    writeLog("search for confused edges:\n");
+
+    uint32  *isConfused = new uint32 [tigMarksR.numberOfIntervals()];
+
+    memset(isConfused, 0, sizeof(uint32) * tigMarksR.numberOfIntervals());
+
+    for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
+      ufNode  *rdA    = &tig->ufpath[fi];
+      uint32   rdAid  = rdA->ident;
+      bool     rdAfwd = (rdA->position.bgn < rdA->position.end);
+      int32    rdAlo  = (rdAfwd) ? rdA->position.bgn : rdA->position.end;
+      int32    rdAhi  = (rdAfwd) ? rdA->position.end : rdA->position.bgn;
+
+      double   sc     = (rdAhi - rdAlo) / (double)FI->fragmentLength(rdAid);
+
+      if ((OG->isContained(rdAid)  == true) ||
+          (OG->isSuspicious(rdAid) == true))
+        continue;
+
+      for (uint32 ri=0; ri<tigMarksR.numberOfIntervals(); ri++) {
+        uint32  rMin = tigMarksR.lo(ri);
+        uint32  rMax = tigMarksR.hi(ri);
+
+        if ((rdAhi < rMin) ||   // Read ends before the region
+            (rMax < rdAlo))     // Read starts after the region
+          continue;             // -> don't care about this read!
+
+        // Compute the position (in the tig) of the best overlaps.
+
+        int32  tig5bgn=0, tig5end=0;
+        int32  tig3bgn=0, tig3end=0;
+
+        // Instead of using the best edge - which might not be the edge used in the unitig -
+        // we need to scan the layout to return the previous/next dovetail
+
+        // Put this in a function - what to return if no best overlap?
+
+        BestEdgeOverlap  *b5 = OG->getBestEdgeOverlap(rdAid, false);
+        BestEdgeOverlap  *b3 = OG->getBestEdgeOverlap(rdAid, true);
+
+        // If the best edge is to a read not in this tig, there is nothing to compare against.
+        // Is this confused by default?  Possibly.  The unitig was constructed somehow, and that
+        // must then be the edge coming into us.  We'll pick it up later.
+
+        bool  b5use = true;
+        bool  b3use = true;
+
+        if (b5->fragId() == 0)
+          b5use = false;
+        if (b3->fragId() == 0)
+          b3use = false;
+
+        if ((b5use) && (Unitig::fragIn(b5->fragId()) != tig->id()))
+          b5use = false;
+        if ((b3use) && (Unitig::fragIn(b3->fragId()) != tig->id()))
+          b3use = false;
+
+        // The best edge read is in this tig.  If they don't overlap, again, nothing to compare
+        // against.
+
+        if (b5use) {
+          ufNode  *rdB    = &tig->ufpath[Unitig::pathPosition(b5->fragId())];
+          bool     rdBfwd = (rdB->position.bgn < rdB->position.end);
+          int32    rdBlo  = (rdBfwd) ? rdB->position.bgn : rdB->position.end;
+          int32    rdBhi  = (rdBfwd) ? rdB->position.end : rdB->position.bgn;
+
+          if ((rdAhi < rdBlo) ||
+              (rdBhi < rdAlo))
+            b5use = false;
+        }
+
+        if (b3use) {
+          ufNode  *rdB    = &tig->ufpath[Unitig::pathPosition(b3->fragId())];
+          bool     rdBfwd = (rdB->position.bgn < rdB->position.end);
+          int32    rdBlo  = (rdBfwd) ? rdB->position.bgn : rdB->position.end;
+          int32    rdBhi  = (rdBfwd) ? rdB->position.end : rdB->position.bgn;
+
+          if ((rdAhi < rdBlo) ||
+              (rdBhi < rdAlo))
+            b3use = false;
+        }
+
+        // If we can use this edge, compute the placement of the overlap on the unitig.
+
+        // Call #1;
+
+        if (b5use) {
+          int32  bgn=0, end=0;
+
+          olapToReadCoords(rdA,
+                           b5->ahang(), b5->bhang(),
+                           bgn, end);
+
+          tig5bgn = (rdAfwd) ? (rdAlo + sc * bgn) : (rdAhi - sc * end);
+          tig5end = (rdAfwd) ? (rdAlo + sc * end) : (rdAhi - sc * bgn);
+
+          assert(tig5bgn < tig5end);
+
+          if (tig5bgn < 0)                 tig5bgn = 0;
+          if (tig5end > tig->getLength())  tig5end = tig->getLength();
+        }
+
+        // Call #2
+
+        if (b3use) {
+          int32  bgn=0, end=0;
+
+          olapToReadCoords(rdA,
+                           b3->ahang(), b3->bhang(),
+                           bgn, end);
+
+          tig3bgn = (rdAfwd) ? (rdAlo + sc * bgn) : (rdAhi - sc * end);
+          tig3end = (rdAfwd) ? (rdAlo + sc * end) : (rdAhi - sc * bgn);
+
+          assert(tig3bgn < tig3end);
+
+          if (tig3bgn < 0)                 tig3bgn = 0;
+          if (tig3end > tig->getLength())  tig3end = tig->getLength();
+        }
+
+        // If either of the 5' or 3' overlaps (or both!) are in the repeat region, we need to check for
+        // close overlaps on that end.
+
+        uint32  len5 = 0;
+        uint32  len3 = 0;
+
+        if ((rMin    < tig5bgn) &&
+            (tig5end < rMax) &&
+            (b5use))
+          len5 = FI->overlapLength(rdAid, b5->fragId(), b5->ahang(), b5->bhang());
+        else
+          b5use = false;
+
+        if ((rMin    < tig3bgn) &&
+            (tig3end < rMax) &&
+            (b3use))
+          len3 = FI->overlapLength(rdAid, b3->fragId(), b3->ahang(), b3->bhang());
+        else
+          b3use = false;
+
+        double  score5 = len5 * (1 - b5->erate());
+        double  score3 = len3 * (1 - b3->erate());
+
+        // Neither of the best edges are in the repeat region; move to the next region and/or read.
+        if (len5 + len3 == 0)
+          continue;
+
+        // At least one of the best edge overlaps is in the repeat region.  Scan for other edges
+        // that are of comparable length and quality.
+
+        uint32       ovlLen = 0;
+        BAToverlap  *ovl    = OC->getOverlaps(rdAid, AS_MAX_ERATE, ovlLen);
+
+        for (uint32 oo=0; oo<ovlLen; oo++) {
+          uint32  rdBid = ovl[oo].b_iid;
+          uint32  tgBid = Unitig::fragIn(rdBid);
+
+          // If the read is in a singleton, skip.  These are unassembled crud.
+          if ((tgBid                         == 0) ||
+              (unitigs[tgBid]                == NULL) ||
+              (unitigs[tgBid]->ufpath.size() == 1))
+            continue;
+
+          // If the read is in an annotated bubble, skip.
+          if (unitigs[tgBid]->_isBubble)
+            continue;
+
+          // Skip if this overlap is the best we're trying to match.
+          if ((rdBid == b5->fragId()) ||
+              (rdBid == b3->fragId()))
+            continue;
+
+          // Skip if this overlap is crappy quality
+          if (OG->isOverlapBadQuality(ovl[oo]))
+            continue;
+
+          // Skip if the read is contained or suspicious.
+          if ((OG->isContained(rdBid)  == true) ||
+              (OG->isSuspicious(rdBid) == true))
+            continue;
+
+          // Skip if the overlap isn't dovetail.
+          bool  ovl5 = ovl[oo].AEndIs5prime();
+          bool  ovl3 = ovl[oo].AEndIs3prime();
+
+          if ((ovl5 == false) &&
+              (ovl3 == false))
+            continue;
+
+          // Skip if we're not using this overlap
+          if ((ovl5 == true) && (b5use == false))
+            continue;
+
+          if ((ovl3 == true) && (b3use == false))
+            continue;
+
+
+          uint32   rdBpos = unitigs[tgBid]->pathPosition(rdBid);
+          ufNode  *rdB    = &unitigs[tgBid]->ufpath[rdBpos];
+
+          bool     rdBfwd = (rdB->position.bgn < rdB->position.end);
+          int32    rdBlo  = (rdBfwd) ? rdB->position.bgn : rdB->position.end;
+          int32    rdBhi  = (rdBfwd) ? rdB->position.end : rdB->position.bgn;
+
+          // If the overlap is to a read in a different tig, or
+          // the overlap is to a read in the same tig, but we don't overlap in the tig, check lengths.
+          // Otherwise, the overlap is present in the tig, and can't be confused.
+          if ((tgBid == tig->id()) &&
+              (rdBlo <= rdAhi) &&
+              (rdAlo <= rdBhi))
+            continue;
+
+          uint32  len   = FI->overlapLength(rdAid, ovl[oo].b_iid, ovl[oo].a_hang, ovl[oo].b_hang);
+          double  score = len * (1 - ovl[oo].erate);
+
+          // Compute percent difference.
+
+          double  ad5 = fabs(score - score5);
+          double  ad3 = fabs(score - score3);
+
+          double  pd5 = 200 * ad5 / (score + score5);
+          double  pd3 = 200 * ad3 / (score + score3);
+
+          // Skip if this overlap is vastly worse than the best.  Each end is compared against
+          // its own best edge: ad5/pd5 for a 5' overlap, ad3/pd3 for a 3' overlap.  (If the other
+          // end has no usable best edge its score is zero and its percent difference is 200.)
+
+          if ((ovl5 == true) && ((ad5 >= confusedAbsolute) || (pd5 > confusedPercent))) {
+            writeLog("tig %7u read %8u pos %7u-%-7u NOT confused by 5' edge to read %8u - best edge read %8u len %6u erate %.4f score %8.2f - alt edge len %6u erate %.4f score %8.2f - absdiff %8.2f percdiff %8.4f\n",
+                     tig->id(), rdAid, rdAlo, rdAhi,
+                     rdBid,
+                     b5->fragId(), len5, b5->erate(), score5,
+                     len, ovl[oo].erate, score,
+                     ad5, pd5);
+            continue;
+          }
+
+          if ((ovl3 == true) && ((ad3 >= confusedAbsolute) || (pd3 > confusedPercent))) {
+            writeLog("tig %7u read %8u pos %7u-%-7u NOT confused by 3' edge to read %8u - best edge read %8u len %6u erate %.4f score %8.2f - alt edge len %6u erate %.4f score %8.2f - absdiff %8.2f percdiff %8.4f\n",
+                     tig->id(), rdAid, rdAlo, rdAhi,
+                     rdBid,
+                     b3->fragId(), len3, b3->erate(), score3,
+                     len, ovl[oo].erate, score,
+                     ad3, pd3);
+            continue;
+          }
+
+          // Potential confusion!
+
+          if (ovl5 == true)
+            writeLog("tig %7u read %8u pos %7u-%-7u IS confused by 5' edge to read %8u - best edge read %8u len %6u erate %.4f score %8.2f - alt edge len %6u erate %.4f score %8.2f - absdiff %8.2f percdiff %8.4f\n",
+                     tig->id(), rdAid, rdAlo, rdAhi,
+                     rdBid,
+                     b5->fragId(), len5, b5->erate(), score5,
+                     len, ovl[oo].erate, score,
+                     ad5, pd5);
+
+          if (ovl3 == true)
+            writeLog("tig %7u read %8u pos %7u-%-7u IS confused by 3' edge to read %8u - best edge read %8u len %6u erate %.4f score %8.2f - alt edge len %6u erate %.4f score %8.2f - absdiff %8.2f percdiff %8.4f\n",
+                     tig->id(), rdAid, rdAlo, rdAhi,
+                     rdBid,
+                     b3->fragId(), len3, b3->erate(), score3,
+                     len, ovl[oo].erate, score,
+                     ad3, pd3);
+
+          isConfused[ri]++;
+        }
+      }  // Over all marks (ri)
+    }  // Over all reads (fi)
+
+    // Scan all the regions, and delete any that have no confusion.
+
+    {
+      bool  discarded = false;
+
+      for (uint32 ri=0; ri<tigMarksR.numberOfIntervals(); ri++) {
+        if (isConfused[ri] == 0) {
+          writeLog("discard region %8d:%-8d - no confusion in best edges\n",
+                   tigMarksR.lo(ri), tigMarksR.hi(ri));
+
+          tigMarksR.lo(ri) = 0;
+          tigMarksR.hi(ri) = 0;
+
+          discarded = true;
+        }
+
+        else {
+          writeLog("saved region %8d:%-8d - %u best edges are potentially confused\n",
+                   tigMarksR.lo(ri), tigMarksR.hi(ri), isConfused[ri]);
+        }
+      }
+
+      if (discarded)
+        tigMarksR.filterShort(1);
+    }
+
+    delete [] isConfused;
+
+    // Scan reads, join any marks that have their junctions spanned by a sufficiently large amount.
+    //
+    // If the read spans this junction by the usual amount, merge the intervals.
+    //
+    // The intervals can be overlapping (by up to REPEAT_OVERLAP_MIN (x2?) bases.  For this junction
+    // to be spanned, the read must span from min-ROM to max+ROM, not just hi(ri-1) to lo(ri).
+    //
+    // We DO need to filterShort() after every merge, otherwise, we'd have an empty bogus interval
+    // in the middle of our list, which could be preventing some other merge.  OK, we could
+    //
+    // Anything that gets merged is now no longer a true repeat.  It's unique, just bordered by repeats.
+    // We can't track this through the indices (because we delete things).  We track it with a set of
+    // begin coordinates.
+
+    set<int32>  nonRepeatIntervals;
+
+    writeLog("Scan reads to merge repeat regions.\n");
+
+    for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
+      ufNode  *frg    = &tig->ufpath[fi];
+      bool     frgfwd = (frg->position.bgn < frg->position.end);
+      int32    frglo  = (frgfwd) ? frg->position.bgn : frg->position.end;
+      int32    frghi  = (frgfwd) ? frg->position.end : frg->position.bgn;
+      bool     merged = false;
+
+      for (uint32 ri=1; ri<tigMarksR.numberOfIntervals(); ri++) {
+        uint32  rMin = min(tigMarksR.hi(ri-1), tigMarksR.lo(ri));
+        uint32  rMax = max(tigMarksR.hi(ri-1), tigMarksR.lo(ri));
+
+        if ((frglo + MIN_ANCHOR_HANG <= rMin) && (rMax + MIN_ANCHOR_HANG <= frghi)) {
+          writeLog("merge regions %8d:%-8d and %8d:%-8d - junction contained in read %6u %5d-%5d\n",
+                   tigMarksR.lo(ri-1), tigMarksR.hi(ri-1),
+                   tigMarksR.lo(ri), tigMarksR.hi(ri),
+                   frg->ident, frglo, frghi);
+
+          tigMarksR.lo(ri) = tigMarksR.lo(ri-1);
+
+          tigMarksR.lo(ri-1) = 0;   // CRITICAL to delete this interval (and not ri) because the next
+          tigMarksR.hi(ri-1) = 0;   // iteration will be using ri-1 (== ri here) and ri (== ri+1).
+
+          merged = true;
+
+          nonRepeatIntervals.insert(tigMarksR.lo(ri));
+        }
+      }
+
+      if (merged)
+        tigMarksR.filterShort(1);
+    }
+
+    // Extend the regions by MIN_ANCHOR_HANG.  This makes checking for reads that span and are
+    // anchored in the next region easier.  It also solved a quirk when the first/last repeat
+    // region doesn't extend to the end of the sequence:
+    //        0-183     unique  (created from inversion below, but useless and incorrect)
+    //      183-9942    repeat
+
+    for (uint32 ii=0; ii<tigMarksR.numberOfIntervals(); ii++) {
+      tigMarksR.lo(ii) = max<int32>(tigMarksR.lo(ii) - MIN_ANCHOR_HANG, 0);
+      tigMarksR.hi(ii) = min<int32>(tigMarksR.hi(ii) + MIN_ANCHOR_HANG, tig->getLength());
+    }
+
+    // Find the non-repeat intervals.
+
+    tigMarksU = tigMarksR;
+    tigMarksU.invert(0, tig->getLength());
+
+    // Create the list of intervals we'll use to make new unitigs.
+    //
+    // The repeat intervals are extended by MIN_ANCHOR_HANG, and then any read fully contained in one of
+    // these is moved here.
+    //
+    // The non-repeat intervals are shortened by the same amount, and any read that intersects one
+    // is moved there.
+    //
+    // Does order matter?  Not sure.  The repeat intervals are first, then the formerly repeat
+    // merged intervals, then the unique intervals.  Splitting might depend on the repeats being
+    // first.
+
+    writeLog("Make breakpoints.\n");
+
+    vector<breakPointCoords>   BP;
+
+    for (uint32 ii=0; ii<tigMarksR.numberOfIntervals(); ii++)
+      if (nonRepeatIntervals.count(tigMarksR.lo(ii)) == 0)
+        BP.push_back(breakPointCoords(ti, tigMarksR.lo(ii), tigMarksR.hi(ii), true));
+
+    for (uint32 ii=0; ii<tigMarksR.numberOfIntervals(); ii++)
+      if (nonRepeatIntervals.count(tigMarksR.lo(ii)) != 0)
+        BP.push_back(breakPointCoords(ti, tigMarksR.lo(ii), tigMarksR.hi(ii), true));
+
+    for (uint32 ii=0; ii<tigMarksU.numberOfIntervals(); ii++) {
+      BP.push_back(breakPointCoords(ti, tigMarksU.lo(ii), tigMarksU.hi(ii), false));
+    }
+
+    // If only one region, the whole unitig is a single interval.  Nothing to split.
+
+    if (BP.size() == 1)
+      continue;
+
+    sort(BP.begin(), BP.end());
+
+    // Report.
+
+    writeLog("break tig %u into up to %u pieces:\n", ti, (uint32)BP.size());
+    for (uint32 ii=0; ii<BP.size(); ii++)
+      writeLog(" %8d %8d %s (length %d)\n",
+               BP[ii]._bgn, BP[ii]._end,
+               BP[ii]._isRepeat ? "repeat" : "unique",
+               BP[ii]._end - BP[ii]._bgn);
+
+    // Scan the reads, counting the number of reads that would be placed in each new tig.  This is done
+    // because there are a few 'splits' that don't move any reads around.
+
+    Unitig **newTigs   = new Unitig * [BP.size()];
+    int32   *lowCoord  = new int32    [BP.size()];
+    uint32  *nRepeat   = new uint32   [BP.size()];
+    uint32  *nUnique   = new uint32   [BP.size()];
+
+    // First call, count the number of tigs we would create if we let it create them.
+
+    uint32  nTigs = splitUnitigs(unitigs, tig, BP, newTigs, lowCoord, nRepeat, nUnique, false);
+
+    // Second call, actually create the tigs, if anything would change.
+
+    if (nTigs > 1)
+      splitUnitigs(unitigs, tig, BP, newTigs, lowCoord, nRepeat, nUnique, true);
+
+    // Report the tigs created.
+
+    for (uint32 ii=0; ii<BP.size(); ii++) {
+      int32  rgnbgn = BP[ii]._bgn;
+      int32  rgnend = BP[ii]._end;
+      bool   repeat = BP[ii]._isRepeat;
+
+      if (nRepeat[ii] + nUnique[ii] == 0)
+        writeLog("For tig %5u %s region %8d %8d - %6u/%6u repeat/unique reads - no new unitig created.\n",
+                 ti, (repeat == true) ? "repeat" : "unique", rgnbgn, rgnend, nRepeat[ii], nUnique[ii]);
+
+      else if (nTigs > 1)
+        writeLog("For tig %5u %s region %8d %8d - %6u/%6u reads repeat/unique - unitig %5u created.\n",
+                 ti, (repeat == true) ? "repeat" : "unique", rgnbgn, rgnend, nRepeat[ii], nUnique[ii], newTigs[ii]->id());
+
+      else
+        writeLog("For tig %5u %s region %8d %8d - %6u/%6u repeat/unique reads - unitig %5u remains unchanged.\n",
+                 ti, (repeat == true) ? "repeat" : "unique", rgnbgn, rgnend, nRepeat[ii], nUnique[ii], tig->id());
+    }
+
+    // Cleanup.
+
+    delete [] newTigs;
+    delete [] lowCoord;
+    delete [] nRepeat;
+    delete [] nUnique;
+
+    // Remove the old unitig....if we made new ones.
+
+    if (nTigs > 1) {
+      delete tig;
+      unitigs[ti] = NULL;
+    }
+  }
+}
+
diff --git a/src/bogart/AS_BAT_PlaceZombies.H b/src/bogart/AS_BAT_MarkRepeatReads.H
similarity index 56%
rename from src/bogart/AS_BAT_PlaceZombies.H
rename to src/bogart/AS_BAT_MarkRepeatReads.H
index e4de0cb..4e9e0f1 100644
--- a/src/bogart/AS_BAT_PlaceZombies.H
+++ b/src/bogart/AS_BAT_MarkRepeatReads.H
@@ -13,21 +13,9 @@
* Canu branched from Celera Assembler at its revision 4587.
* Canu branched from the kmer project at its revision 1994.
*
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_PlaceZombies.H
- *
* Modifications by:
*
- * Brian P. Walenz from 2010-DEC-06 to 2013-AUG-01
- * are Copyright 2010-2011,2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2014-DEC-19 to 2015-APR-24
- * are Copyright 2014-2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
+ * Brian P. Walenz beginning on 2016-MAR-11
* are a 'United States Government Work', and
* are released in the public domain
*
@@ -35,10 +23,15 @@
* full conditions and disclaimers for each license.
*/
-#ifndef INCLUDE_AS_BAT_PLACEZOMBIES
-#define INCLUDE_AS_BAT_PLACEZOMBIES
+#ifndef INCLUDE_AS_BAT_MARKREPEATREADS
+#define INCLUDE_AS_BAT_MARKREPEATREADS
+
void
-placeZombies(UnitigVector &unitigs, double erate);
+markRepeatReads(UnitigVector &unitigs,
+ double deviationRepeat,
+ uint32 confusedAbsolute,
+ double confusedPercent);
+
-#endif // INCLUDE_AS_BAT_PLACEZOMBIES
+#endif // INCLUDE_AS_BAT_MARKREPEATREADS
diff --git a/src/bogart/AS_BAT_MergeSplitJoin.C b/src/bogart/AS_BAT_MergeSplitJoin.C
deleted file mode 100644
index b52c708..0000000
--- a/src/bogart/AS_BAT_MergeSplitJoin.C
+++ /dev/null
@@ -1,1763 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_MergeSplitJoin.C
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2011-FEB-15 to 2014-MAY-03
- * are Copyright 2011-2014 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2014-OCT-09 to 2015-AUG-05
- * are Copyright 2014-2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_BestOverlapGraph.H"
-#include "AS_BAT_Unitig.H"
-#include "AS_BAT_PlaceFragUsingOverlaps.H"
-
-#include "AS_BAT_OverlapCache.H"
-#include "AS_BAT_IntersectSplit.H"
-
-#include "AS_BAT_MergeSplitJoin.H"
-
-#include "AS_BAT_Breaking.H"
-
-#include "AS_BAT_Instrumentation.H"
-
-#include "AS_BAT_RepeatJunctionEvidence.H"
-
-#include "intervalList.H"
-
-
-uint32 SPURIOUS_COVERAGE_THRESHOLD = 6; // Need to have more than this coverage in non-unitig reads aligned to call it a repeat area
-uint32 ISECT_NEEDED_TO_BREAK = 15; // Need to have at least this number of reads confirming a repeat junction
-uint32 REGION_END_WEIGHT = 15; // Pretend there are this many intersections at the end points of each repeat region
-
-#undef LOG_BUBBLE_TESTS
-#undef LOG_BUBBLE_FAILURE
-#define LOG_BUBBLE_SUCCESS
-
-omp_lock_t markRepeat_breakUnitigs_Lock;
-
-bool
-mergeBubbles_findEnds(UnitigVector &UNUSED(unitigs),
- double UNUSED(erateBubble),
- Unitig *bubble,
- ufNode &fFrg,
- ufNode &lFrg,
- Unitig *UNUSED(target)) {
-
- // Search for edges. For a bubble to exist, at least one of the first or last non-contained
- // fragment must have an edge to the 'target' unitig (by construction of the inputs to this
- // routine). Ideally, both the first and last will have edges to the same unitig, but we'll test
- // and allow only a single edge.
-
- uint32 zIdx = UINT32_MAX;
- uint32 fIdx = zIdx;
- uint32 lIdx = zIdx;
-
- // We'd like to claim that all unitigs begin with a non-contained fragment, but zombie fragments
- // (contained fragments that are in a circular containment relationship) violate this. So, we
- // could then claim that unitigs with more than one fragment begin with a non-contained fragment.
- // But any zombie that has a bubble popped into it violate this.
- //
- // Since nothing is really using the best edges from non-contained fragmnet, in cases where the
- // first fragment is contained, we'll use the first and last fragments, instead of the first and
- // last non-contained fragments.
-
- // Search for the first and last non-contained fragments.
-
- for (uint32 ii=0; ((fIdx == zIdx) && (ii < bubble->ufpath.size())); ii++)
- if (OG->isContained(bubble->ufpath[ii].ident) == false)
- fIdx = ii;
-
- for (uint32 ii=bubble->ufpath.size(); ((lIdx == zIdx) && (ii-- > 0)); )
- if (OG->isContained(bubble->ufpath[ii].ident) == false)
- lIdx = ii;
-
- // Didn't find a non-contained fragment! Reset to the first/last fragments.
-
- if (fIdx == zIdx) {
-#ifdef LOG_BUBBLE_TESTS
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments STARTS WITH A CONTAINED FRAGMENT %d\n",
- bubble->id(), bubble->getLength(), bubble->ufpath.size(),
- bubble->ufpath[0].ident);
-#endif
- fIdx = 0;
- lIdx = bubble->ufpath.size() - 1;
- }
-
- assert(fIdx <= lIdx);
- assert(fIdx != zIdx);
- assert(lIdx != zIdx);
-
- fFrg = bubble->ufpath[fIdx]; // NOTE: A COPY, not a pointer or reference.
- lFrg = bubble->ufpath[lIdx]; // These get modified.
-
- // Grab the best edges outside the unitig. If the first fragment is reversed, we want
- // to grab the edge off of the 3' end; opposite for the last fragment.
- //
- // There is ALWAYS a best edge, even for contained fragments. The edge might be empty, with fragId == 0.
-
- bool f3p = (isReverse(fFrg.position) == true);
- BestEdgeOverlap *fEdge = OG->getBestEdgeOverlap(fFrg.ident, f3p);
- uint32 fUtg = Unitig::fragIn(fEdge->fragId());
-
- bool l3p = (isReverse(lFrg.position) == false);
- BestEdgeOverlap *lEdge = OG->getBestEdgeOverlap(lFrg.ident, l3p);
- uint32 lUtg = Unitig::fragIn(lEdge->fragId());
-
- // Just make sure...those edges should NOT to be to ourself. But if they are, we'll just ignore
- // them -- these can be from circular unitigs (in which case we can't really merge ourself into
- // ourself at the correct spot) OR from a bubble that was already merged and just happened to tie
- // for a fragment at the end.
- //
- // aaaaaaaaaa
- // aaaaaaaaaaa
- // bbbbbbbbb
- // bbbbbbbbbb
- //
- // The 'b' unitig was merged into a. The second b fragment now becomes the last non-contained
- // fragment in the merged unitig, and it has a best edge back to the original 'a' fragment,
- // ourself.
-
- if (fUtg == bubble->id()) {
- fEdge = NULL;
- fUtg = 0;
- }
- if (lUtg == bubble->id()) {
- lEdge = NULL;
- lUtg = 0;
- }
-
-#ifdef LOG_BUBBLE_TESTS
- if ((fUtg != 0) && (lUtg != 0))
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments. Edges (%d/%d') from frag %d/%d' and (%d/%d') from frag %d/%d'\n",
- bubble->id(), bubble->getLength(), bubble->ufpath.size(),
- fEdge->fragId(), (fEdge->frag3p() ? 3 : 5), fFrg.ident, (f3p ? 3 : 5),
- lEdge->fragId(), (lEdge->frag3p() ? 3 : 5), lFrg.ident, (l3p ? 3 : 5));
- else if (fUtg != 0)
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments. Edge (%d/%d') from frag %d/%d'\n",
- bubble->id(), bubble->getLength(), bubble->ufpath.size(),
- fEdge->fragId(), (fEdge->frag3p() ? 3 : 5), fFrg.ident, (f3p ? 3 : 5));
- else if (lUtg != 0)
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments. Edge (%d/%d') from frag %d/%d'\n",
- bubble->id(), bubble->getLength(), bubble->ufpath.size(),
- lEdge->fragId(), (lEdge->frag3p() ? 3 : 5), lFrg.ident, (l3p ? 3 : 5));
- else
- // But then how do we get an intersection?!?!! Intersections from a bubble that was
- // already popped. We pop A into B, and while iterating through fragments in B we find
- // the -- now obsolete -- intersections we originally used and try to pop it again.
- //
- writeLog("popBubbles()-- Potential bubble unitig %d of length %d with %lu fragments. NO EDGES, no bubble.\n",
- bubble->id(), bubble->getLength(), bubble->ufpath.size());
-#endif
-
- if ((fUtg == 0) && (lUtg == 0))
- return(false);
-
-#ifdef LOG_BUBBLE_TESTS
- // The only interesting case here is if we have both edges and they point to different unitigs.
- // We might as well place it aggressively.
-
- if ((fUtg != 0) && (lUtg != 0) && (fUtg != lUtg)) {
- writeLog("popBubbles()-- bubble unitig %d has edges to both unitig %d and unitig %d\n",
- bubble->id(), fUtg, lUtg);
- return(true);
- }
-#endif
-
- return(true);
-}
-
-
-//  Place one end read of a bubble into 'target' using overlaps, and keep in 'best' the placement
-//  with the lowest error rate among those covering at least 99% of the read.
-//
-//    fragID - id of the read to place
-//    label  - "fFrg" or "lFrg"; only used to tag log messages
-//    best   - in/out; its error rate is reset to an impossibly bad sentinel, and it is overwritten
-//             only if an acceptable placement is found.  Its position is otherwise left as the
-//             caller constructed it.
-//
-static
-void
-mergeBubbles_placeEnd(UnitigVector      &unitigs,
-                      double             erateBubble,
-                      Unitig            *target,
-                      uint32             fragID,
-                      const char        *label,
-                      overlapPlacement  &best) {
-  vector<overlapPlacement>   placements;
-
-  //  Sentinel error rate: any acceptable placement compares lower than this.
-  best.errors  = 4.0e9;
-  best.aligned = 1;
-
-  placeFragUsingOverlaps(unitigs, erateBubble, target, fragID, placements);
-
-#ifdef LOG_BUBBLE_TESTS
-  writeLog("popBubbles()-- %s %u has %u potential placements in unitig %u.\n",
-           label, fragID, (uint32)placements.size(), target->id());
-#endif
-
-  for (uint32 i=0; i<placements.size(); i++) {
-    overlapPlacement  &p = placements[i];
-
-    assert(p.tigID == target->id());
-
-    bool  lowCoverage = (p.fCoverage < 0.99);
-
-#ifdef LOG_BUBBLE_FAILURE
-    writeLog("popBubbles()-- %s %u %s coverage %f at unitig %u %u,%u\n",
-             label, fragID, (lowCoverage ? "low" : "GOOD"),
-             p.fCoverage,
-             p.tigID,
-             p.position.bgn, p.position.end);
-#endif
-
-    if (lowCoverage)
-      continue;
-
-    bool  better = (p.errors / p.aligned < best.errors / best.aligned);
-
-#ifdef LOG_BUBBLE_FAILURE
-    writeLog("popBubbles()-- %s %u %s identity %f at unitig %u %u,%u\n",
-             label, fragID, (better ? "GOOD" : "low"),
-             p.errors / p.aligned,
-             p.tigID,
-             p.position.bgn, p.position.end);
-#endif
-
-    if (better)
-      best = p;
-  }
-}
-
-
-//  Test whether the first (fFrg) and last (lFrg) reads of 'bubble' can both be placed in 'target'
-//  at a consistent distance and orientation.
-//
-//  Returns true if both reads have a placement, the length test passes, and either the two reads
-//  are the same read or their relative order and orientations are all preserved or all flipped.
-//  On true, fFrg and lFrg are overwritten with nodes describing their placement in 'target'
-//  (contained/parent/hangs/containment_depth zeroed).  On false, fFrg and lFrg are unchanged.
-//
-bool
-mergeBubbles_checkEnds(UnitigVector &unitigs,
-                       double erateBubble,
-                       Unitig *bubble,
-                       ufNode &fFrg,
-                       ufNode &lFrg,
-                       Unitig *target) {
-  ufNode            fFrgN = fFrg;
-  ufNode            lFrgN = lFrg;
-
-  overlapPlacement  fFrgPlacement;
-  overlapPlacement  lFrgPlacement;
-
-  //  Place the first read.  A placement position of 0,0 is taken to mean "no placement found".
-
-  mergeBubbles_placeEnd(unitigs, erateBubble, target, fFrg.ident, "fFrg", fFrgPlacement);
-
-  fFrgN.ident             = fFrgPlacement.frgID;
-  fFrgN.contained         = 0;
-  fFrgN.parent            = 0;
-  fFrgN.ahang             = 0;
-  fFrgN.bhang             = 0;
-  fFrgN.position          = fFrgPlacement.position;
-  fFrgN.containment_depth = 0;
-
-  if ((fFrgN.position.bgn == 0) &&
-      (fFrgN.position.end == 0)) {
-#ifdef LOG_BUBBLE_FAILURE
-    writeLog("popBubbles()-- failed to place fFrg.\n");
-#endif
-    return(false);
-  }
-
-  //  Place the last read, same rules.
-
-  mergeBubbles_placeEnd(unitigs, erateBubble, target, lFrg.ident, "lFrg", lFrgPlacement);
-
-  lFrgN.ident             = lFrgPlacement.frgID;
-  lFrgN.contained         = 0;
-  lFrgN.parent            = 0;
-  lFrgN.ahang             = 0;
-  lFrgN.bhang             = 0;
-  lFrgN.position          = lFrgPlacement.position;
-  lFrgN.containment_depth = 0;
-
-  if ((lFrgN.position.bgn == 0) &&
-      (lFrgN.position.end == 0)) {
-#ifdef LOG_BUBBLE_FAILURE
-    writeLog("popBubbles()-- failed to place lFrg.\n");
-#endif
-    return(false);
-  }
-
-  //  Compare the span of the two end reads against the bubble length; reject if they differ by
-  //  more than a factor of two.
-  //
-  //  NOTE(review): the span is computed from fFrg/lFrg (the positions passed in by the caller),
-  //  not from the new placements fFrgN/lFrgN -- confirm this is intended.
-
-  int32 minL = MIN(fFrg.position.bgn, fFrg.position.end);
-  int32 maxL = MAX(fFrg.position.bgn, fFrg.position.end);
-
-  int32 minR = MIN(lFrg.position.bgn, lFrg.position.end);
-  int32 maxR = MAX(lFrg.position.bgn, lFrg.position.end);
-
-  int32 placedLen = MAX(maxL, maxR) - MIN(minL, minR);
-
-  if (2 * placedLen < bubble->getLength()) {
-    //  Too short.
-#ifdef LOG_BUBBLE_FAILURE
-    writeLog("popBubbles()-- too short.  fFrg %d,%d lFrg %d,%d.  L %d,%d R %d,%d len %d\n",
-             fFrg.position.bgn, fFrg.position.end,
-             lFrg.position.bgn, lFrg.position.end,
-             minL, maxL, minR, maxR, placedLen);
-#endif
-    return(false);
-  }
-
-  if (2 * bubble->getLength() < placedLen) {
-    //  Too long.
-#ifdef LOG_BUBBLE_FAILURE
-    writeLog("popBubbles()-- too long.  fFrg %d,%d lFrg %d,%d.  L %d,%d R %d,%d len %d\n",
-             fFrg.position.bgn, fFrg.position.end,
-             lFrg.position.bgn, lFrg.position.end,
-             minL, maxL, minR, maxR, placedLen);
-#endif
-    return(false);
-  }
-
-  //  If fFrg and lFrg are the same read (the bubble is one uncontained read) there is no relative
-  //  order or orientation to check.
-
-  if (fFrg.ident == lFrg.ident) {
-    fFrg = fFrgN;
-    lFrg = lFrgN;
-    return(true);
-  }
-
-  //  Otherwise the before (b) and after (n) layouts must agree: either both orientations and the
-  //  order are preserved, or all three are flipped (the bubble was placed reversed).
-
-  bool bL   = (isReverse(fFrg.position));
-  bool bR   = (isReverse(lFrg.position));
-  bool bOrd = (MIN(fFrg.position.bgn, fFrg.position.end) < MIN(lFrg.position.bgn, lFrg.position.end));
-
-  bool nL   = (isReverse(fFrgN.position));
-  bool nR   = (isReverse(lFrgN.position));
-  bool nOrd = (MIN(fFrgN.position.bgn, fFrgN.position.end) < MIN(lFrgN.position.bgn, lFrgN.position.end));
-
-  if (((bL == nL) && (bR == nR) && (bOrd == nOrd)) ||
-      ((bL != nL) && (bR != nR) && (bOrd != nOrd))) {
-    fFrg = fFrgN;
-    lFrg = lFrgN;
-    return(true);
-  }
-
-  //  Inconsistent order/orientation.
-
-#ifdef LOG_BUBBLE_FAILURE
-  writeLog("popBubbles()-- Order/Orientation problem.  bL %d bR %d bOrd %d  nL %d nR %d nOrd %d\n",
-           bL, bR, bOrd,
-           nL, nR, nOrd);
-#endif
-  return(false);
-}
-
-
-//  Try to place every read of 'bubble' into 'target', near the already-placed end reads fFrg and
-//  lFrg, and if all of them can be placed, add them to 'target'.
-//
-//  A read fails if it has no placement that
-//    - intersects the fFrg/lFrg extent (widened by half its length on each side), and
-//    - covers at least 99% of the read, unless the read is contained and its best container is
-//      also in 'bubble'.
-//  Among acceptable placements, higher coverage wins; at equal-or-higher coverage, lower error
-//  rate wins.
-//
-//  Returns true if all reads were placed; they are then added to 'target' and 'target' is sorted.
-//  The bubble unitig itself is NOT deleted here -- that is left to the caller.
-//  Returns false otherwise, after marking 'bubble' as a potential bubble.
-//
-static
-bool
-mergeBubbles_checkFrags(UnitigVector &unitigs,
-                        double erateBubble,
-                        Unitig *bubble,
-                        ufNode &fFrg,
-                        ufNode &lFrg,
-                        Unitig *target) {
-
-  //  All placements for each read, and the one we finally pick.  Vectors replace the original
-  //  new[]/delete[] pairs so no explicit cleanup path is needed.
-
-  vector<vector<overlapPlacement> >  placements(bubble->ufpath.size());
-  vector<overlapPlacement>           correctPlace(bubble->ufpath.size());
-
-  for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
-    placeFragUsingOverlaps(unitigs, erateBubble, target, bubble->ufpath[fi].ident, placements[fi]);
-
-    //  Initialize the final placement to be bad, so we can pick the best.
-    correctPlace[fi].fCoverage = 0.0;
-    correctPlace[fi].errors    = 4.0e9;
-    correctPlace[fi].aligned   = 1;
-  }
-
-  //  Extent spanned by the two end reads.  This takes the plain min/max over all four coordinates;
-  //  the original author notes that a bubble made of a single chimeric read broke an
-  //  order-dependent version of this computation.
-
-  int32 minE = MIN(MIN(fFrg.position.bgn, fFrg.position.end), MIN(lFrg.position.bgn, lFrg.position.end));
-  int32 maxE = MAX(MAX(fFrg.position.bgn, fFrg.position.end), MAX(lFrg.position.bgn, lFrg.position.end));
-  int32 diff = maxE - minE;
-
-  assert(minE < maxE);
-
-  //  Widen the acceptable window by half the extent on each side, clamped at zero.
-
-  minE -= diff / 2;  if (minE < 0)  minE = 0;
-  maxE += diff / 2;
-
-  uint32  nCorrect = 0;
-
-  for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
-    uint32  nNotPlaced                  = 0;
-    uint32  nNotPlacedInCorrectPosition = 0;
-    uint32  nNotPlacedFully             = 0;
-    uint32  nNotOriented                = 0;   //  Never incremented; reported in the log only.
-
-    if (placements[fi].size() == 0)
-      nNotPlaced++;
-
-    //  If we're contained, and our container is actually in the bubble, we can (or should be able
-    //  to) safely allow almost any placement.
-
-    bool requireFullAlignment = true;
-
-    if ((OG->isContained(bubble->ufpath[fi].ident) == true) &&
-        (bubble->fragIn(OG->getBestContainer(bubble->ufpath[fi].ident)->container) == bubble->id()))
-      requireFullAlignment = false;
-
-    for (uint32 pl=0; pl<placements[fi].size(); pl++) {
-      assert(placements[fi][pl].tigID == target->id());
-      if (placements[fi][pl].tigID != target->id())  continue;   //  Guard for builds without asserts.
-
-      int32 minP = MIN(placements[fi][pl].position.bgn, placements[fi][pl].position.end);
-      int32 maxP = MAX(placements[fi][pl].position.bgn, placements[fi][pl].position.end);
-
-      if ((maxP < minE) || (maxE < minP)) {
-        nNotPlacedInCorrectPosition++;
-        continue;
-      }
-
-      if ((requireFullAlignment == true) && (placements[fi][pl].fCoverage < 0.99)) {
-        nNotPlacedFully++;
-        continue;
-      }
-
-      //  The current placement is acceptable.  Should we keep it?
-      //
-      //  Coverage is compared first because, for contained reads (which skip the coverage test
-      //  above), a low-coverage placement can have less error than a full-coverage placement at
-      //  the same location.  Without the coverage comparison the worse placement would be picked.
-
-      bool keepIt = false;
-
-      if (placements[fi][pl].fCoverage > correctPlace[fi].fCoverage)
-        //  More coverage than the saved one.
-        keepIt = true;
-
-      if ((placements[fi][pl].fCoverage >= correctPlace[fi].fCoverage) &&
-          (placements[fi][pl].errors / placements[fi][pl].aligned < correctPlace[fi].errors / correctPlace[fi].aligned))
-        //  At least as much coverage, and lower error.
-        keepIt = true;
-
-      if (keepIt)
-        correctPlace[fi] = placements[fi][pl];
-    }
-
-    if (correctPlace[fi].fCoverage > 0) {
-      nCorrect++;
-    } else {
-      //  ALL reads must be well placed, so stop at the first one that fails.
-#ifdef LOG_BUBBLE_FAILURE
-      writeLog("popBubbles()-- Failed to place frag %d notPlaced %d notPlacedInCorrectPosition %d notPlacedFully %d notOriented %d\n",
-               bubble->ufpath[fi].ident, nNotPlaced, nNotPlacedInCorrectPosition, nNotPlacedFully, nNotOriented);
-#endif
-      break;
-    }
-  }
-
-  bool success = (nCorrect == bubble->ufpath.size());
-
-  if (success == true) {
-    //  Move the reads into the target unitig.
-    //
-    //  Explicitly DO NOT propagate contained, parent, ahang or bhang from the bubble.  They are
-    //  not a simple copy from the bubble unitig (for example, the bubble could have been added
-    //  reversed).
-
-    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
-      ufNode  nFrg;
-
-      nFrg.ident              = correctPlace[fi].frgID;
-      nFrg.contained          = 0;
-      nFrg.parent             = 0;
-      nFrg.ahang              = 0;
-      nFrg.bhang              = 0;
-      nFrg.position           = correctPlace[fi].position;
-      nFrg.containment_depth  = 0;
-
-      target->addFrag(nFrg, 0, logFileFlagSet(LOG_INTERSECTION_BUBBLES_DEBUG));
-    }
-
-    target->sort();
-
-#ifdef LOG_BUBBLE_SUCCESS
-    writeLog("popBubbles()-- merged bubble unitig %d with %ld frags into unitig %d now with %ld frags\n",
-             bubble->id(), (long)bubble->ufpath.size(), target->id(), (long)target->ufpath.size());
-#endif
-
-  } else {
-    //  Not successful; mark this unitig as a potential bubble.
-
-    writeLog("popBubbles()-- bubble unitig %d (reads %u length %u) has large differences, not popped into unitig %d\n",
-             bubble->id(), (uint32)bubble->ufpath.size(), bubble->getLength(), target->id());
-    bubble->_isBubble = true;
-  }
-
-  return(success);
-}
-
-
-
-//  Pop bubble unitigs into 'target'.
-//
-//  For every read in 'target', walk the intersections recorded for that read in 'ilist'.  Each
-//  intersection names an invading read; the unitig holding that read is a candidate bubble.  A
-//  candidate is merged if it passes mergeBubbles_findEnds(), mergeBubbles_checkEnds() and
-//  mergeBubbles_checkFrags(), in that order.  After a merge the bubble unitig is removed from
-//  'unitigs' and deleted, and the scan over 'target' restarts from its first read, because the
-//  read list has changed and the new reads may bring more intersections.
-//
-//  The same bubble can be tested more than once (it normally has two intersection edges); this is
-//  accepted, since the two edges may point at different locations.
-//
-void
-mergeBubbles(UnitigVector &unitigs, double erateBubble, Unitig *target, intersectionList *ilist) {
-  uint32  fi = 0;
-
-  while (fi < target->ufpath.size()) {
-    //  Copy the id: a successful merge modifies target->ufpath.
-    uint32              fragID = target->ufpath[fi].ident;
-    intersectionPoint  *isect  = ilist->getIntersection(fragID, 0);
-    bool                merged = false;
-
-    for (; (isect != NULL) && (isect->isectFrg == fragID); isect++) {
-      assert(target->id() == Unitig::fragIn(isect->isectFrg));
-
-      //  Grab the potential bubble unitig.
-
-      Unitig *bubble = unitigs[Unitig::fragIn(isect->invadFrg)];
-
-      if (bubble == NULL)
-        //  Unitig was repeat/unique split and the repeats shattered.
-        continue;
-
-      assert(bubble->id() == Unitig::fragIn(isect->invadFrg));
-
-      //  Skip long unitigs.  A length limit is used instead of a read-count limit because read
-      //  count means very different unitig sizes for short and long read data.
-
-      if (bubble->getLength() > 50000)
-        continue;
-
-      if (bubble->id() == target->id())
-        //  We're not a bubble in ourself.
-        continue;
-
-      ufNode fFrg;   //  First read in the bubble
-      ufNode lFrg;   //  Last read in the bubble
-
-      if (mergeBubbles_findEnds(unitigs, erateBubble, bubble, fFrg, lFrg, target) == false)
-        continue;
-
-      if (mergeBubbles_checkEnds(unitigs, erateBubble, bubble, fFrg, lFrg, target) == false)
-        continue;
-
-      if (mergeBubbles_checkFrags(unitigs, erateBubble, bubble, fFrg, lFrg, target) == false)
-        continue;
-
-      //  Merged!  Delete the unitig we just merged in and skip the rest of the intersections for
-      //  this read.
-
-      unitigs[bubble->id()] = NULL;
-      delete bubble;
-
-      merged = true;
-      break;
-    }
-
-    //  After a merge, restart from read 0.  The previous code set the index to 0 and then let the
-    //  loop increment run, so the restart actually began at read 1 and read 0 was never revisited.
-
-    fi = (merged == true) ? 0 : fi + 1;
-  }
-}
-
-
-
-//void
-//stealBubbles(UnitigVector &unitigs, double erateBubble, Unitig *target, intersectionList *ilist) {
-//}
-
-
-
-
-
-//  Fill 'ovlFrags' with the ids of reads that overlap some read of 'target' (overlaps as returned
-//  by OC->getOverlaps() at 'erateRepeat') but are not themselves in 'target'.  Any previous
-//  contents of 'ovlFrags' are discarded.
-//
-void
-markRepeats_buildOverlapList(Unitig *target, double erateRepeat, set<uint32> &ovlFrags) {
-
-  ovlFrags.clear();
-
-  for (uint32 readIdx=0; readIdx<target->ufpath.size(); readIdx++) {
-    uint32       nOverlaps = 0;
-    BAToverlap  *overlaps  = OC->getOverlaps(target->ufpath[readIdx].ident, erateRepeat, nOverlaps);
-
-    for (uint32 oi=0; oi != nOverlaps; oi++) {
-      uint32  otherID = overlaps[oi].b_iid;
-
-      //  Reads already in this unitig are of no interest.
-      if (Unitig::fragIn(otherID) == target->id())
-        continue;
-
-      ovlFrags.insert(otherID);
-    }
-  }
-}
-
-
-
-
-void
-markRepeats_computeUnitigErrorRate(UnitigVector &unitigs,
- double erateRepeat,
- Unitig *target,
- double &meanError,
- double &stddevError) {
-
- vector<overlapPlacement> op;
- vector<double> error;
-
- meanError = 0;
- stddevError = 0;
-
-#undef DUMPERROR
-#ifdef DUMPERROR
- char N[FILENAME_MAX];
- sprintf(N, "error.%08d.dat", target->id());
- FILE *F = fopen(N, "w");
-#endif
-
- for (uint32 fi=0; fi<target->ufpath.size(); fi++) {
- ufNode *frg = &target->ufpath[fi];
- uint32 bgn = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
- uint32 end = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
-
- placeFragUsingOverlaps(unitigs, erateRepeat, target, frg->ident, op);
-
- if (op.size() == 0)
- // Huh? Couldn't be placed in my own unitig?
- continue;
- assert(op.size() > 0);
-
- double minError = 1.0;
- double corError = 1.0;
-
- for (uint32 pl=0; pl<op.size(); pl++) {
- assert(op[pl].tigID == target->id());
-
- double e = op[pl].errors / op[pl].aligned;
-
- minError = MIN(minError, e);
-
- if (op[pl].position.bgn < op[pl].position.end) {
- if ((op[pl].position.end < bgn) ||
- (end < op[pl].position.bgn))
- continue;
- } else {
- if ((op[pl].position.bgn < bgn) ||
- (end < op[pl].position.end))
- continue;
- }
-
- corError = MIN(corError, e);
- }
-
-#ifdef DUMPERROR
- fprintf(F, "%f\t%f\n", corError, minError);
-#endif
-
- if (corError < 1.0)
- error.push_back(corError);
- else
- error.push_back(minError);
- }
-
-#ifdef DUMPERROR
- fclose(F);
-#endif
-
- for (uint32 i=0; i<error.size(); i++)
- meanError += error[i];
-
- meanError /= error.size();
-
- for (uint32 i=0; i<error.size(); i++)
- stddevError += (error[i] - meanError) * (error[i] - meanError);
-
- stddevError = sqrt(stddevError / error.size());
-
- writeLog("markRepeats_computeUnitigErrorRate()-- tig %d error %f +- %f\n",
- target->id(), meanError, stddevError);
-}
-
-
-
-//  Place every read listed in 'ovlFrags' into 'target' and record what the placements say about
-//  repeats.  Both output containers are emptied first.
-//
-//  Placements whose error rate exceeds meanError + 3 * stddevError are ignored.  For each
-//  remaining placement:
-//    - its verified interval is added to 'aligned';
-//    - if it covers more than 99% of the read, nothing more is done;
-//    - if its position lies outside [0, target length], nothing more is done;
-//    - otherwise a repeatJunctionEvidence is built from it and appended to 'evidence', unless the
-//      evidence has a default-constructed tigFrag.
-//
-void
-markRepeats_placeAndProcessOverlaps(UnitigVector                     &unitigs,
-                                    double                            erateRepeat,
-                                    Unitig                           *target,
-                                    double                            meanError,
-                                    double                            stddevError,
-                                    set<uint32>                      &ovlFrags,
-                                    intervalList<int32>              &aligned,
-                                    vector<repeatJunctionEvidence>   &evidence) {
-
-  aligned.clear();
-  evidence.clear();
-
-  set<uint32>::iterator  readIt = ovlFrags.begin();
-
-  while (readIt != ovlFrags.end()) {
-    vector<overlapPlacement>  places;
-
-    placeFragUsingOverlaps(unitigs, erateRepeat, target, *readIt, places);
-
-    readIt++;
-
-    for (uint32 pi=0; pi<places.size(); pi++) {
-      overlapPlacement  &place = places[pi];
-
-      assert(place.tigID == target->id());
-
-      //  Too much error relative to the reads already in the unitig?
-      double  erate = place.errors / place.aligned;
-
-      if (meanError + 3 * stddevError < erate)
-        continue;
-
-      //  Save the aligned portion.  This will be used in conjunction with the partially aligned
-      //  reads to pick out where to split.
-
-      assert(place.verified.bgn >= 0);
-      assert(place.verified.bgn <= target->getLength());  //  is bgn unsigned and underflowed?
-      assert(place.verified.end <= target->getLength());
-
-      if (place.verified.bgn < place.verified.end)
-        aligned.add(place.verified.bgn, place.verified.end - place.verified.bgn);
-      else
-        aligned.add(place.verified.end, place.verified.bgn - place.verified.end);
-
-      //  Fully placed reads give no junction evidence.
-      if (place.fCoverage > 0.99)
-        continue;
-
-      //  Neither do reads placed outside the range of the unitig.
-      bool  insideTig = ((place.position.bgn >= 0) &&
-                         (place.position.bgn <= target->getLength()) &&
-                         (place.position.end >= 0) &&
-                         (place.position.end <= target->getLength()));
-
-      if (insideTig == false)
-        continue;
-
-      //  Placed in the target unitig at less than full coverage.
-      repeatJunctionEvidence  ev(target, place);
-
-      //  A default tigFrag means the evidence didn't pass muster; weak overhangs likely.
-      if (ev.tigFrag == FragmentEnd())
-        continue;
-
-      evidence.push_back(ev);
-    }
-  }
-}
-
-
-#if 0
-// DISABLED (inside #if 0) and unfinished: this function is never compiled.
-//
-// It builds an interval list of the read positions in 'tig', derives an intervalDepth from it,
-// and starts to compute coverage statistics.  As written it always returns medCov, which is
-// never assigned after its initialization to 0.
-//
-// NOTE(review): several problems would need fixing before enabling this:
-//  - maxCov starts at UINT32_MAX, so the MAX() loop cannot lower it, and after maxCov++ the
-//    histogram is allocated with that many entries;
-//  - histogram is allocated with new[] and never deleted;
-//  - the last loop runs dd up to maxCov but uses dd to index depth intervals;
-//  - the minCov update keeps the largest depth seen and the maxCov update the smallest, which
-//    looks inverted;
-//  - aveCov and modCov are never used.
-uint32
-markRepeats_computeUnitigCoverage(Unitig *tig) {
- intervalList<int32> coverage;
-
- // Add one interval per read, normalizing reversed reads to (low coordinate, length).
- for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
- ufNode frg = tig->ufpath[fi];
-
- if (frg.position.bgn < frg.position.end)
- coverage.add(frg.position.bgn, frg.position.end - frg.position.bgn);
- else
- coverage.add(frg.position.end, frg.position.bgn - frg.position.end);
- }
-
- coverage.merge();
-
- intervalDepth depth(coverage);
-
- uint64 minCov = 0; // Probably 1 or 2
- uint64 medCov = 0; //
- uint64 aveCov = 0; // If repeats present, might be high
- uint64 modCov = 0; // Probably not useful, due to read bias
- uint64 maxCov = UINT32_MAX; //
-
- for (uint32 dd=0; dd<depth.numberOfIntervals(); dd++)
- maxCov = MAX(maxCov, depth.de(dd));
- maxCov++;
-
- uint32 *histogram = new uint32 [maxCov];
-
- memset(histogram, 0, sizeof(uint32) * maxCov);
-
- // Accumulate, for each depth value, the total length of the intervals at that depth.
- for (uint32 dd=0; dd<depth.numberOfIntervals(); dd++)
- histogram[depth.de(dd)] += depth.hi(dd) - depth.lo(dd);
-
- for (uint32 dd=0; dd<maxCov; dd++) {
- if (depth.de(dd) == 0)
- continue;
-
- if (minCov < depth.de(dd))
- minCov = depth.de(dd);
-
- if (depth.de(dd) < maxCov)
- maxCov = depth.de(dd);
-
- // blah.
- }
-
- return(medCov);
-}
-#endif
-
-
-
-//  Turn the raw 'aligned' intervals (portions of 'target' covered by reads from elsewhere) into a
-//  list of repeat 'regions'.
-//
-//  This is the end consumer of the 'aligned' data; 'aligned' is modified in place and is only
-//  used to populate 'regions', which keep just bgn,end coordinates.
-//
-//  Steps:
-//   1. Drop intervals whose depth is at or below SPURIOUS_COVERAGE_THRESHOLD.
-//   2. Snap each remaining interval's ends to read ends in 'target'.  An end expands to a read
-//      end lying within minOverlap/2 outside it (such a read is not anchored), otherwise it
-//      contracts to a read end inside the interval.  An interval can end up reversed (hi < lo).
-//   3. Discard reversed intervals, and intervals spanned by a single read with at least
-//      minOverlap/2 bases beyond each end (except where the interval is that close to the end
-//      of the unitig).
-//   4. Append the survivors to 'regions', merging one into the previous region when it starts
-//      within 100 bases of that region's end.
-//
-//  'regions' is cleared first.
-//
-void
-markRepeats_filterIntervalsSpannedByFragment(Unitig                *target,
-                                             intervalList<int32>   &aligned,
-                                             vector<repeatRegion>  &regions,
-                                             uint32                 minOverlap) {
-  uint32   tiglen                 = target->getLength();
-
-  uint32   spuriousNoiseThreshold = SPURIOUS_COVERAGE_THRESHOLD;
-  uint32   filteredBases          = 0;
-  uint32   filteredCovered        = 0;
-
-  //  Built from the unmerged intervals; queried below with depth(dd) for per-interval depth.
-  intervalList<int32>   depth(aligned);
-
-  aligned.merge();  //  Only so the log message can report a region count.
-
-  writeLog("markRepeats()--  filtering low coverage spurious with t=%u in %u repeat regions (%u depth regions)\n",
-           spuriousNoiseThreshold, aligned.numberOfIntervals(), depth.numberOfIntervals());
-
-  regions.clear();
-  aligned.clear();
-
-  //
-  //  Remove low coverage areas by making a new map for the high coverage areas.
-  //
-
-  for (uint32 dd=0; dd<depth.numberOfIntervals(); dd++) {
-    if (depth.depth(dd) == 0)
-      continue;
-
-    if (depth.depth(dd) <= spuriousNoiseThreshold) {
-      filteredBases += depth.hi(dd) - depth.lo(dd);
-      continue;
-    }
-
-    aligned.add(depth.lo(dd), depth.hi(dd) - depth.lo(dd));
-  }
-
-  aligned.merge();
-
-  writeLog("markRepeats()--  filtered %u bases, now with %u repeat regions\n",
-           filteredBases, aligned.numberOfIntervals());
-
-  //
-  //  Adjust region boundaries so they land on the first read end that makes sense.
-  //
-  //  For the begin, decide if we need to expand or contract the region.  We will expand if the
-  //  start of the read before the region is not anchored.  We will contract otherwise.
-  //
-  //  For the end, it is more complicated because the end points are not sorted.
-  //
-
-  for (uint32 i=0; i<aligned.numberOfIntervals(); i++) {
-    uint32  intbgn = aligned.lo(i);
-    uint32  intend = aligned.hi(i);
-
-    //  Begin: scan reads in unitig order and stop at the first read that decides the boundary.
-
-    for (uint32 fi=0; fi<target->ufpath.size(); fi++) {
-      ufNode  *frg    = &target->ufpath[fi];
-      uint32   frgbgn = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
-
-      if (frgbgn + minOverlap / 2 < intbgn)
-        //  Anchored.
-        continue;
-
-      if (frgbgn == intbgn)
-        //  Perfect!  Don't change a thing.
-        break;
-
-      if ((frgbgn <= intbgn) && (intbgn <= frgbgn + minOverlap / 2)) {
-        //  Not anchored, expand the region to this location.
-#ifdef VERBOSE_REGION_FITTING
-        writeLog("markRepeats()--  region[" F_U32 "].bgn expands from " F_U32 " to " F_U32 " at frag " F_U32 "\n", i, aligned.lo(i), frgbgn, frg->ident);
-#endif
-        aligned.lo(i) = frgbgn;
-        break;
-      }
-
-      if (intbgn <= frgbgn) {
-        //  First read begin that is inside the repeat region.
-#ifdef VERBOSE_REGION_FITTING
-        writeLog("markRepeats()--  region[" F_U32 "].bgn contracts from " F_U32 " to " F_U32 " at frag " F_U32 "\n", i, aligned.lo(i), frgbgn, frg->ident);
-#endif
-        aligned.lo(i) = frgbgn;
-        break;
-      }
-    }
-
-    //  End: scan reads in reverse order, remembering the best expansion and the best contraction
-    //  candidates.  An id of UINT32_MAX means "no candidate".
-
-    uint32  newexp = 0, newexpid = UINT32_MAX;
-    uint32  newcnt = 0, newcntid = UINT32_MAX;
-
-    for (uint32 fi=target->ufpath.size(); fi-- > 0; ) {
-      ufNode  *frg    = &target->ufpath[fi];
-      uint32   frgbgn = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
-      uint32   frgend = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
-
-      if (intend + minOverlap / 2 < frgend)
-        //  Anchored.
-        continue;
-
-      if (frgend == intend) {
-        //  Perfect!  Forget any contraction candidate.  An expansion candidate found earlier in
-        //  this scan is deliberately kept.
-        newcntid = UINT32_MAX;
-        break;
-      }
-
-      if ((intend < frgend) && (frgend <= intend + minOverlap / 2) && (newexp < frgend)) {
-        //  Not anchored, expand the region to this location (this will pick the largest expansion).
-        newexp   = frgend;
-        newexpid = frg->ident;
-        continue;
-      }
-
-      if ((frgend <= intend) && (newcnt < frgend)) {
-        //  Pick the largest read end that is within the repeat region.
-        newcnt   = frgend;
-        newcntid = frg->ident;
-        continue;
-      }
-
-      if (frgbgn + AS_MAX_READLEN < intend)
-        //  All done, no more intersections possible.
-        break;
-    }
-
-    //  Expand the region if an expansion was found, otherwise contract if a contraction was found.
-
-    if (newexpid != UINT32_MAX) {
-#ifdef VERBOSE_REGION_FITTING
-      writeLog("markRepeats()--  region[" F_U32 "].end expands from " F_U32 " to " F_U32 " at frag " F_U32 "\n", i, aligned.hi(i), newexp, newexpid);
-#endif
-      aligned.hi(i) = newexp;
-    }
-
-    else if (newcntid != UINT32_MAX) {
-#ifdef VERBOSE_REGION_FITTING
-      writeLog("markRepeats()--  region[" F_U32 "].end contracts from " F_U32 " to " F_U32 " at frag " F_U32 "\n", i, aligned.hi(i), newcnt, newcntid);
-#endif
-      aligned.hi(i) = newcnt;
-    }
-  }
-
-  //  Report how many intervals were contracted to nothing (reversed).
-
-  {
-    uint32  nc = 0;
-
-    for (uint32 i=0; i<aligned.numberOfIntervals(); i++)
-      if (aligned.hi(i) < aligned.lo(i))
-        nc++;
-
-    writeLog("markRepeats()--  filtered " F_U32 " repeat regions after picking read endpoints, now with " F_U32 " repeat regions.\n",
-             nc, aligned.numberOfIntervals() - nc);
-  }
-
-  //
-  //  Discard the interval if there is a fragment that contains it with enough overhang to
-  //  unambiguously place it in a unitig.
-  //
-
-  for (uint32 i=0; i<aligned.numberOfIntervals(); i++) {
-    ufNode  *containedIn  = NULL;
-
-    //  If the region is backwards, then the region is contained in a read.  The easiest case to
-    //  argue is:
-    //
-    //        ------------
-    //     -------RRR------
-    //          --------------
-    //
-    //  The read with the repeat region is anchored on both sides, so the region is contracted to
-    //  the next begin (for the start) and the previous end (for the end).
-
-    if (aligned.hi(i) < aligned.lo(i))
-      continue;
-
-    //  Require the spanning read to have 'unique' bases of non-repeat sequence on each side, by
-    //  growing the interval we search with.  A side is not grown if that would run off the end of
-    //  the unitig.
-
-    uint32  rptbgn = aligned.lo(i);
-    uint32  rptend = aligned.hi(i);
-
-    uint32  unique = minOverlap / 2;
-
-    if (unique <= rptbgn)
-      rptbgn -= unique;
-
-    if (rptend + unique <= tiglen)
-      rptend += unique;
-
-    //  Search for a covering fragment.
-
-    for (uint32 fi=0; fi<target->ufpath.size(); fi++) {
-      ufNode  *frg    = &target->ufpath[fi];
-      uint32   frgbgn = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
-      uint32   frgend = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
-
-      if (frgend < rptbgn)
-        //  Fragment is before the region, keep searching for a spanning fragment.
-        continue;
-
-      if (rptend < frgbgn)
-        //  Fragment is after the region, we're finished.
-        break;
-
-      if ((frgbgn <= rptbgn) &&
-          (rptend <= frgend)) {
-        //  Fragment contains the region with acceptable overhangs into the non-repeat area.
-        containedIn = frg;
-        break;
-      }
-    }
-
-    if (containedIn != NULL) {
-      filteredCovered++;
-      continue;
-    }
-
-    //  Keep it: start a new region, or extend the previous one if this interval begins within 100
-    //  bases of its end.
-
-    if ((regions.size() == 0) ||
-        (regions.back().end + 100 < aligned.lo(i))) {
-      regions.push_back(repeatRegion(aligned.lo(i), aligned.hi(i)));
-
-    } else {
-      regions.back().bgn = MIN(regions.back().bgn, aligned.lo(i));
-      regions.back().end = MAX(regions.back().end, aligned.hi(i));
-    }
-  }
-
-  writeLog("markRepeats()--  filtered %u repeat regions contained in a read, now with %u repeat regions\n",
-           filteredCovered, (uint32)regions.size());
-}
-
-
-
-
-//  For each repeat region, record the reads of 'target' that lie in it, and the reads whose ends
-//  coincide with the region's ends.
-//
-//  A read is added to 'rptFrags' unless it has more than minOverlap/2 bases outside the region
-//  on at least one side -- that is, the read is completely within the region, or close enough to
-//  its edge that it maybe couldn't be placed uniquely.
-//
-//  If a read begins exactly at a region's begin, or ends exactly at a region's end, the region's
-//  rujBgn / rujEnd break point is set from that read.  If several reads qualify, the last one in
-//  unitig order wins.
-//
-//  'ejtFrags' is unused.  'rptFrags' is only added to, never cleared.
-//
-void
-markRepeats_findFragsInRegions(Unitig                 *target,
-                               vector<repeatRegion>   &regions,
-                               set<uint32>            &rptFrags,
-                               set<uint32>            &UNUSED(ejtFrags),
-                               uint32                  minOverlap) {
-
-  for (uint32 i=0; i<regions.size(); i++) {
-    for (uint32 fi=0; fi<target->ufpath.size(); fi++) {
-      ufNode  *frg = &target->ufpath[fi];
-
-      //  Orientation-normalized coordinates: bgn is the low coordinate, end the high one.
-      uint32   bgn = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
-      uint32   end = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
-
-      //  NOTE(review): because bgn/end are normalized, 'end < bgn' below is always false and
-      //  'bgn < end' is true for any read of non-zero length, regardless of the read's
-      //  orientation.  Confirm whether frg->position was meant here.
-
-      if (regions[i].bgn == bgn)
-        regions[i].rujBgn = repeatUniqueBreakPoint(regions[i].bgn,
-                                                   FragmentEnd(frg->ident, end < bgn),
-                                                   false);
-
-      if (regions[i].end == end)
-        regions[i].rujEnd = repeatUniqueBreakPoint(regions[i].end,
-                                                   FragmentEnd(frg->ident, bgn < end),
-                                                   true);
-
-      //  If the read has more than minOverlap/2 bases outside the repeat, assume it is placed
-      //  correctly.
-      if ((bgn + minOverlap/2 < regions[i].bgn) ||
-          (regions[i].end + minOverlap/2 < end))
-        continue;
-
-      //  Otherwise, the read is 'contained' in a repeat region.  Remember it for later processing.
-      rptFrags.insert(frg->ident);
-    }
-  }
-}
-
-
-
-
-
-
-// Find any junctions in a region, and append them to list of fragments to split on.
-// This takes multiple lines of evidence (pointing to the same fragment end) and
-// combines them into a list of break points.
-void
-markRepeats_filterJunctions(Unitig *target,
- vector<repeatRegion> &regions,
- vector<repeatJunctionEvidence> &evidence,
- vector<repeatUniqueBreakPoint> &breakpoints) {
-
- map<FragmentEnd,uint32> brkFrags;
- map<FragmentEnd,repeatUniqueBreakPoint> brkJunct;
-
- // Add breakpoints for each of the end points of the repeat region.
-
- for (uint32 rr=0; rr<regions.size(); rr++) {
- FragmentEnd &rujBgnFrg = regions[rr].rujBgn.breakFrag;
- FragmentEnd &rujEndFrg = regions[rr].rujEnd.breakFrag;
-
- if (rujBgnFrg.fragId() > 0) {
- brkFrags[rujBgnFrg] = REGION_END_WEIGHT;
- brkJunct[rujBgnFrg] = regions[rr].rujBgn;
- }
-
- if (rujEndFrg.fragId() > 0) {
- brkFrags[rujEndFrg] = REGION_END_WEIGHT;
- brkJunct[rujEndFrg] = regions[rr].rujEnd;
- }
- }
-
- //
-
- sort(evidence.begin(), evidence.end());
-
- for (uint32 ai=0, bi=0; ai<evidence.size(); ai++) {
- repeatUniqueBreakPoint ruj;
-
- if (evidence[ai].is3 == false) {
- assert(evidence[ai].uncovered5bgn < evidence[ai].uncovered5end);
- ruj = repeatUniqueBreakPoint(evidence[ai].point, evidence[ai].tigFrag, false);
- } else {
- assert(evidence[ai].uncovered3bgn < evidence[ai].uncovered3end);
- ruj = repeatUniqueBreakPoint(evidence[ai].point, evidence[ai].tigFrag, true);
- }
-
- // Try to associate this junction with one of the repeat regions. If there is no region,
- // this is NOT a junction we care about. The region must have been contained in a fragment.
-
- while ((bi < regions.size()) &&
- (regions[bi].end < ruj.point))
- // Advance the region until it ends after the point.
- bi++;
-
- // If this point is in the region, the region bgn will be lower (or equal) than the point. We
- // already ensured that the region end is after the point.
-
- if ((bi >= regions.size()) ||
- (ruj.point < regions[bi].bgn))
- continue;
-
- assert(regions[bi].bgn <= ruj.point);
- assert(ruj.point <= regions[bi].end);
-
- // A new valid break point.
-
- // NOTE! ruj's seem to be different. We used to save the 5th ruj, and switching to saving the
- // last showed differences in position (ruj.point) of the break. The point comes directly from
- // the evidence[] above, so no surprise.
-
- brkFrags[evidence[ai].tigFrag]++;
- brkJunct[evidence[ai].tigFrag] = ruj;
- }
-
- for (map<FragmentEnd,uint32>::iterator it=brkFrags.begin(); it != brkFrags.end(); it++) {
- uint32 cnt = brkFrags[it->first];
- repeatUniqueBreakPoint ruj = brkJunct[it->first];
-
- if (cnt < ISECT_NEEDED_TO_BREAK)
- continue;
-
- breakpoints.push_back(ruj);
- }
-
- sort(breakpoints.begin(), breakpoints.end());
-
- writeLog("markRepeats()-- unitig %d has "F_SIZE_T" interesting junctions:\n",
- target->id(), breakpoints.size());
-
- for (uint32 ji=0; ji<breakpoints.size(); ji++)
- writeLog("markRepeats()-- junction["F_U32"] at "F_U32"/%c' position "F_U32" repeat %s count "F_U32"\n",
- ji,
- breakpoints[ji].breakFrag.fragId(), breakpoints[ji].breakFrag.frag3p() ? '3' : '5',
- breakpoints[ji].point,
- breakpoints[ji].rptLeft ? "<-" : "->",
- brkFrags[breakpoints[ji].breakFrag]);
-}
-
-
-
-
-void
-markRepeats_breakUnitigs(UnitigVector &unitigs,
- double erateRepeat,
- Unitig *target,
- vector<overlapPlacement> &UNUSED(places),
- vector<repeatUniqueBreakPoint> &breakpoints,
- set<uint32> &jctFrags,
- set<uint32> &rptFrags,
- set<uint32> &ejtFrags) {
-
- jctFrags.clear();
-
- if (breakpoints.size() == 0)
- return;
-
- uint32 *breakID = new uint32 [target->ufpath.size()];
-
- uint32 nextBreakPoint = 0;
- uint32 curr = 1;
- uint32 next = 2;
-
- for (uint32 fi=0; fi<target->ufpath.size(); fi++) {
- ufNode *frg = &target->ufpath[fi];
- uint32 bgn = (frg->position.bgn < frg->position.end) ? frg->position.bgn : frg->position.end;
- uint32 end = (frg->position.bgn < frg->position.end) ? frg->position.end : frg->position.bgn;
-
- // If out of breakpoints, put all the remaining reads into the current tig.
- if (nextBreakPoint >= breakpoints.size()) {
- breakID[fi] = curr;
-
- } else if (breakpoints[nextBreakPoint].rptLeft == false) {
- // Repeat to the right. If the fragment starts at or after the junction, place this and
- // future fragments into a new (repeat) unitig.
-
- if (breakpoints[nextBreakPoint].point <= bgn) {
- nextBreakPoint++;
- curr++;
- next++;
- jctFrags.insert(frg->ident);
- }
-
- // If the fragment ends after the next junction, this fragment goes to the next
- // unitig. Otherwise, to the current one.
- //
- if ((nextBreakPoint < breakpoints.size()) &&
- (breakpoints[nextBreakPoint].point < end) &&
- (breakpoints[nextBreakPoint].rptLeft == true)) {
- breakID[fi] = next;
- } else {
- breakID[fi] = curr;
- }
-
- } else {
- // Repeat to the left. If the fragment ends before the junction, move to the current unitig
-
- if (end < breakpoints[nextBreakPoint].point) {
- breakID[fi] = curr;
- jctFrags.insert(frg->ident);
- } else {
- breakID[fi] = next;
- }
-
- // Once we pass the junction, update pointers. We are out of the repeat interval now.
- if (breakpoints[nextBreakPoint].point < bgn) {
- nextBreakPoint++;
- curr++;
- next++;
- }
- }
- }
-
- // Append new unitigs.
-
- vector<Unitig *> newTigs;
- Unitig **uidToUnitig = new Unitig * [next + 1];
- uint32 *uidToOffset = new uint32 [next + 1];
-
- memset(uidToUnitig, 0, sizeof(Unitig *) * (next + 1));
- memset(uidToOffset, 0, sizeof(uint32) * (next + 1));
-
- for (uint32 fi=0; fi<target->ufpath.size(); fi++) {
- ufNode &frg = target->ufpath[fi];
- uint32 bid = breakID[fi];
-
- if (ejtFrags.count(frg.ident) > 0) {
- writeLog("markRepeats()-- EJECT unanchored frag %u from unitig %u\n",
- frg.ident, target->id());
- target->removeFrag(frg.ident);
- continue;
- }
-
- if (uidToUnitig[bid] == NULL) {
- uidToUnitig[bid] = unitigs.newUnitig(false); // Add a new unitig to the unitigs list
- //uidToUnitig[bid]->_isRepeat = true;
-
- uidToOffset[bid] = -MIN(frg.position.bgn, frg.position.end);
-
- newTigs.push_back(uidToUnitig[bid]); // For reporting below.
- }
-
- uidToUnitig[bid]->addFrag(frg, uidToOffset[bid], false);
- }
-
- delete [] breakID;
- delete [] uidToUnitig;
- delete [] uidToOffset;
-
-
- if (newTigs.size() > 0) {
- writeLog("markRepeats()-- SPLIT unitig %d of length %u with %ld fragments into "F_SIZE_T" unitigs:\n",
- target->id(), target->getLength(), target->ufpath.size(),
- newTigs.size());
-
- for (uint32 ti=0; ti<newTigs.size(); ti++) {
- Unitig *tig = newTigs[ti];
- uint32 nRept = 0;
- uint32 nUniq = 0;
-
- for (uint32 fi=0; fi < tig->ufpath.size(); fi++) {
- ufNode &frg = tig->ufpath[fi];
-
- if (rptFrags.count(frg.ident) == 1)
- nRept++;
- else
- nUniq++;
- }
-
- if (nRept > nUniq)
- tig->_isRepeat = true;
-
- writeLog("markRepeats()-- unitig %u of length %u with %ld fragments (%u %.4f repeat and %u %.4f non-repeat).\n",
- tig->id(),
- tig->getLength(),
- tig->ufpath.size(),
- nRept, (double)nRept / (nRept + nUniq),
- nUniq, (double)nUniq / (nRept + nUniq));
- }
-
- writeLog("markRepeats()-- DELETE unitig %d\n", target->id());
- unitigs[target->id()] = NULL;
- delete target;
- }
-
- // Run back over the ejected frags, and place them at their best location.
-
- for (set<uint32>::iterator it=ejtFrags.begin(); it!=ejtFrags.end(); it++) {
- writeLog("markRepeats()-- EJECT frag "F_U32"\n", *it);
- placeFragInBestLocation(unitigs, erateRepeat, *it);
- }
-
- writeLog("markRepeats()-- FINISHED.\n");
-}
-
-
-
-void
-markRepeats_shatterRepeats(UnitigVector &unitigs,
- set<uint32> &jctFrags,
- set<uint32> &rptFrags) {
-
- // For each junction read (defined to be the first/last read in a repeat unitig), shatter the unitig.
-
- for (set<uint32>::iterator it=jctFrags.begin(); it!=jctFrags.end(); it++) {
- uint32 iid = *it;
- uint32 ti = Unitig::fragIn(iid);
- Unitig *rpt = unitigs[ti];
-
- if ((ti == 0) || (rpt == NULL))
- // Already shattered?
- continue;
-
- writeLog("markRepeats()-- shatter unitig %u with "F_SIZE_T" fragments from repeat frag %u\n",
- ti, rpt->ufpath.size(), iid);
-
- for (uint32 fi=0; fi<rpt->ufpath.size(); fi++)
- rpt->removeFrag(rpt->ufpath[fi].ident);
-
- unitigs[ti] = NULL;
- delete rpt;
- }
-
- // For each repeat read (defined to be a read contained nearly entirely in a repeat region), count
- // the number that are still in a unitig.
-
- for (set<uint32>::iterator it=rptFrags.begin(); it!=rptFrags.end(); it++) {
- uint32 iid = *it;
- uint32 ti = Unitig::fragIn(iid);
- Unitig *rpt = unitigs[ti];
-
- if ((ti == 0) || (rpt == NULL))
- // Already shattered?
- continue;
-
- writeLog("markRepeats()-- frag "F_U32" covered by repeats, but still in unitig %d\n",
- iid, ti);
- }
-}
-
-
-
-
-// Build a list of all the fragments that have overlaps to some fragment in this unitig.
-// Exclude from the list fragments that are already in this unitig. We expect these fragments
-// to have multiple overlaps to the unitig, and we want to examine each one only once.
-//
-// Use placeFragUsingOverlaps() to place each of these fragments. Ideally, we'd restrict to just
-// this unitig, but for now we need to filter the results. Three results:
-//
-// o Fragment is fully contained in this unitig. Why isn't it a bubble? It could be contained
-// completely in a repeat, and the repeat is in two different unitigs.
-//
-// o Fragment is partially aligned. This could be indicating a repeat or chimera that we should
-// be splitting. We save the location of the break, and direction of the unaligned piece.
-//
-// After all fragments are 'placed' the list of breaks is examined.
-//
-// o A chimer will induce about as many breaks as the local depth. Breaks will be on both
-// sides of the chimeric point.
-//
-// o A repeat will have many breaks on one side, and few to none on the other side.
-//
-// o A spur will look like a repeat, but at the end of a unitig, with few to no fragments
-// following.
-//
-void
-markRepeats(UnitigVector &unitigs,
- double erateRepeat,
- Unitig *target,
- uint32 minOverlap,
- bool shatterRepeats) {
-
- set<uint32> ovlFrags;
- set<uint32> rptFrags; // Frag IIDs of fragments covered by repeat alignments
- set<uint32> jctFrags; // Frag IIDs of the first/last fragment in a repeat unitig
- set<uint32> ejtFrags; // Frag IIDs of frags we should eject instead of split
-
- double meanError = 0;
- double stddevError = 0;
-
- intervalList<int32> aligned;
-
- vector<overlapPlacement> places;
-
- vector<repeatRegion> regions;
- vector<repeatJunctionEvidence> evidence;
-
- vector<repeatUniqueBreakPoint> breakpoints;
-
- // Build a list of all the fragments that have overlaps to this unitig.
- markRepeats_buildOverlapList(target, erateRepeat, ovlFrags);
-
- // Decide what a decent alignment should look like.
- markRepeats_computeUnitigErrorRate(unitigs, erateRepeat, target, meanError, stddevError);
-
- // For each overlapping fragment, place it and process.
- markRepeats_placeAndProcessOverlaps(unitigs, erateRepeat, target, meanError, stddevError, ovlFrags, aligned, evidence);
-
- // Convert 'aligned' into regions, throwing out weak ones and those contained in a fragment.
- markRepeats_filterIntervalsSpannedByFragment(target, aligned, regions, minOverlap);
-
- markRepeats_findFragsInRegions(target, regions, rptFrags, ejtFrags, minOverlap);
-
- // Discard junctions that are not in a remaining region.
- markRepeats_filterJunctions(target, regions, evidence, breakpoints);
-
- // Split at whatever junctions remain.
-
- // You'd think declaring this a critical region would work, but it resulted in deadlock on
- // Linux 2.6.32-279.22.1.el6.x86_64
- // g++ (GCC) 4.7.1
- //#pragma omp critical
-
- omp_set_lock(&markRepeat_breakUnitigs_Lock);
- markRepeats_breakUnitigs(unitigs, erateRepeat, target, places, breakpoints, jctFrags, rptFrags, ejtFrags);
- omp_unset_lock(&markRepeat_breakUnitigs_Lock);
-
- // For each repeat unitig, shatter into fragments (not singleton unitigs) so we can later re-BOG.
- if (shatterRepeats)
- markRepeats_shatterRepeats(unitigs, jctFrags, rptFrags);
-}
-
-
-
-
-//void
-//markChimera(UnitigVector &unitigs,
-// double erateRepeat,
-// Unitig *target) {
-//}
-
-
-
-void
-mergeSplitJoin(UnitigVector &unitigs,
- double UNUSED(erateGraph), double erateBubble, double UNUSED(erateMerge), double erateRepeat,
- const char *prefix,
- uint32 minOverlap,
- bool shatterRepeats,
- uint64 genomeSize) {
-
- //logFileFlags |= LOG_PLACE_FRAG;
- //logFileFlags &= ~LOG_PLACE_FRAG;
-
- // BUILD A LIST OF ALL INTERSECTIONS - build a reverse mapping of all BestEdges that are between
- // unitigs. For each fragment, we want to have a list of the incoming edges from other unitigs.
-
- intersectionList *ilist = new intersectionList(unitigs);
-
- //ilist->logIntersections();
-
-#if 0
- {
- Unitig *target = unitigs[5];
-
- writeLog("popBubbles()-- WORKING on unitig %d/"F_SIZE_T" with %ld fragments.\n",
- target->id(), unitigs.size(), target->ufpath.size());
-
- mergeBubbles(unitigs, erateBubble, target, ilist);
- //stealBubbles(unitigs, erateBubble, target, ilist);
- markRepeats(unitigs, erateRepeat, target, minOverlap, shatterRepeats);
- //markChimera(unitigs, erateRepeat, target);
- exit(1);
- }
-#endif
-
- // Bubble popping
- //
- // This used to be done right before each unitig was examined for repeats.
- // It cannot be done in parallel -- there is a race condition when both unitigs
- // A and B are considering merging in unitig C.
-
- setLogFile(prefix, "popBubbles");
- writeLog("popBubbles()-- working on "F_U64" unitigs.\n", unitigs.size());
-
- for (uint32 ti=0; ti<unitigs.size(); ti++) {
- Unitig *target = unitigs[ti];
-
- if ((target == NULL) ||
- (target->ufpath.size() < 1) || // was 15
- (target->getLength() < 300))
- continue;
-
- //writeLog("popBubbles()-- WORKING on unitig %d/"F_SIZE_T" of length %u with %ld fragments.\n",
- // target->id(), unitigs.size(), target->getLength(), target->ufpath.size());
-
- mergeBubbles(unitigs, erateBubble, target, ilist);
- //stealBubbles(unitigs, erateBubble, target, ilist);
- }
-
- reportOverlapsUsed(unitigs, prefix, "popBubbles");
- reportUnitigs(unitigs, prefix, "popBubbles", genomeSize);
-
- // Since we create new unitigs for any of the splits, we need to remember
- // where to stop. We don't want to re-examine any of the split unitigs.
- // Reevaluating seems to just trim off a few fragments at the end of the unitig.
-
- uint32 tiLimit = unitigs.size();
- uint32 numThreads = omp_get_max_threads();
- uint32 blockSize = (tiLimit < 100000 * numThreads) ? numThreads : tiLimit / 99999;
-
- setLogFile(prefix, "mergeSplitJoin");
- writeLog("repeatDetect()-- working on "F_U32" unitigs, with "F_U32" threads.\n", tiLimit, numThreads);
-
- omp_init_lock(&markRepeat_breakUnitigs_Lock);
-
-#pragma omp parallel for schedule(dynamic, blockSize)
- for (uint32 ti=0; ti<tiLimit; ti++) {
- Unitig *target = unitigs[ti];
-
- if ((target == NULL) ||
- (target->ufpath.size() < 15) ||
- (target->getLength() < 300))
- continue;
-
- //writeLog("repeatDetect()-- WORKING on unitig %d/"F_SIZE_T" of length %u with %ld fragments.\n",
- // target->id(), unitigs.size(), target->getLength(), target->ufpath.size());
-
- markRepeats(unitigs, erateRepeat, target, minOverlap, shatterRepeats);
- //markChimera(unitigs, erateRepeat, target);
- }
-
- omp_destroy_lock(&markRepeat_breakUnitigs_Lock);
-
- reportOverlapsUsed(unitigs, prefix, "mergeSplitJoin");
- reportUnitigs(unitigs, prefix, "mergeSplitJoin", genomeSize);
-
- // JOIN EXPOSED BEST - after bubbles are stolen, this should leave some unitigs
- // with exposed best edges that can now be connected.
-
- // do we need to re-mark repeats after joining?
-
- // SPLIT MARKED REPEATS -
-
- // SPLIT MARKED CHIMERA -
-
- // MERGE LEFTOVERS - these are the leftover pieces after repeats/chimera are split. Hopefully
- // they'll just be low coverage spurs
-
- delete ilist;
-
- logFileFlags &= ~LOG_PLACE_FRAG;
-}
diff --git a/src/bogart/AS_BAT_MergeSplitJoin.H b/src/bogart/AS_BAT_MergeSplitJoin.H
deleted file mode 100644
index 6b872f4..0000000
--- a/src/bogart/AS_BAT_MergeSplitJoin.H
+++ /dev/null
@@ -1,57 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_MergeSplitJoin.H
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2011-FEB-15 to 2013-AUG-01
- * are Copyright 2011-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2014-DEC-19 to 2015-JUN-03
- * are Copyright 2014-2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#ifndef INCLUDE_AS_BAT_MERGESPLITJOIN
-#define INCLUDE_AS_BAT_MERGESPLITJOIN
-
-void
-mergeSplitJoin(UnitigVector &unitigs,
- double erateGraph, double erateBubble, double erateMerge, double erateRepeat,
- const char *prefix,
- uint32 minOverlap,
- bool shatterRepeats,
- uint64 genomeSize);
-
-void
-reconstructRepeats(UnitigVector &unitigs,
- double erateGraph);
-
-void
-promoteToSingleton(UnitigVector &unitigs);
-
-
-#endif // INCLUDE_AS_BAT_MERGESPLITJOIN
diff --git a/src/bogart/AS_BAT_MergeUnitigs.C b/src/bogart/AS_BAT_MergeUnitigs.C
new file mode 100644
index 0000000..e513550
--- /dev/null
+++ b/src/bogart/AS_BAT_MergeUnitigs.C
@@ -0,0 +1,246 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-MAY-17
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#include "AS_BAT_FragmentInfo.H"
+#include "AS_BAT_OverlapCache.H"
+#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
+#include "AS_BAT_PlaceFragUsingOverlaps.H"
+
+#include "intervalList.H"
+#include "stddev.H"
+
+#include <vector>
+
+using namespace std;
+
+
+
+
+void
+mergeUnitigs_findPlacements(UnitigVector &unitigs,
+ ufNode *rd,
+ double deviation,
+ vector<overlapPlacement> &validPlacements) {
+ vector<overlapPlacement> placements;
+
+ placeFragUsingOverlaps(unitigs, AS_MAX_ERATE, NULL, rd->ident, placements);
+
+ for (uint32 pi=0; pi<placements.size(); pi++) {
+ Unitig *tig = unitigs[placements[pi].tigID];
+
+ uint32 bgn = placements[pi].position.min();
+ uint32 end = placements[pi].position.max();
+
+ double erate = placements[pi].errors / placements[pi].aligned;
+
+ if ((rd->position.min() < end) && (bgn < rd->position.max())) // Ignore placements to the same place
+ continue;
+
+ if ((placements[pi].fCoverage < 0.99) || // Ignore partially placed reads.
+ (tig->ufpath.size() == 1)) { // Ignore placements in singletons.
+ //writeLog("read %8u tig %6u (%8u-%8u) placed -- tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f) - LOW COV or SINGLETON\n",
+ // rd->ident, Unitig::fragIn(rd->ident), rd->position.bgn, rd->position.end,
+ // placements[pi].tigID, tig->ufpath.size(), placements[pi].position.bgn, placements[pi].position.end, placements[pi].fCoverage, erate);
+ continue;
+ }
+
+ if (tig->overlapConsistentWithTig(deviation, bgn, end, erate) < 0.5) {
+ //if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
+ // writeLog("read %8u tig %6u (%8u-%8u) placed -- tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f) - HIGH ERROR\n",
+ // rd->ident, Unitig::fragIn(rd->ident), rd->position.bgn, rd->position.end,
+ // placements[pi].tigID, tig->ufpath.size(), placements[pi].position.bgn, placements[pi].position.end, placements[pi].fCoverage, erate);
+ continue;
+ }
+
+ writeLog("read %8u tig %6u (%8u-%8u) placed -- tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f)\n",
+ rd->ident, Unitig::fragIn(rd->ident), rd->position.bgn, rd->position.end,
+ placements[pi].tigID, tig->ufpath.size(), placements[pi].position.bgn, placements[pi].position.end, placements[pi].fCoverage, erate);
+
+ validPlacements.push_back(placements[pi]);
+ }
+}
+
+
+
+
+
+void
+mergeUnitigs(UnitigVector &unitigs,
+ double deviation,
+ bool findCircularTigs) {
+
+
+ // For every tig, decide if it can merge, end-to-end, with some other tig. This operation
+ // should occur before bubbles are popped (so that whatever we chop off can be popped as a
+ // bubble) and repeats are split (so that whatever we join can be split if it's not supported).
+ //
+ // The basic idea is that the end read on each tig should align to the middle of the other tig.
+ // If the reads between those also align, we can merge. If they do not align, we should split
+ // off one end and join. The split off end is either a bubble, or we made bad joins and will
+ // end up with four pieces after repeat breaking.
+ //
+ // -----------------------------------
+ // ^^^^ ----
+ // |||| ||||
+ // ---- vvvv
+ // ----------------------------------
+ //
+ // This is the same basic operation as for finding circular tigs, and those
+ // are found too. However, this should occur after bubbles and repeats.
+
+
+
+
+ // Step 1: For every end read, place it. Save only placements that are full-length and
+ // compatible with the destination tig.
+
+ vector<overlapPlacement> validPlacements;
+
+ for (uint32 ti=0; ti<unitigs.size(); ti++) {
+ Unitig *tig = unitigs[ti];
+
+ if ((tig == NULL) ||
+ (tig->getNumFrags() < 2) ||
+ (tig->_isUnassembled == true))
+ continue;
+
+ ufNode *f = tig->firstRead();
+ ufNode *l = tig->lastRead();
+
+ if (f == l)
+ continue;
+
+ mergeUnitigs_findPlacements(unitigs, f, deviation, validPlacements);
+ mergeUnitigs_findPlacements(unitigs, l, deviation, validPlacements);
+ }
+
+ writeLog("Found "F_SIZE_T" valid placements of end reads.\n", validPlacements.size());
+
+ // Step 2: Find pairs of placements between two tigs.
+
+ vector<pair<uint32, uint32> > potentialCircles;
+ vector<pair<uint32, uint32> > potentialMerges;
+
+ for (uint32 pa=0; pa<validPlacements.size(); pa++) {
+ uint32 paSrcTigID = Unitig::fragIn(validPlacements[pa].frgID);
+ uint32 paDstTigID = validPlacements[pa].tigID;
+
+ for (uint32 pb=pa+1; pb<validPlacements.size(); pb++) {
+ uint32 pbSrcTigID = Unitig::fragIn(validPlacements[pb].frgID);
+ uint32 pbDstTigID = validPlacements[pb].tigID;
+
+ if (validPlacements[pa].frgID == validPlacements[pb].frgID) // Whatever we're trying, we can't use the same read twice.
+ continue;
+
+ if ((paDstTigID == paSrcTigID) && // pa placed in same tig as it came from
+ (pbDstTigID == pbSrcTigID) && // pb placed in same tig as it came from
+ (paSrcTigID == pbSrcTigID)) { // and both placed in same tig
+ potentialCircles.push_back(pair<uint32,uint32>(pa, pb));
+ continue;
+ }
+
+ if ((paDstTigID == pbSrcTigID) && // pa placed in same tig as pb came from
+ (pbDstTigID == paSrcTigID)) { // pb placed in same tig as pa came from
+ potentialMerges.push_back(pair<uint32,uint32>(pa, pb));
+ continue;
+ }
+ }
+ }
+
+ writeLog("Found "F_SIZE_T" potential circular tigs.\n", potentialCircles.size());
+ writeLog("Found "F_SIZE_T" potential joins.\n", potentialMerges.size());
+
+
+ // Step 3: For the potential circles, each read needs to be placed with the same orientation as
+ // its source, and the distance between (paSrc,pbDst) and (pbSrc,paDst) needs to be
+ // (approximately) the same. Then, we should really check the reads between those two points.
+
+ for (uint32 pc=0; pc<potentialCircles.size(); pc++) {
+ uint32 pa = potentialCircles[pc].first;
+ uint32 pb = potentialCircles[pc].second;
+
+ uint32 paReadID = validPlacements[pa].frgID;
+ uint32 pbReadID = validPlacements[pb].frgID;
+
+ uint32 tigID = validPlacements[pa].tigID; // All reads placed in the same tig, see above.
+ Unitig *tig = unitigs[tigID];
+
+ ufNode *paRead = &tig->ufpath[Unitig::pathPosition(paReadID)];
+ bool paSrcFwd = paRead->position.isForward();
+ bool paDstFwd = validPlacements[pa].position.isForward();
+
+ ufNode *pbRead = &tig->ufpath[Unitig::pathPosition(pbReadID)];
+ bool pbSrcFwd = pbRead->position.isForward();
+ bool pbDstFwd = validPlacements[pb].position.isForward();
+
+ writeLog("TEST CIRCULAR - tig %u - pa=%u pb=%u - reads %u @ %u-%u -> %u-%u and %u @ %u-%u -> %u-%u\n",
+ tigID, pa, pb,
+ paReadID, paRead->position.bgn, paRead->position.end, validPlacements[pa].position.bgn, validPlacements[pa].position.end,
+ pbReadID, pbRead->position.bgn, pbRead->position.end, validPlacements[pb].position.bgn, validPlacements[pb].position.end);
+
+ if ((paSrcFwd != paDstFwd) ||
+ (pbSrcFwd != pbDstFwd)) {
+ writeLog("not circular - orient mismatch for tig %u pa %u pb %u reads %u and %u\n",
+ tigID, pa, pb, paReadID, pbReadID);
+ continue;
+ }
+ }
+
+
+
+ for (uint32 pc=0; pc<potentialMerges.size(); pc++) {
+ uint32 pa = potentialMerges[pc].first;
+ uint32 pb = potentialMerges[pc].second;
+
+ uint32 paReadID = validPlacements[pa].frgID;
+ uint32 pbReadID = validPlacements[pb].frgID;
+
+ uint32 paTigID = validPlacements[pa].tigID; // All reads placed in the same tig, see above.
+ uint32 pbTigID = validPlacements[pb].tigID; // All reads placed in the same tig, see above.
+
+ Unitig *paTig = unitigs[paTigID];
+ Unitig *pbTig = unitigs[pbTigID];
+
+ ufNode *paRead = &paTig->ufpath[Unitig::pathPosition(paReadID)];
+ bool paSrcFwd = paRead->position.isForward();
+ bool paDstFwd = validPlacements[pa].position.isForward();
+
+ ufNode *pbRead = &pbTig->ufpath[Unitig::pathPosition(pbReadID)];
+ bool pbSrcFwd = pbRead->position.isForward();
+ bool pbDstFwd = validPlacements[pb].position.isForward();
+
+ writeLog("TEST JOIN - pa tig %u read %u @ %u-%u -> %u-%u -- pb tig %u read %u @ %u-%u -> %u-%u\n",
+ paTigID, paReadID, paRead->position.bgn, paRead->position.end, validPlacements[pa].position.bgn, validPlacements[pa].position.end,
+ pbTigID, pbReadID, pbRead->position.bgn, pbRead->position.end, validPlacements[pb].position.bgn, validPlacements[pb].position.end);
+ }
+
+
+
+
+ exit(0);
+}
diff --git a/src/AS_UTL/stddev.C b/src/bogart/AS_BAT_MergeUnitigs.H
similarity index 61%
copy from src/AS_UTL/stddev.C
copy to src/bogart/AS_BAT_MergeUnitigs.H
index 2b587b7..a876c76 100644
--- a/src/AS_UTL/stddev.C
+++ b/src/bogart/AS_BAT_MergeUnitigs.H
@@ -13,21 +13,9 @@
* Canu branched from Celera Assembler at its revision 4587.
* Canu branched from the kmer project at its revision 1994.
*
- * This file is derived from:
- *
- * src/AS_TER/analyzePosMap-libraryFate.C
- *
* Modifications by:
*
- * Brian P. Walenz from 2012-DEC-04 to 2013-SEP-23
- * are Copyright 2012-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2015-APR-10 to 2015-AUG-18
- * are Copyright 2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
+ * Brian P. Walenz beginning on 2016-MAY-17
* are a 'United States Government Work', and
* are released in the public domain
*
@@ -35,7 +23,13 @@
* full conditions and disclaimers for each license.
*/
-#include "stddev.H"
+#ifndef INCLUDE_AS_BAT_MERGE_UNITIGS
+#define INCLUDE_AS_BAT_MERGE_UNITIGS
+
-#include <algorithm>
+void
+mergeUnitigs(UnitigVector &unitigs,
+ double deviation,
+ bool findCircularTigs);
+#endif // INCLUDE_AS_BAT_MERGE_UNITIGS
diff --git a/src/bogart/AS_BAT_Outputs.C b/src/bogart/AS_BAT_Outputs.C
index e7074cd..d763304 100644
--- a/src/bogart/AS_BAT_Outputs.C
+++ b/src/bogart/AS_BAT_Outputs.C
@@ -31,29 +31,33 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-30
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
+#include "AS_BAT_FragmentInfo.H"
+#include "AS_BAT_OverlapCache.H"
#include "AS_BAT_BestOverlapGraph.H"
-#include "AS_BAT_Instrumentation.H"
-
-#include "AS_BAT_Outputs.H"
+#include "AS_BAT_Logging.H"
-//#include "AS_CGB_histo.H"
-//#include "tgStore.H"
+#include "AS_BAT_Unitig.H"
+#include "AS_BAT_PlaceFragUsingOverlaps.H"
+#include "tgStore.H"
-// Massage the Unitig into a MultiAlignT (also used in SplitChunks_CGW.c)
void
unitigToTig(tgTig *tig,
uint32 tigid,
Unitig *utg) {
+ // Initialize the output tig.
+
tig->clear();
tig->_tigID = tigid;
@@ -62,6 +66,8 @@ unitigToTig(tgTig *tig,
tig->_coverageStat = 1.0; // Default to just barely unique
tig->_microhetProb = 1.0; // Default to 100% probability of unique
+ // Set the class.
+
if (utg->_isUnassembled == true)
tig->_class = tgTig_unassembled;
@@ -76,116 +82,17 @@ unitigToTig(tgTig *tig,
tig->_layoutLen = utg->getLength();
- resizeArray(tig->_children, tig->_childrenLen, tig->_childrenMax, utg->ufpath.size(), resizeArray_doNothing);
+ // Transfer reads from the bogart tig to the output tig.
- map<uint32,bool> forward;
- map<uint32,bool> allreads;
-
- // Just for stats, build a map fo the reads in the unitig.
-
- for (uint32 ti=0; ti<utg->ufpath.size(); ti++)
- allreads[utg->ufpath[ti].ident] = true;
-
- // Process all reads.
+ resizeArray(tig->_children, tig->_childrenLen, tig->_childrenMax, utg->ufpath.size(), resizeArray_doNothing);
for (uint32 ti=0; ti<utg->ufpath.size(); ti++) {
ufNode *frg = &utg->ufpath[ti];
- // Remember that we've placed this read, and if it was forward or reverse.
- forward[frg->ident] = (frg->position.bgn < frg->position.end);
-
- // If the first read, just dump it in the unitig with no parent.
- if (ti == 0) {
- tig->addChild()->set(frg->ident, 0, 0, 0, frg->position.bgn, frg->position.end);
- continue;
- }
-
- // Otherwise, find the thickest overlap to any read already placed in the unitig.
-
- uint32 olapsLen = 0;
- BAToverlap *olaps = OC->getOverlaps(frg->ident, AS_MAX_EVALUE, olapsLen);
-
- uint32 tt = UINT32_MAX;
- uint32 ttLen = 0;
- double ttErr = DBL_MAX;
-
- int32 ah = 0;
- int32 bh = 0;
-
- uint32 notPresent = 0; // Potential parent isn't in the unitig
- uint32 notPlaced = 0; // Potential parent isn't placed yet
- uint32 negHang = 0; // Potential parent has a negative hang to a placed read
- uint32 goodOlap = 0;
-
- for (uint32 oo=0; oo<olapsLen; oo++) {
-
- if (allreads.count(olaps[oo].b_iid) == 0) {
- notPresent++;
- continue;
- }
-
- if (forward.count(olaps[oo].b_iid) == 0) { // Potential parent not placed yet
- notPlaced++;
- continue;
- }
-
- uint32 l = FI->overlapLength(olaps[oo].a_iid, olaps[oo].b_iid, olaps[oo].a_hang, olaps[oo].b_hang);
-
- // Compute the hangs, so we can ignore those that would place this read before the parent.
- // This is a flaw somewhere in bogart, and should be caught and fixed earlier.
-
- // Consensus is expecting the have the hangs for the parent read, not this read, and some
- // fiddling is needed to flip the overlap for this:
- // First, swap the reads so it's b-vs-a.
- // Then, flip the overlap if the b read is in the unitig flipped.
-
- int32 ah = (olaps[oo].flipped == false) ? (-olaps[oo].a_hang) : (olaps[oo].b_hang);
- int32 bh = (olaps[oo].flipped == false) ? (-olaps[oo].b_hang) : (olaps[oo].a_hang);
-
- if (forward[olaps[oo].b_iid] == false) {
- swap(ah, bh);
- ah = -ah;
- bh = -bh;
- }
-
- // If the ahang is negative, we flubbed up somewhere, and want to place this read before
- // the parent (even though positions say to place it after, because we sorted by position).
-
- if (ah < 0) {
- //fprintf(stderr, "ERROR: read %u in tig %u has negative ahang from parent read %u, ejected.\n",
- // frg->ident, ti, olaps[oo].b_iid);
- negHang++;
- continue;
- }
-
- // The overlap is good. Count it as such.
-
- goodOlap++;
-
- // If the overlap is worse than the one we already have, we don't care.
-
- if ((l < ttLen) || // Too short
- (ttErr < olaps[oo].erate)) { // Too noisy
- continue;
- }
-
- tt = oo;
- ttLen = l;
- ttErr = olaps[oo].erate;
- }
-
- // If no thickest overlap, we screwed up somewhere. Complain and eject the read.
-
- if (tt == UINT32_MAX) {
- fprintf(stderr, "ERROR: read %u in tig %u has no overlap to any previous read, ejected. %u overlaps total. %u negative hang. %u to read not in tig. %u to read later in tig. %u good overlaps.\n",
- frg->ident, tig->tigID(), olapsLen, negHang, notPresent, notPlaced, goodOlap);
- continue;
- }
-
- tig->addChild()->set(frg->ident, olaps[tt].b_iid, ah, bh, frg->position.bgn, frg->position.end);
+ tig->addChild()->set(frg->ident,
+ frg->parent, frg->ahang, frg->bhang,
+ frg->position.bgn, frg->position.end);
}
-
- //fprintf(stderr, "unitigToTig()-- tig %u has %u children\n", tig->_tigID, tig->_childrenLen);
}
@@ -200,12 +107,6 @@ writeUnitigsToStore(UnitigVector &unitigs,
uint32 frg_count = 0;
uint32 prt_count = 1;
char filename[FILENAME_MAX] = {0};
- uint32 *partmap = new uint32 [unitigs.size()];
-
- // This code closely follows that in AS_CGB_unitigger.c::output_the_chunks()
-
- if (isFinal)
- checkUnitigMembership(unitigs);
// Open up the initial output file
@@ -282,80 +183,325 @@ writeUnitigsToStore(UnitigVector &unitigs,
}
-// For every unitig, report the best overlaps contained in the
-// unitig, and all overlaps contained in the unitig.
-//
-// Wow, this is ancient.
-//
+
+class rawEdge_t { // One candidate edge collected by findUnusedEdges(): an overlap plus the placements of its two reads.
+public:
+ rawEdge_t(uint32 o, uint32 t, int32 ab, int32 ae, int32 bb, int32 be) { // Copies each argument into the matching member below.
+ oi = o;
+ tigID = t;
+
+ Abgn = ab;
+ Aend = ae;
+
+ Bbgn = bb;
+ Bend = be;
+ };
+
+ uint32 oi; // Index of this overlap in the array returned by OC->getOverlaps() for the A read.
+ int32 tigID; // ID of the tig holding the B read; findUnusedEdges() passes rdBtigID.
+
+ int32 Abgn; // Overlapping read (A) placement on that tig; findUnusedEdges() passes the min/max of the placed position.
+ int32 Aend;
+
+ int32 Bbgn; // Parent (B) read placement; findUnusedEdges() passes its low/high coordinates.
+ int32 Bend;
+
+ bool operator<(rawEdge_t const &that) const { // Sort order: by tig first, then by where the A read starts.
+ if (tigID != that.tigID)
+ return(tigID < that.tigID);
+
+ return(Abgn < that.Abgn);
+ }
+};
+
+
+
+// Find and report unused overlaps ("edges") off one end of read rdA.
+//
+// unitigs -- all tigs, indexed by tig ID
+// rdA -- read we're finding edges for
+// rdA3p -- true to use overlaps off the 3' end of rdA, false for the 5' end
+// edgeReads -- reads in other tigs that an edge is allowed to end on
+// EF -- open output file; one line is written per reported edge
+//
void
-writeOverlapsUsed(UnitigVector &unitigs,
- char *prefix) {
- char N[FILENAME_MAX];
+findUnusedEdges(UnitigVector &unitigs,
+ ufNode *rdA, // Read we're finding edges for
+ bool rdA3p, // Overlaps from the 3' end of the read
+ const set<uint32> &edgeReads, // Only read here; by reference so the set is not copied on every call.
+ FILE *EF) {
+
+ uint32 rdAid = rdA->ident;
+ uint32 rdAlen = FI->fragmentLength(rdAid);
+ bool rdAfwd = rdA->isForward();
+ int32 rdAlo = (rdAfwd) ? (rdA->position.bgn) : (rdA->position.end);
+ int32 rdAhi = (rdAfwd) ? (rdA->position.end) : (rdA->position.bgn);
+ uint32 rdAtigID = Unitig::fragIn(rdAid);
+ Unitig *rdAtig = unitigs[rdAtigID];
+
+ uint32 ovlLen = 0;
+ BAToverlap *ovl = OC->getOverlaps(rdA->ident, AS_MAX_ERATE, ovlLen);
+
+ vector<rawEdge_t> rawEdges;
+
+ //fprintf(stderr, "WORKING ON read rdA=%u 3p=%d\n", rdA->ident, rdA3p);
+
+ // Over all overlaps for this read, find and report edges to 'edgeReads'. Though
+ // edgeReads should be just one read per tig end, the code below was originally written
+ // to find all edges to all reads, then pick the longest for each cluster.
+
+ for (uint32 oi=0; oi<ovlLen; oi++) {
+ if ((ovl[oi].AisContainer()) || // Not interested in container overlaps.
+ (ovl[oi].AisContained()) || // Allow A-is-contained overlaps? Should be OK, but only really care about dovetails.
+ (ovl[oi].AEndIs3prime() != rdA3p)) // Overlap off the wrong end of A.
+ continue;
+
+ uint32 rdBid = ovl[oi].b_iid;
+ uint32 rdBtigID = Unitig::fragIn(rdBid);
+ Unitig *rdBtig = unitigs[rdBtigID];
- sprintf(N, "%s.unused.best.edges", prefix);
+ if ((rdBtig == NULL) ||
+ (rdBtig->getNumFrags() == 0) || // Not interested in edges to singletons
+ (rdBtig->_isUnassembled == true)) // Or other unassembled crap. rdA filtered outside here.
+ continue;
+
+ if ((rdAtigID != rdBtigID) && // Not to self (circular) and
+ (edgeReads.count(rdBid) == 0)) // not a read we can overlap to.
+ continue;
- FILE *F = fopen(N, "w");
+ ufNode *rdB = &rdBtig->ufpath[ Unitig::pathPosition(rdBid) ];
+ bool rdBfwd = rdB->isForward();
+ int32 rdBlo = (rdBfwd) ? (rdB->position.bgn) : (rdB->position.end);
+ int32 rdBhi = (rdBfwd) ? (rdB->position.end) : (rdB->position.bgn);
- for (uint32 ti=0; ti<unitigs.size(); ti++) {
- Unitig *tig = unitigs[ti];
- Unitig *ovl = NULL;
- char tyt = 'C';
+ // Exclude overlaps satisfied in the same tig.
- if (tig == NULL)
+ if ((rdAtigID == rdBtigID) && (rdAlo < rdBhi) && (rdBlo < rdAhi))
continue;
- if (tig->_isUnassembled) tyt = 'U';
- if (tig->_isBubble) tyt = 'B';
- if (tig->_isRepeat) tyt = 'R';
- if (tig->_isCircular) tyt = 'O';
+ // Exclude overlaps that are higher than expected error.
- for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
- ufNode *frg = &tig->ufpath[fi];
- ufNode *oth = NULL;
+ ;
+
+ // Compute the placement of rdA on rdBtig.
- // Report the unused best edge
+ ufNode placed;
+ BestEdgeOverlap edge(ovl[oi]);
- BestEdgeOverlap *be5 = OG->getBestEdgeOverlap(frg->ident, false);
- uint32 rd5 = (be5 == NULL) ? 0 : be5->fragId();
- Unitig *tg5 = (be5 == NULL) ? NULL : unitigs[Unitig::fragIn(rd5)];
- char ty5 = 'C';
+ rdBtig->placeFrag(placed,
+ rdAid,
+ rdA3p,
+ &edge);
- if ((tg5 != NULL) && (tg5->tigID() != tig->tigID())) {
- uint32 ord = Unitig::pathPosition(rd5);
- ufNode *oth = &tg5->ufpath[ord];
+ //writeLog("placed tig %u rdA %u %d-%d on tig %u %d-%d from rdB %u %d-%d oi %u\n",
+ // rdAtigID, rdAid, rdAlo, rdAhi, rdBtigID, placed.position.bgn, placed.position.end, rdBid, rdBlo, rdBhi, oi);
- if (tig->_isUnassembled) ty5 = 'U';
- if (tig->_isBubble) ty5 = 'B';
- if (tig->_isRepeat) ty5 = 'R';
- if (tig->_isCircular) ty5 = 'O';
+ // Save the overlap.
- fprintf(F, "tig %7u %c read %8u at %9u %-9u %c' -- %8d %-8d -- tig %7u %c read %8u at %9u %-9u %c'\n",
- tig->tigID(), tyt, frg->ident, frg->position.bgn, frg->position.end, '5',
- be5->ahang(), be5->bhang(),
- tg5->tigID(), ty5, oth->ident, oth->position.bgn, oth->position.end, (be5->frag3p() == false) ? '5' : '3');
+ rawEdges.push_back(rawEdge_t(oi, rdBtigID, placed.position.min(), placed.position.max(), rdBlo, rdBhi));
+ }
+
+ // We've now got a pile of (unsorted) overlaps to reads in other tigs. We need to pick one
+ // overlap (the longest?) from each pile and output it.
+
+ sort(rawEdges.begin(), rawEdges.end());
+
+ // We expect to have a pile of placements that are 'the same', generated by each one of the
+ // overlapping reads in the target tig. We need to group these placements together and pick
+ // one exemplar overlap to output the edge for.
+ //
+ // A complication is caused by large tandem repeats. We can get two distinct placements that
+ // overlap:
+ //
+ // [rrrr][rrrr]
+ // --------------- (rdA aligning to the first and second repeat)
+ // ---------------- (rdA aligning to only the second repeat)
+ //
+ // These are just overlaps, and we don't know that the rest of rdA fails to align.
+ //
+ // Overlaps are sorted by the start of rdA on rdBtig. We'll use the simple and largely unvalidated
+ // heuristic of any placement that starts within 500bp of the last is for the same placement.
+
+ for (uint32 ri=0, rj=0; ri<rawEdges.size(); ri = rj) {
+ for (rj=ri+1; ((rj < rawEdges.size()) &&
+ (rawEdges[rj].tigID == rawEdges[ri].tigID) &&
+ (rawEdges[rj-1].Abgn + 500 >= rawEdges[rj].Abgn)); )
+ rj++;
+
+ // Scan overlaps from ri to rj, retain the thickest.
+
+ //fprintf(stderr, "Scan batch from ri=%u to rj=%u\n", ri, rj);
+
+ int32 rrMax = 0; // Signed, to match olapLen in the comparison below.
+ uint32 rrIdx = ri; // Start at the first edge of the batch so the index is valid even if no overlap is longer than zero.
+
+ for (uint32 rr=ri; rr<rj; rr++) {
+ int32 olapLen = 0;
+
+ if (rawEdges[rr].Abgn < rawEdges[rr].Bbgn) {
+ assert(rawEdges[rr].Bend >= rawEdges[rr].Abgn);
+ olapLen = rawEdges[rr].Bend - rawEdges[rr].Abgn;
+ } else {
+ assert(rawEdges[rr].Aend >= rawEdges[rr].Bbgn);
+ olapLen = rawEdges[rr].Aend - rawEdges[rr].Bbgn;
}
- BestEdgeOverlap *be3 = OG->getBestEdgeOverlap(frg->ident, true);
- uint32 rd3 = (be3 == NULL) ? 0 : be3->fragId();
- Unitig *tg3 = (be3 == NULL) ? NULL : unitigs[Unitig::fragIn(rd3)];
- char ty3 = 'C';
+ if (rrMax < olapLen) {
+ rrMax = olapLen;
+ rrIdx = rr;
+ }
+ }
+
+ // Emit the edge.
+
+ uint32 oi = rawEdges[rrIdx].oi;
+
+ uint32 rdBid = ovl[oi].b_iid;
+ uint32 rdBtigID = Unitig::fragIn(rdBid);
+ Unitig *rdBtig = unitigs[rdBtigID];
+
+ ufNode *rdB = &rdBtig->ufpath[ Unitig::pathPosition(rdBid) ];
+ bool rdBfwd = rdB->isForward();
+ int32 rdBlo = (rdBfwd) ? (rdB->position.bgn) : (rdB->position.end);
+ int32 rdBhi = (rdBfwd) ? (rdB->position.end) : (rdB->position.bgn);
+
+ char rdAEnd, rdBEnd;
+
+ if (ovl[oi].isDovetail()) {
+ rdAEnd = ovl[oi].AEndIs5prime() ? '5' : '3';
+ rdBEnd = ovl[oi].BEndIs5prime() ? '5' : '3';
+ } else {
+ rdAEnd = ovl[oi].AisContainer() ? 'C' : 'c';
+ rdBEnd = ovl[oi].AisContainer() ? 'c' : 'C';
+ }
+
+ char ori = (ovl[oi].flipped) ? '<' : '>';
+
+ fprintf(EF, "tig %7u %c read %8u at %9u %-9u %8d %c %-8d tig %7u %c read %8u at %9u %-9u\n",
+ rdAtig->_tigID, rdAtig->type(), rdAid, rdA->position.bgn, rdA->position.end,
+ ovl[oi].a_hang, ori, ovl[oi].b_hang,
+ rdBtig->_tigID, rdBtig->type(), rdBid, rdB->position.bgn, rdB->position.end);
+ }
+}
+
+
+
+
+// Write, to '<fileprefix>.unused.edges', the unused overlaps off both ends of
+// every tig that is not NULL, empty, or unassembled. Exits if the file cannot
+// be created.
+void
+writeUnusedEdges(UnitigVector &unitigs,
+ char *fileprefix) {
+ char filename[FILENAME_MAX] = {0};
+
+ snprintf(filename, FILENAME_MAX, "%s.unused.edges", fileprefix);
+ FILE *EF = fopen(filename, "w");
+ if (EF == NULL) // Test the result, not errno: errno can hold a stale value from an earlier call.
+ fprintf(stderr, "Failed to create unused edge output '%s': %s\n", filename, strerror(errno)), exit(1);
+
+ // Find reads we're allowed to find edges to. We can pick either the outer-most non-contained reads,
+ // or just the reads touching the edge of the tig.
+
+ set<uint32> edgeReads; // Reads at the end of the tig
+ set<uint32> nearReads; // Reads close to the end of the tig
- if ((tg3 != NULL) && (tg3->tigID() != tig->tigID())) {
- uint32 ord = Unitig::pathPosition(rd3);
- ufNode *oth = &tg3->ufpath[ord];
- if (tig->_isUnassembled) ty3 = 'U';
- if (tig->_isBubble) ty3 = 'B';
- if (tig->_isRepeat) ty3 = 'R';
- if (tig->_isCircular) ty3 = 'O';
+ // Find the outer-most non-contained reads in each unitig.
- fprintf(F, "tig %7u %c read %8u at %9u %-9u %c' -- %8d %-8d -- tig %7u %c read %8u at %9u %-9u %c'\n",
- tig->tigID(), tyt, frg->ident, frg->position.bgn, frg->position.end, '3',
- be3->ahang(), be3->bhang(),
- tg3->tigID(), ty3, oth->ident, oth->position.bgn, oth->position.end, (be3->frag3p() == false) ? '5' : '3');
+#if 0
+ for (uint32 ti=0; ti<unitigs.size(); ti++) {
+ Unitig *tig = unitigs[ti];
+
+ if ((tig == NULL) ||
+ (tig->getNumFrags() == 0) ||
+ (tig->_isUnassembled == true))
+ continue;
+
+ // Find reads at the start of the tig
+
+ for (uint32 ct=0, fi=0; (ct < 5) && (fi < tig->ufpath.size()); fi++) {
+ ufNode *frg = &tig->ufpath[fi];
+
+ if (OG->isContained(frg->ident) == false) {
+ if (ct == 0)
+ edgeReads5e.insert(frg->ident);
+ else
+ nearReads5m.insert(frg->ident);
+ ct++;
+ }
+ }
+
+ // Find reads at the end of the tig
+
+ for (uint32 ct=0, fi=tig->ufpath.size(); (ct < 5) && (fi-- > 0); ) {
+ ufNode *frg = &tig->ufpath[fi];
+
+ if (OG->isContained(frg->ident) == false) {
+ if (ct == 0)
+ edgeReads3e.insert(frg->ident);
+ else
+ nearReads3m.insert(frg->ident);
+ ct++;
}
}
}
+#endif
+
+ // Find the reads at the ends of the tig.
+
+#if 1
+ for (uint32 ti=0; ti<unitigs.size(); ti++) {
+ Unitig *tig = unitigs[ti];
+
+ if ((tig == NULL) ||
+ (tig->getNumFrags() == 0) ||
+ (tig->_isUnassembled == true))
+ continue;
+
+ edgeReads.insert(tig->firstRead()->ident);
+ edgeReads.insert(tig->lastRead()->ident);
+ }
+#endif
+
+
+ // Step through all the unitigs, find all unused overlaps off the ends of the tig.
+
+
+ for (uint32 ti=0; ti<unitigs.size(); ti++) {
+ Unitig *tig = unitigs[ti];
- fclose(F);
+ if ((tig == NULL) ||
+ (tig->getNumFrags() == 0) ||
+ (tig->_isUnassembled == true))
+ continue;
+
+ assert(tig->getLength() > 0);
+
+ // Find the first/last non-contained reads in the tig.
+
+#if 0
+ ufNode *rd5 = &tig->ufpath.front();
+ ufNode *rd3 = &tig->ufpath.back();
+
+ for (uint32 fi=1; (fi < tig->ufpath.size()) && (OG->isContained(rd5->ident) == true); fi++)
+ rd5 = &tig->ufpath[fi];
+
+ for (uint32 fi=tig->ufpath.size()-1; (fi-- > 0) && (OG->isContained(rd3->ident) == true); )
+ rd3 = &tig->ufpath[fi];
+
+ // What to do if either of those reads are contained? If so (then both will be contained; no
+ // dovetail at all) we've swapped the meaning of 5' and 3'.
+
+ if ((OG->isContained(rd5) == true) || (OG->isContained(rd3) == true)) {
+ rd5 = &tig->ufpath.front();
+ rd3 = &tig->ufpath.back();
+ }
+#endif
+
+ // Find the smallest/largest read position - the two reads that are at the end of the tig.
+
+#if 1
+ ufNode *rd5 = tig->firstRead();
+ ufNode *rd3 = tig->lastRead();
+#endif
+
+ // Finally, we probably should be finding just the reads touching the ends of the unitig, not the
+ // first/last non-contained read.
+
+ findUnusedEdges(unitigs, rd5, rd5->isReverse(), edgeReads, EF); // First read, if reverse, find edges off 3' end
+ findUnusedEdges(unitigs, rd3, rd3->isForward(), edgeReads, EF); // Last read, if forward, find edges off 3' end
+ }
+
+ fclose(EF);
}
+
diff --git a/src/bogart/AS_BAT_Outputs.H b/src/bogart/AS_BAT_Outputs.H
index d2e26c1..dad3784 100644
--- a/src/bogart/AS_BAT_Outputs.H
+++ b/src/bogart/AS_BAT_Outputs.H
@@ -38,11 +38,20 @@
#ifndef INCLUDE_AS_BAT_OUTPUTS
#define INCLUDE_AS_BAT_OUTPUTS
-#include "tgStore.H"
+#include "AS_BAT_Unitig.H"
+#include "AS_BAT_UnitigVector.H"
-void unitigToMA(tgTig *tig, uint32 tigid, Unitig *utg);
-void writeUnitigsToStore(UnitigVector &unitigs, char *fileprefix, char *tigStorePath, uint32 fragment_count_target, bool isFinal=true);
-void writeOverlapsUsed(UnitigVector &unitigs, char *fileprefix);
+void
+writeUnitigsToStore(UnitigVector &unitigs,
+ char *fileprefix,
+ char *tigStorePath,
+ uint32 frg_count_target,
+ bool isFinal);
+
+void
+writeUnusedEdges(UnitigVector &unitigs,
+ char *fileprefix);
+
#endif // INCLUDE_AS_BAT_OUTPUTS
diff --git a/src/bogart/AS_BAT_OverlapCache.C b/src/bogart/AS_BAT_OverlapCache.C
index c40dce6..6bfcae7 100644
--- a/src/bogart/AS_BAT_OverlapCache.C
+++ b/src/bogart/AS_BAT_OverlapCache.C
@@ -35,21 +35,32 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-APR-26
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_OverlapCache.H"
-#include "AS_BAT_Unitig.H" // For sizeof(ufNode)
+#include "AS_BAT_BestOverlapGraph.H" // sizeof(BestEdgeOverlap)
+#include "AS_BAT_Unitig.H" // sizeof(ufNode)
+#include "AS_BAT_Logging.H"
#include "memoryMappedFile.H"
#include <sys/types.h>
-#include <sys/sysctl.h>
uint64 ovlCacheMagic = 0x65686361436c766fLLU; //0102030405060708LLU;
+#ifndef __CYGWIN__
+ #ifndef _WIN32
+ #include <sys/sysctl.h>
+ #endif
+#endif
+
#ifdef HW_PHYSMEM
uint64
@@ -107,6 +118,33 @@ OverlapCache::OverlapCache(ovStore *ovlStoreUniq,
bool onlySave,
bool doSave) {
+ _memLimit = 0;
+ _memUsed = 0;
+
+ _storMax = 0;
+ _storLen = 0;
+ _stor = NULL;
+
+ _heaps.clear();
+
+ _cacheMMF = NULL;
+
+ _cachePtr = NULL;
+ _cacheLen = NULL;
+
+ _maxPer = 0;
+
+ _ovsMax = 0;
+ _ovs = NULL;
+ _ovsSco = NULL;
+ _ovsTmp = NULL;
+
+ _threadMax = 0;
+ _thread = NULL;
+
+ _ovlStoreUniq = NULL;
+ _ovlStoreRept = NULL;
+
if (load(prefix, erate) == true)
return;
@@ -135,34 +173,35 @@ OverlapCache::OverlapCache(ovStore *ovlStoreUniq,
uint64 memFI = FI->memoryUsage();
uint64 memBE = FI->numFragments() * sizeof(BestEdgeOverlap);
- uint64 memBC = FI->numFragments() * sizeof(BestContainment);
- uint64 memUL = FI->numFragments() * sizeof(ufNode); // For fragment positions in unitigs
- uint64 memUT = FI->numFragments() * sizeof(uint32) / 16; // For unitigs (assumes 32 frag / unitig)
- uint64 memID = FI->numFragments() * sizeof(uint32) * 2; // For maps of fragment id to unitig id
+ uint64 memUL = FI->numFragments() * sizeof(ufNode); // For fragment positions in unitigs
+ uint64 memUT = FI->numFragments() * sizeof(uint32) / 16; // For unitigs (assumes 32 frag / unitig)
+ uint64 memID = FI->numFragments() * sizeof(uint32) * 2; // For maps of fragment id to unitig id
+ uint64 memEP = FI->numFragments() * Unitig::epValueSize() * 2; // For error profile
+
uint64 memC1 = (FI->numFragments() + 1) * (sizeof(BAToverlapInt *) + sizeof(uint32));
uint64 memC2 = _ovsMax * (sizeof(ovOverlap) + sizeof(uint64) + sizeof(uint64));
uint64 memC3 = _threadMax * _thread[0]._batMax * sizeof(BAToverlap);
uint64 memC4 = (FI->numFragments() + 1) * sizeof(uint32);
+
uint64 memOS = (_memLimit == getMemorySize()) ? (0.1 * getMemorySize()) : 0.0;
- uint64 memTT = memFI + memBE + memBC + memUL + memUT + memID + memC1 + memC2 + memC3 + memC4 + memOS;
+ uint64 memTT = memFI + memBE + memUL + memUT + memID + memC1 + memC2 + memC3 + memC4 + memOS;
if (onlySave) {
fprintf(stderr, "OverlapCache()-- Only saving overlaps, not computing unitigs.\n");
memBE = 0;
- memBC = 0;
memUL = 0;
memUT = 0;
memID = 0;
- memTT = memFI + memBE + memBC + memUL + memUT + memID + memOS + memC1 + memC2 + memC3 + memC4;
+ memTT = memFI + memBE + memUL + memUT + memID + memOS + memC1 + memC2 + memC3 + memC4;
}
fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for fragment data.\n", memFI >> 20);
fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for best edges.\n", memBE >> 20);
- fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for best containments.\n", memBC >> 20);
fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for unitig layouts.\n", memUL >> 20);
fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for unitigs.\n", memUT >> 20);
fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for id maps.\n", memID >> 20);
+ fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for error profiles.\n", memEP >> 20);
fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for overlap cache pointers.\n", memC1 >> 20);
fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for overlap cache initial bucket.\n", memC2 >> 20);
fprintf(stderr, "OverlapCache()-- %7"F_U64P"MB for overlap cache thread data.\n", memC3 >> 20);
@@ -221,9 +260,6 @@ OverlapCache::OverlapCache(ovStore *ovlStoreUniq,
_ovsSco = new uint64 [_ovsMax];
_ovsTmp = new uint64 [_ovsMax];
- //_threadMax = omp_get_max_threads();
- //_thread = new OverlapCacheThreadData [_threadMax];
-
_ovlStoreUniq = ovlStoreUniq;
_ovlStoreRept = ovlStoreRept;
@@ -754,6 +790,7 @@ OverlapCache::removeWeakOverlaps(uint32 *minEvalue5p,
+#if 0
double
OverlapCache::findErate(uint32 aIID, uint32 bIID) {
@@ -767,7 +804,7 @@ OverlapCache::findErate(uint32 aIID, uint32 bIID) {
return(1.0);
}
-
+#endif
diff --git a/src/bogart/AS_BAT_OverlapCache.H b/src/bogart/AS_BAT_OverlapCache.H
index ac53029..814c2f4 100644
--- a/src/bogart/AS_BAT_OverlapCache.H
+++ b/src/bogart/AS_BAT_OverlapCache.H
@@ -42,8 +42,12 @@
#ifndef INCLUDE_AS_BAT_OVERLAPCACHE
#define INCLUDE_AS_BAT_OVERLAPCACHE
+#include "AS_global.H"
+#include "ovStore.H"
+#include "gkStore.H"
#include "memoryMappedFile.H"
+
// CA8 used to re-encode the error rate into a smaller-precision number. This was
// confusing and broken (it tried to use a log-based encoding to give more precision
// to the smaller values). CA3g gives up and uses all 12 bits of precision.
@@ -68,7 +72,65 @@ struct BAToverlapInt {
// For working with overlaps, 32 bytes per overlap. This data is copied
// from the overlap storage (from a BAToverlapInt) with the erate expanded,
// and a_iid added.
-struct BAToverlap {
+
+class BAToverlap {
+public:
+ BAToverlap() {
+ };
+ ~BAToverlap() {
+ };
+
+public:
+
+ // Return which end of the read the overlap is on. For 'Ais', the orientation
+ // of B doesn't matter; likewise for 'Bis'.
+ //
+ // If the overlap is a containment relationship, both Is5 and Is3 are false.
+ //
+ bool
+ isDovetail(void) const {
+ return(((a_hang < 0) && (b_hang < 0)) ||
+ ((a_hang > 0) && (b_hang > 0)));
+ };
+
+ bool
+ AEndIs5prime(void) const { // -------->
+ return((a_hang < 0) && (b_hang < 0)); // -------
+ };
+
+ bool
+ AEndIs3prime(void) const { // -------->
+ return((a_hang > 0) && (b_hang > 0)); // -------
+ };
+
+ bool
+ AisContainer(void) const { // -------->
+ return((a_hang >= 0) && (b_hang <= 0)); // ----
+ };
+
+ bool
+ AisContained(void) const { // --->
+ return((a_hang <= 0) && (b_hang >= 0)); // ---------
+ };
+
+ bool
+ BEndIs3prime(void) const {
+ assert(AisContainer() == false); // Function is not defined
+ assert(AisContained() == false); // for containments.
+ return((AEndIs5prime() && (flipped == false)) || // <=== ------>
+ (AEndIs3prime() && (flipped == true))); // ---->
+ };
+
+ bool
+ BEndIs5prime(void) const {
+ assert(AisContainer() == false); // Function is not defined
+ assert(AisContained() == false); // for containments.
+ return((AEndIs5prime() && (flipped == true)) || // ------>
+ (AEndIs3prime() && (flipped == false))); // <=== ---->
+ };
+
+
+public:
int32 a_hang;
int32 b_hang;
@@ -136,7 +198,7 @@ public:
void removeWeakOverlaps(uint32 *minEvalue5p,
uint32 *minEvalue3p);
- double findErate(uint32 aIID, uint32 bIID);
+ //double findErate(uint32 aIID, uint32 bIID);
private:
bool load(const char *prefix, double erate);
@@ -171,4 +233,8 @@ private:
ovStore *_ovlStoreRept;
};
+
+
+extern OverlapCache *OC;
+
#endif // INCLUDE_AS_BAT_OVERLAPCACHE
diff --git a/src/bogart/AS_BAT_PlaceContains.C b/src/bogart/AS_BAT_PlaceContains.C
index 100cbc1..f28b931 100644
--- a/src/bogart/AS_BAT_PlaceContains.C
+++ b/src/bogart/AS_BAT_PlaceContains.C
@@ -35,197 +35,216 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
-#include "AS_BAT_PlaceContains.H"
+#include "AS_BAT_Logging.H"
+#include "AS_BAT_Unitig.H"
+#include "AS_BAT_PlaceContains.H"
#include "AS_BAT_PlaceFragUsingOverlaps.H"
+#define SHOW_PLACEMENT_DETAIL // Reports evidence (too much) for placing reads.
+#define SHOW_PLACEMENT // Reports where the read was placed.
+
void
-placeContainsUsingBestOverlaps(UnitigVector &unitigs) {
- uint32 fragsPlaced = 1;
- uint32 fragsPending = 0;
+breakSingletonTigs(UnitigVector &unitigs) {
- uint32 *nReadsPer = new uint32 [unitigs.size()];
+ // For any singleton unitig, eject the read and delete the unitig. Eventually,
+ // we will stop making singleton unitigs.
- uint32 totalPlaced = 0;
- uint32 totalPlacedInSingleton = 0;
+ uint32 removed = 0; // Number of singleton unitigs deleted, which is also the number of reads ejected.
- logFileFlags &= ~LOG_PLACE_FRAG;
+ for (uint32 ti=1; ti<unitigs.size(); ti++) {
+ Unitig *utg = unitigs[ti];
- for (uint32 ii=0; ii<unitigs.size(); ii++)
- nReadsPer[ii] = (unitigs[ii] == NULL) ? 0 : unitigs[ii]->getNumFrags();
+ if (utg == NULL)
+ continue;
- while (fragsPlaced > 0) {
- fragsPlaced = 0;
- fragsPending = 0;
+ if (utg->ufpath.size() != 1) // Only exact singletons; an empty tig has no ufpath[0] to eject.
+ continue;
- writeLog("==> PLACING CONTAINED FRAGMENTS\n");
+ unitigs[ti] = NULL; // Remove the unitig from the list
+ utg->removeFrag(utg->ufpath[0].ident); // Eject the read
+ delete utg; // Reclaim space
+ removed++; // Count
+ }
- for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
- BestContainment *bestcont = OG->getBestContainer(fid);
+ writeLog("Removed %u read%s from %u singleton unitig%s.\n",
+ removed, (removed == 1) ? "" : "s", // Plural suffix for every count except exactly one.
+ removed, (removed == 1) ? "" : "s");
+}
- if (bestcont->isContained == false)
- // Not a contained fragment.
- continue;
- if (Unitig::fragIn(fid) != 0)
- // Containee already placed.
- continue;
- if (Unitig::fragIn(bestcont->container) == 0) {
- // Container not placed (yet).
- fragsPending++;
- continue;
- }
+void
+placeUnplacedUsingAllOverlaps(UnitigVector &unitigs,
+ const char *prefix) {
+ uint32 fiLimit = FI->numFragments();
+ uint32 numThreads = omp_get_max_threads();
+ uint32 blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
- uint32 utgid = Unitig::fragIn(bestcont->container);
- Unitig *utg = unitigs[utgid];
+ uint32 *placedTig = new uint32 [FI->numFragments() + 1];
+ SeqInterval *placedPos = new SeqInterval [FI->numFragments() + 1];
- totalPlaced++;
+ memset(placedTig, 0, sizeof(uint32) * (FI->numFragments() + 1));
+ memset(placedPos, 0, sizeof(SeqInterval) * (FI->numFragments() + 1));
- if (nReadsPer[utgid] == 1)
- totalPlacedInSingleton++;
+ // Just some logging. Count the number of reads we try to place.
- utg->addContainedFrag(fid, bestcont, true); //logFileFlagSet(LOG_INITIAL_CONTAINED_PLACEMENT));
+ uint32 nToPlaceContained = 0;
+ uint32 nToPlace = 0;
+ uint32 nPlacedContained = 0;
+ uint32 nPlaced = 0;
+ uint32 nFailedContained = 0;
+ uint32 nFailed = 0;
- if (utg->id() != Unitig::fragIn(fid))
- writeLog("placeContainsUsingBestOverlaps()-- FAILED to add frag %d to unitig %d.\n", fid, bestcont->container);
- assert(utg->id() == Unitig::fragIn(fid));
+ for (uint32 fid=1; fid<FI->numFragments()+1; fid++)
+ if (Unitig::fragIn(fid) == 0)
+ if (OG->isContained(fid))
+ nToPlaceContained++;
+ else
+ nToPlace++;
+ writeLog("placeContains()-- placing %u contained and %u unplaced reads, with %d threads.\n",
+ nToPlaceContained, nToPlace, numThreads);
- fragsPlaced++;
- }
+ // Do the placing!
- writeLog("placeContainsUsingBestOverlaps()-- Placed %d fragments; still need to place %d\n",
- fragsPlaced, fragsPending);
+#pragma omp parallel for schedule(dynamic, blockSize)
+ for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
+ bool enableLog = true;
- if ((fragsPlaced == 0) && (fragsPending > 0))
- writeLog("placeContainsUsingBestOverlaps()-- Stopping contained fragment placement due to zombies.\n");
- }
+ if (Unitig::fragIn(fid) > 0)
+ continue;
- writeLog("placeContainsUsingBestOverlaps()-- %u frags placed in unitigs (including singleton unitigs)\n", totalPlaced);
- writeLog("placeContainsUsingBestOverlaps()-- %u frags placed in singleton unitigs\n", totalPlacedInSingleton);
- writeLog("placeContainsUsingBestOverlaps()-- %u frags unplaced\n", fragsPending);
+ // Place the read.
- delete [] nReadsPer;
+ vector<overlapPlacement> placements;
- for (uint32 ti=1; ti<unitigs.size(); ti++) {
- Unitig *utg = unitigs[ti];
+ placeFragUsingOverlaps(unitigs, AS_MAX_ERATE, NULL, fid, placements);
- if (utg)
- utg->sort();
- }
-}
+ // Search the placements for the highest expected identity placement using all overlaps in the unitig.
+ uint32 b = UINT32_MAX;
+ for (uint32 i=0; i<placements.size(); i++) {
+ Unitig *tig = unitigs[placements[i].tigID];
-void
-placeContainsUsingBestOverlaps(Unitig *target, set<uint32> *fragments) {
- uint32 fragsPlaced = 1;
-
- logFileFlags &= ~LOG_PLACE_FRAG;
-
- while (fragsPlaced > 0) {
- fragsPlaced = 0;
-
- for (set<uint32>::iterator it=fragments->begin(); it != fragments->end(); it++) {
- uint32 fid = *it;
- BestContainment *bestcont = OG->getBestContainer(fid);
-
- if ((bestcont->isContained == false) ||
- (Unitig::fragIn(fid) != 0) ||
- (Unitig::fragIn(bestcont->container) == 0) ||
- (Unitig::fragIn(bestcont->container) != target->id()))
- // Not a contained fragment OR
- // Containee already placed OR
- // Container not placed (yet) OR
- // Containee not in the target unitig
+ if (placements[i].fCoverage < 0.99) // Ignore partially placed reads.
+ continue;
+
+ if (tig->ufpath.size() == 1) // Ignore placements in singletons.
continue;
- target->addContainedFrag(fid, bestcont, false);
+ uint32 bgn = (placements[i].position.bgn < placements[i].position.end) ? placements[i].position.bgn : placements[i].position.end;
+ uint32 end = (placements[i].position.bgn < placements[i].position.end) ? placements[i].position.end : placements[i].position.bgn;
+
+ double erate = placements[i].errors / placements[i].aligned;
+
+ if (tig->overlapConsistentWithTig(5.0, bgn, end, erate) < 0.5) {
+ if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
+ writeLog("frag %8u tested tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f) - HIGH ERROR\n",
+ fid, placements[i].tigID, tig->ufpath.size(), placements[i].position.bgn, placements[i].position.end, placements[i].fCoverage, erate);
+ continue;
+ }
- if (target->id() != Unitig::fragIn(fid))
- writeLog("placeContainsUsingBestOverlaps()-- FAILED to add frag %d to unitig %d.\n", fid, bestcont->container);
- assert(target->id() == Unitig::fragIn(fid));
+ if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
+ writeLog("frag %8u tested tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f)\n",
+ fid, placements[i].tigID, tig->ufpath.size(), placements[i].position.bgn, placements[i].position.end, placements[i].fCoverage, erate);
- fragsPlaced++;
+ if ((b == UINT32_MAX) ||
+ (placements[i].errors / placements[i].aligned < placements[b].errors / placements[b].aligned))
+ b = i;
}
- }
- target->sort();
-}
+ // If we didn't find a best, b will be invalid; set positions for adding to a new tig.
+ // If we did, save both the position it was placed at, and the tigID it was placed in.
+ if (b == UINT32_MAX) {
+ if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
+ writeLog("frag %8u remains unplaced\n", fid);
+ placedPos[fid].bgn = 0;
+ placedPos[fid].end = FI->fragmentLength(fid);
+ }
-void
-placeContainsUsingAllOverlaps(UnitigVector &unitigs,
- double erate) {
+ else {
+ if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
+ writeLog("frag %8u placed tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f)\n",
+ fid, placements[b].tigID, unitigs[placements[b].tigID]->ufpath.size(),
+ placements[b].position.bgn, placements[b].position.end,
+ placements[b].fCoverage,
+ placements[b].errors / placements[b].aligned);
+ placedTig[fid] = placements[b].tigID;
+ placedPos[fid] = placements[b].position;
+ }
+ }
- // UNFINISHED. This results in crashes later in the process.
+ // All reads placed, now just dump them in their correct tigs.
for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
- ufNode frg;
- //ufNode mat;
+ Unitig *tig = NULL;
+ ufNode frg;
if (Unitig::fragIn(fid) > 0)
- // Fragment placed already.
continue;
- frg.ident = fid;
- //mat.ident = 0; //mid;
+ // If not placed, dump it in a new unitig. Well, not anymore. These reads were not placed in
+ // any tig initially, were not allowed to seed a tig, and now, could find no place to go.
+ // They're garbage. Plus, it screws up the logging above because we don't know the new tig ID
+ // until now.
- overlapPlacement frgPlacement;
- //overlapPlacement matPlacement;
+ if (placedTig[fid] == 0) {
+ if (OG->isContained(fid))
+ nFailedContained++;
+ else
+ nFailed++;
- frgPlacement.errors = 4.0e9;
- frgPlacement.aligned = 1;
+ //tig = unitigs.newUnitig(false);
+ }
- //matPlacement.errors = 4.0e9;
- //matPlacement.aligned = 1;
+ // Otherwise, it was placed somewhere, grab the tig.
- vector<overlapPlacement> placements;
+ else {
+ if (OG->isContained(fid))
+ nPlacedContained++;
+ else
+ nPlaced++;
- // Place the read.
+ tig = unitigs[placedTig[fid]];
+ }
- placeFragUsingOverlaps(unitigs, erate, NULL, frg.ident, placements);
+ // Regardless, add it to the tig. Logging for this is above.
- // Search the placements for the highest expect identity placement using all overlaps in the unitig.
+ if (tig) {
+ frg.ident = fid;
+ frg.contained = 0;
+ frg.parent = 0;
+ frg.ahang = 0;
+ frg.bhang = 0;
+ frg.position = placedPos[fid];
- Unitig *frgTig = NULL;
- Unitig *matTig = NULL;
+ tig->addFrag(frg, 0, false);
+ }
+ }
- for (uint32 i=0; i<placements.size(); i++) {
- if (placements[i].fCoverage < 0.99)
- continue;
+ // Cleanup.
- if (placements[i].errors / placements[i].aligned < frgPlacement.errors / frgPlacement.aligned) {
- frgPlacement = placements[i];
- frgTig = unitigs[placements[i].tigID];
- }
- }
+ delete [] placedPos;
+ delete [] placedTig;
- frg.ident = frgPlacement.frgID;
- frg.contained = 0;
- frg.parent = 0;
- frg.ahang = 0;
- frg.bhang = 0;
- frg.position = frgPlacement.position;
- frg.containment_depth = 0;
-
- if ((frg.position.bgn == 0) &&
- (frg.position.end == 0))
- // Failed to place the contained read anywhere. We should probably just make a new unitig
- // for it right here.
- continue;
+ writeLog("placeContains()-- Placed %u contained reads and %u unplaced reads.\n", nPlacedContained, nPlaced);
+ writeLog("placeContains()-- Failed to place %u contained reads (too high error suspected) and %u unplaced reads (lack of overlaps suspected).\n", nFailedContained, nFailed);
- // Add the placed read to the unitig.
+ // But wait! All the tigs need to be sorted. Well, not really _all_, but the hard ones to sort
+ // are big, and those quite likely had reads added to them, so it's really not worth the effort
+ // of tracking which ones need sorting, since the ones that don't need it are trivial to sort.
- writeLog("placeContainsUsingAllOverlaps()-- frag %u placed in tig %u at %u-%u.\n",
- frg.ident, frgTig->id(), frg.position.bgn, frg.position.end);
+ for (uint32 ti=1; ti<unitigs.size(); ti++) {
+ Unitig *utg = unitigs[ti];
- frgTig->addFrag(frg, 0, false);
+ if (utg)
+ utg->sort();
}
}
diff --git a/src/bogart/AS_BAT_PlaceContains.H b/src/bogart/AS_BAT_PlaceContains.H
index 5784cab..c6827f1 100644
--- a/src/bogart/AS_BAT_PlaceContains.H
+++ b/src/bogart/AS_BAT_PlaceContains.H
@@ -38,12 +38,13 @@
#ifndef INCLUDE_AS_BAT_PLACECONTAINS
#define INCLUDE_AS_BAT_PLACECONTAINS
-void placeContainsUsingBestOverlaps(UnitigVector &unitigs);
+#include "AS_BAT_UnitigVector.H"
+
+void breakSingletonTigs(UnitigVector &unitigs);
+
+void placeUnplacedUsingAllOverlaps(UnitigVector &unitigs,
+ const char *prefix);
-void placeContainsUsingBestOverlaps(Unitig *target,
- set<uint32> *fragments);
-void placeContainsUsingAllOverlaps(UnitigVector &unitigs,
- double erate);
#endif // INCLUDE_AS_BAT_PLACECONTAINS
diff --git a/src/bogart/AS_BAT_PlaceFragUsingOverlaps.C b/src/bogart/AS_BAT_PlaceFragUsingOverlaps.C
index 2aa3316..118a1da 100644
--- a/src/bogart/AS_BAT_PlaceFragUsingOverlaps.C
+++ b/src/bogart/AS_BAT_PlaceFragUsingOverlaps.C
@@ -35,10 +35,11 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
+#include "AS_BAT_Unitig.H"
#include "AS_BAT_PlaceFragUsingOverlaps.H"
#include "intervalList.H"
@@ -47,248 +48,6 @@
#undef VERBOSE_PLACEMENT
-// Given an implicit fragment -- a ufNode with only the 'ident' set -- this will compute the
-// best placement for the fragment in an existing unitig. ALL overlaps are used, not just
-// the best.
-//
-// Ties are broken using overlap identities or arbitrarily.
-//
-// Returns true if any placement is found, false otherwise.
-//
-
-
-bool
-placeAcontainsB(Unitig *utg, ufNode &frag, BAToverlap &ovl, overlapPlacement &op) {
- BestContainment best;
-
- // The placeFrag() function is expecting the overlap to be from the container to the us fragment,
- // which is opposite the overlap that we have. We need to flip the fragments in the overlap --
- // negate the hangs.
-
- best.container = ovl.b_iid; // Not really the container...
- best.isContained = false; // ...so mark this as a false BestContainment
- best.a_hang = ovl.flipped ? ovl.b_hang : -ovl.a_hang;
- best.b_hang = ovl.flipped ? ovl.a_hang : -ovl.b_hang;
- best.sameOrientation = ovl.flipped ? false : true;
-
- if (utg->placeFrag(frag, &best) == false)
- return(false);
-
- uint32 parentOrd = utg->pathPosition(ovl.b_iid);
- ufNode &parent = utg->ufpath[parentOrd];
-
- op.frgID = frag.ident;
- op.refID = ovl.b_iid;
- op.tigID = utg->id();
- op.position = frag.position;
- op.errors = FI->fragmentLength(ovl.b_iid) * ovl.erate;
- op.covered.bgn = MIN(parent.position.bgn, parent.position.end); // Adjusted by hang later
- op.covered.end = MAX(parent.position.bgn, parent.position.end);
- op.aligned = op.covered.end - op.covered.bgn;
-
- assert(op.covered.bgn < op.covered.end);
-
- // Compute the portion of the unitig that is actually verified by
- // the overlap.
-
- if (op.position.bgn < op.position.end) {
- int32 poslo = op.position.bgn;
- int32 poshi = op.position.end;
-
- assert(op.position.bgn <= op.covered.bgn);
- assert(op.position.bgn <= op.covered.end);
-
- op.covered.bgn -= op.position.bgn;
- op.covered.end -= op.position.bgn;
-
- op.verified.bgn = poslo + op.covered.bgn;
- op.verified.end = poslo + op.covered.end;
-
- if (op.verified.end > poshi)
- op.verified.end = poshi;
-
- assert(op.verified.bgn < op.verified.end);
- assert(poslo <= op.verified.bgn);
- assert(op.verified.end <= poshi);
-
- } else {
- int32 poslo = op.position.end;
- int32 poshi = op.position.bgn;
-
- assert(op.position.end <= op.covered.bgn);
- assert(op.position.end <= op.covered.end);
-
- op.covered.bgn -= op.position.end;
- op.covered.end -= op.position.end;
-
- op.verified.bgn = poslo + op.covered.end;
- op.verified.end = poslo + op.covered.bgn;
-
- if (op.verified.bgn > poshi)
- op.verified.bgn = poshi;
-
- assert(op.verified.end < op.verified.bgn);
- assert(poslo <= op.verified.end);
- assert(op.verified.bgn <= poshi);
- }
-
- // Disallow any placements that exceed the boundary of the unitig. These cannot be confirmed
- // by overlaps and might be wrong. Sample cases:
- // o sticking a unique/repeat fragment onto a repeat (leaving the unique uncovered)
- // o sticking a chimeric fragment onto the end of a unitig (leaving the chimeric join uncovered)
-
- if ((MIN(op.position.bgn, op.position.end) < 0) ||
- (MAX(op.position.bgn, op.position.end) > utg->getLength())) {
-#ifdef VERBOSE_PLACEMENT
- if (logFileFlagSet(LOG_PLACE_FRAG))
- writeLog("placeFragUsingOverlaps()-- (container) - frag %d in unitig %d at %d,%d (verified %d,%d) from overlap ident %d %d hang %d %d flipped %d covered %d,%d DISALLOWED\n",
- frag.ident, utg->id(), op.position.bgn, op.position.end, op.verified.bgn, op.verified.end,
- ovl.a_iid, ovl.b_iid, ovl.a_hang, ovl.b_hang, ovl.flipped,
- op.covered.bgn, op.covered.end);
-#endif
- op = overlapPlacement();
-
- } else {
-#ifdef VERBOSE_PLACEMENT
- if (logFileFlagSet(LOG_PLACE_FRAG))
- writeLog("placeFragUsingOverlaps()-- (container) - frag %d in unitig %d at %d,%d (verified %d,%d) from overlap ident %d %d hang %d %d flipped %d covered %d,%d\n",
- frag.ident, utg->id(), op.position.bgn, op.position.end, op.verified.bgn, op.verified.end,
- ovl.a_iid, ovl.b_iid, ovl.a_hang, ovl.b_hang, ovl.flipped,
- op.covered.bgn, op.covered.end);
-#endif
- }
-
- return(true);
-}
-
-
-
-bool
-placeBcontainsA(Unitig *utg, ufNode &frag, BAToverlap &ovl, overlapPlacement &op) {
- BestContainment best;
-
- best.container = ovl.b_iid;
- best.isContained = true;
- best.a_hang = ovl.flipped ? ovl.b_hang : -ovl.a_hang;
- best.b_hang = ovl.flipped ? ovl.a_hang : -ovl.b_hang;
- best.sameOrientation = ovl.flipped ? false : true;
-
- if (utg->placeFrag(frag, &best) == false)
- return(false);
-
- op.frgID = frag.ident;
- op.refID = ovl.b_iid;
- op.tigID = utg->id();
- op.position = frag.position;
- op.errors = FI->fragmentLength(ovl.a_iid) * ovl.erate;
- op.covered.bgn = 0;
- op.covered.end = FI->fragmentLength(ovl.a_iid);
- op.aligned = op.covered.end - op.covered.bgn;
-
- assert(op.covered.bgn < op.covered.end);
-
- op.verified.bgn = op.position.bgn;
- op.verified.end = op.position.end;
-
- if ((MIN(op.position.bgn, op.position.end) < 0) ||
- (MAX(op.position.bgn, op.position.end) > utg->getLength())) {
-#ifdef VERBOSE_PLACEMENT
- if (logFileFlagSet(LOG_PLACE_FRAG))
- writeLog("placeFragUsingOverlaps()-- (contained) - frag %d in unitig %d at %d,%d (verified %d,%d) from overlap ident %d %d hang %d %d flipped %d covered %d,%d DISALLOWED\n",
- frag.ident, utg->id(), op.position.bgn, op.position.end, op.verified.bgn, op.verified.end,
- ovl.a_iid, ovl.b_iid, ovl.a_hang, ovl.b_hang, ovl.flipped,
- op.covered.bgn, op.covered.end);
-#endif
- op = overlapPlacement();
-
- } else {
-#ifdef VERBOSE_PLACEMENT
- if (logFileFlagSet(LOG_PLACE_FRAG))
- writeLog("placeFragUsingOverlaps()-- (contained) - frag %d in unitig %d at %d,%d (verified %d,%d) from overlap ident %d %d hang %d %d flipped %d covered %d,%d\n",
- frag.ident, utg->id(), op.position.bgn, op.position.end, op.verified.bgn, op.verified.end,
- ovl.a_iid, ovl.b_iid, ovl.a_hang, ovl.b_hang, ovl.flipped,
- op.covered.bgn, op.covered.end);
-#endif
- }
-
- return(true);
-}
-
-
-
-bool
-placeDovetail(Unitig *utg, ufNode &frag, BAToverlap &ovl, overlapPlacement &op) {
- BestEdgeOverlap best;
- int32 plac3, plac5;
- int32 aend3p = AS_BAT_overlapAEndIs3prime(ovl);
-
- best.set(ovl);
-
- if (utg->placeFrag(frag, plac5, (aend3p ? NULL : &best),
- frag, plac3, (aend3p ? &best : NULL)) == false)
- return(false);
-
- uint32 olen = FI->overlapLength(ovl.a_iid, ovl.b_iid, ovl.a_hang, ovl.b_hang);
- uint32 flen = FI->fragmentLength(ovl.a_iid);
-
- op.frgID = frag.ident;
- op.refID = ovl.b_iid;
- op.tigID = utg->id();
- op.position = frag.position;
- op.errors = olen * ovl.erate;
- op.covered.bgn = (ovl.a_hang < 0) ? 0 : ovl.a_hang;
- op.covered.end = (ovl.b_hang > 0) ? flen : ovl.b_hang + flen;
- op.aligned = op.covered.end - op.covered.bgn;
-
- assert(op.covered.bgn < op.covered.end);
-
- if (op.position.bgn < op.position.end) {
- op.verified.bgn = op.position.bgn + op.covered.bgn;
- op.verified.end = op.position.bgn + op.covered.end;
-
- if (op.verified.end > op.position.end)
- op.verified.end = op.position.end;
-
- assert(op.verified.bgn >= op.position.bgn);
- assert(op.verified.end <= op.position.end);
- } else {
- op.verified.bgn = op.position.bgn - op.covered.bgn;
- op.verified.end = op.position.bgn - op.covered.end;
-
- if (op.verified.end < op.position.end)
- op.verified.end = op.position.end;
-
- assert(op.verified.end >= op.position.end);
- assert(op.verified.bgn <= op.position.bgn);
- }
-
- if ((MIN(op.position.bgn, op.position.end) < 0) ||
- (MAX(op.position.bgn, op.position.end) > utg->getLength())) {
-#ifdef VERBOSE_PLACEMENT
- if (logFileFlagSet(LOG_PLACE_FRAG))
- writeLog("placeFragUsingOverlaps()-- (dovetail) - frag %d in unitig %d at %d,%d (verified %d,%d) from overlap ident %d %d hang %d %d flipped %d covered %d,%d DISALLOWED\n",
- frag.ident, utg->id(), op.position.bgn, op.position.end, op.verified.bgn, op.verified.end,
- ovl.a_iid, ovl.b_iid, ovl.a_hang, ovl.b_hang, ovl.flipped,
- op.covered.bgn, op.covered.end);
-#endif
- op = overlapPlacement();
-
- } else {
-#ifdef VERBOSE_PLACEMENT
- if (logFileFlagSet(LOG_PLACE_FRAG))
- writeLog("placeFragUsingOverlaps()-- (dovetail) - frag %d in unitig %d at %d,%d (verified %d,%d) from overlap ident %d %d hang %d %d flipped %d covered %d,%d\n",
- frag.ident, utg->id(), op.position.bgn, op.position.end, op.verified.bgn, op.verified.end,
- ovl.a_iid, ovl.b_iid, ovl.a_hang, ovl.b_hang, ovl.flipped,
- op.covered.bgn, op.covered.end);
-#endif
- }
-
- return(true);
-}
-
-
-
-
@@ -316,7 +75,6 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
frag.bhang = 0;
frag.position.bgn = 0;
frag.position.end = 0;
- frag.containment_depth = 0;
placements.clear();
@@ -331,40 +89,99 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
for (uint32 i=0; i<ovlLen; i++)
ovlPlace[i] = overlapPlacement();
- // Compute placements. Anything that doesn't get placed is left as 'nowhere', in particular, unitig == 0.
+ // Compute placements. Anything that doesn't get placed is left as 'nowhere', specifically, in unitig 0 (which doesn't exist).
for (uint32 i=0; i<ovlLen; i++) {
- int32 utgID = Unitig::fragIn(ovl[i].b_iid);
- Unitig *utg = unitigs[utgID];
+ int32 tigID = Unitig::fragIn(ovl[i].b_iid);
+ Unitig *tig = unitigs[tigID];
assert(ovl[i].a_iid == frag.ident);
- if (utgID == 0)
+ if (tigID == 0)
// Fragment not in a unitig yet -- possibly this is a contained fragment that we haven't
// placed yet, or have temporarily removed it from a unitig.
continue;
- if ((target != NULL) && (target != utg))
+ if ((target != NULL) && (target != tig))
// Requested placement in a specific unitig, and this isn't it.
continue;
- // Depending on the type of overlap (containment vs dovetail), place the fragment relative to
- // the other fragment.
+ // Place the fragment relative to the other fragment.
- if ((ovl[i].a_hang >= 0) && (ovl[i].b_hang <= 0)) {
- // A (us) contains B (the other fragment)
- if (placeAcontainsB(utg, frag, ovl[i], ovlPlace[i]) == false)
- nFragmentsNotPlaced++;
+ BestEdgeOverlap edge(ovl[i]);
+ ufNode frag;
- } else if ((ovl[i].a_hang <= 0) && (ovl[i].b_hang >= 0)) {
- // A (us) is contained in B (the other fragment)
- if (placeBcontainsA(utg, frag, ovl[i], ovlPlace[i]) == false)
- nFragmentsNotPlaced++;
+ if (tig->placeFrag(frag,
+ fid,
+ ovl[i].AEndIs3prime(),
+ &edge) == false) {
+ nFragmentsNotPlaced++;
+ continue;
+ }
+
+ uint32 olen = FI->overlapLength(ovl[i].a_iid, ovl[i].b_iid, ovl[i].a_hang, ovl[i].b_hang);
+ uint32 flen = FI->fragmentLength(ovl[i].a_iid);
+
+ ovlPlace[i].frgID = frag.ident;
+ ovlPlace[i].refID = ovl[i].b_iid;
+ ovlPlace[i].tigID = tig->id();
+ ovlPlace[i].position = frag.position;
+ ovlPlace[i].errors = olen * ovl[i].erate;
+ ovlPlace[i].covered.bgn = (ovl[i].a_hang < 0) ? 0 : ovl[i].a_hang;
+ ovlPlace[i].covered.end = (ovl[i].b_hang > 0) ? flen : ovl[i].b_hang + flen;
+ ovlPlace[i].aligned = ovlPlace[i].covered.end - ovlPlace[i].covered.bgn;
+
+ assert(ovlPlace[i].covered.bgn < ovlPlace[i].covered.end);
+
+ // Compute the portion of the unitig that is actually verified by
+ // the overlap.
+
+ if (ovlPlace[i].position.bgn < ovlPlace[i].position.end) {
+ ovlPlace[i].verified.bgn = ovlPlace[i].position.bgn + ovlPlace[i].covered.bgn;
+ ovlPlace[i].verified.end = ovlPlace[i].position.bgn + ovlPlace[i].covered.end;
+
+ if (ovlPlace[i].verified.end > ovlPlace[i].position.end)
+ ovlPlace[i].verified.end = ovlPlace[i].position.end;
+
+ assert(ovlPlace[i].verified.bgn >= ovlPlace[i].position.bgn);
+ assert(ovlPlace[i].verified.end <= ovlPlace[i].position.end);
+ assert(ovlPlace[i].verified.bgn < ovlPlace[i].verified.end);
+ } else {
+ ovlPlace[i].verified.bgn = ovlPlace[i].position.bgn - ovlPlace[i].covered.bgn; // pos.bgn is the larger and cov.bgn the smaller, so ver.bgn is the larger
+ ovlPlace[i].verified.end = ovlPlace[i].position.bgn - ovlPlace[i].covered.end; // pos.bgn is the larger and cov.end the larger, so ver.end is the smaller
+
+ if (ovlPlace[i].verified.end < ovlPlace[i].position.end)
+ ovlPlace[i].verified.end = ovlPlace[i].position.end;
+
+ assert(ovlPlace[i].verified.end >= ovlPlace[i].position.end);
+ assert(ovlPlace[i].verified.bgn <= ovlPlace[i].position.bgn);
+ assert(ovlPlace[i].verified.end < ovlPlace[i].verified.bgn);
+ }
+
+ // Disallow any placements that exceed the boundary of the unitig. These cannot be confirmed
+ // by overlaps and might be wrong. Sample cases:
+ // o sticking a unique/repeat fragment onto a repeat (leaving the unique uncovered)
+ // o sticking a chimeric fragment onto the end of a unitig (leaving the chimeric join uncovered)
+
+ if ((MIN(ovlPlace[i].position.bgn, ovlPlace[i].position.end) < 0) ||
+ (MAX(ovlPlace[i].position.bgn, ovlPlace[i].position.end) > tig->getLength())) {
+#ifdef VERBOSE_PLACEMENT
+ //if (logFileFlagSet(LOG_PLACE_FRAG))
+ writeLog("placeFragUsingOverlaps()-- frag %d in unitig %d at %d,%d (verified %d,%d) from overlap ident %d %d hang %d %d flipped %d covered %d,%d DISALLOWED\n",
+ frag.ident, tig->id(), ovlPlace[i].position.bgn, ovlPlace[i].position.end, ovlPlace[i].verified.bgn, ovlPlace[i].verified.end,
+ ovl[i].a_iid, ovl[i].b_iid, ovl[i].a_hang, ovl[i].b_hang, ovl[i].flipped,
+ ovlPlace[i].covered.bgn, ovlPlace[i].covered.end);
+#endif
+ ovlPlace[i] = overlapPlacement();
} else {
- // A dovetail, use the existing placement routine
- if (placeDovetail(utg, frag, ovl[i], ovlPlace[i]) == false)
- nFragmentsNotPlaced++;
+#ifdef VERBOSE_PLACEMENT
+ //if (logFileFlagSet(LOG_PLACE_FRAG))
+ writeLog("placeFragUsingOverlaps()-- frag %d in unitig %d at %d,%d (verified %d,%d) from overlap ident %d %d hang %d %d flipped %d covered %d,%d\n",
+ frag.ident, tig->id(), ovlPlace[i].position.bgn, ovlPlace[i].position.end, ovlPlace[i].verified.bgn, ovlPlace[i].verified.end,
+ ovl[i].a_iid, ovl[i].b_iid, ovl[i].a_hang, ovl[i].b_hang, ovl[i].flipped,
+ ovlPlace[i].covered.bgn, ovlPlace[i].covered.end);
+#endif
}
assert((ovlPlace[i].position.bgn < ovlPlace[i].position.end) == (ovlPlace[i].verified.bgn < ovlPlace[i].verified.end));
@@ -376,8 +193,8 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
#ifdef VERBOSE_PLACEMENT
if (nFragmentsNotPlaced > 0)
- if (logFileFlagSet(LOG_PLACE_FRAG))
- writeLog("placeFragUsingOverlaps()-- WARNING: Failed to place %d fragments\n", nFragmentsNotPlaced);
+ //if (logFileFlagSet(LOG_PLACE_FRAG))
+ writeLog("placeFragUsingOverlaps()-- WARNING: Failed to place %d fragments\n", nFragmentsNotPlaced);
#endif
// Sort all the placements. Any overlap we couldn't place is automatically in Unitig 0, the
@@ -424,13 +241,17 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
end = bgn + 1;
while ((end < ovlLen) &&
(ovlPlace[bgn].tigID == ovlPlace[end].tigID) &&
- (isReverse(ovlPlace[bgn].verified) == isReverse(ovlPlace[end].verified)))
+ (ovlPlace[bgn].verified.isReverse() == ovlPlace[end].verified.isReverse()))
end++;
// Over all placements with the same unitig/orientation (that'd be from bgn to end), build
// interval lists for the begin point and the end point. Remember, this is all fragments to a
// single unitig (the whole picture above), not just the overlapping fragment sets (left or
// right blocks).
+ //
+ // This used to (before MAY-2016) use the 'verified' placement, instead of the 'full' placement.
+ // In long pacbio reads, this seems to result in far too many clusters - each placement is
+ // derived from one overlap, which will almost never cover the whole read.
intervalList<int32> bgnPoints;
intervalList<int32> endPoints;
@@ -440,19 +261,23 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
if (windowSlop < 5)
windowSlop = 5;
+#ifdef VERBOSE_PLACEMENT
+ writeLog("placeFragUsingOverlaps()-- windowSlop = %d\n", windowSlop);
+#endif
+
for (uint32 oo=bgn; oo<end; oo++) {
assert(ovlPlace[oo].tigID > 0);
- int32 b = ovlPlace[oo].verified.bgn;
- int32 be = ovlPlace[oo].verified.bgn + windowSlop;
- int32 e = ovlPlace[oo].verified.end;
- int32 ee = ovlPlace[oo].verified.end + windowSlop;
+ int32 bb = ovlPlace[oo].position.bgn;
+ int32 be = ovlPlace[oo].position.bgn + windowSlop;
+ int32 eb = ovlPlace[oo].position.end;
+ int32 ee = ovlPlace[oo].position.end + windowSlop;
- b = (b < windowSlop) ? 0 : b - windowSlop;
- e = (e < windowSlop) ? 0 : e - windowSlop;
+ bb = (bb < windowSlop) ? 0 : bb - windowSlop;
+ eb = (eb < windowSlop) ? 0 : eb - windowSlop;
- bgnPoints.add(b, be - b);
- endPoints.add(e, ee - e);
+ bgnPoints.add(bb, be - bb);
+ endPoints.add(eb, ee - eb);
}
bgnPoints.merge();
@@ -470,8 +295,8 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
int32 numEndPoints = endPoints.numberOfIntervals();
for (uint32 oo=bgn; oo<end; oo++) {
- int32 b = ovlPlace[oo].verified.bgn; // WAS expected position of read in tig!
- int32 e = ovlPlace[oo].verified.end;
+ int32 b = ovlPlace[oo].position.bgn;
+ int32 e = ovlPlace[oo].position.end;
int32 c = 0;
ovlPlace[oo].clusterID = 0;
@@ -542,7 +367,7 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
op.covered.bgn = MIN(op.covered.bgn, ovlPlace[oo].covered.bgn);
op.covered.end = MAX(op.covered.end, ovlPlace[oo].covered.end);
- if (isReverse(ovlPlace[oo].position))
+ if (ovlPlace[oo].position.isReverse())
nReverse++;
else
nForward++;
@@ -631,9 +456,9 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
if (ovlPlace[oo].position.bgn < ovlPlace[oo].position.end) {
if (ovlPlace[oo].verified.bgn >= ovlPlace[oo].verified.end)
writeLog("placeFragUsingOverlaps()-- frag %d FWD verified placement invalid (bgn,end %d,%d) for position (bgn,end %d,%d)\n",
- ovlPlace[oo].frgID,
- ovlPlace[oo].verified.bgn, ovlPlace[oo].verified.end,
- ovlPlace[oo].position.bgn, ovlPlace[oo].position.end);
+ ovlPlace[oo].frgID,
+ ovlPlace[oo].verified.bgn, ovlPlace[oo].verified.end,
+ ovlPlace[oo].position.bgn, ovlPlace[oo].position.end);
assert(ovlPlace[oo].verified.bgn < ovlPlace[oo].verified.end);
bgnMean += ovlPlace[oo].position.bgn;
@@ -645,9 +470,9 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
} else {
if (ovlPlace[oo].verified.bgn < ovlPlace[oo].verified.end)
writeLog("placeFragUsingOverlaps()-- frag %d REV verified placement invalid (bgn,end %d,%d) for position (bgn,end %d,%d)\n",
- ovlPlace[oo].frgID,
- ovlPlace[oo].verified.bgn, ovlPlace[oo].verified.end,
- ovlPlace[oo].position.bgn, ovlPlace[oo].position.end);
+ ovlPlace[oo].frgID,
+ ovlPlace[oo].verified.bgn, ovlPlace[oo].verified.end,
+ ovlPlace[oo].position.bgn, ovlPlace[oo].position.end);
assert(ovlPlace[oo].verified.bgn >= ovlPlace[oo].verified.end);
bgnMean += ovlPlace[oo].position.end;
@@ -727,35 +552,34 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
if (reject) {
#ifdef VERBOSE_PLACEMENT
- if (logFileFlagSet(LOG_PLACE_FRAG)) {
- writeLog("placeFragUsingOverlaps()-- frag %d in unitig %d at %d,%d (+- %.2f,%.2f) -- cov %.2f (%d,%d) errors %.2f aligned %d novl %d -- INVALID stddev weak %d overlapping %d bad size %d\n",
- op.frgID, op.tigID, op.position.bgn, op.position.end, op.bgnStdDev, op.endStdDev,
- op.fCoverage, op.covered.bgn, op.covered.end,
- op.errors,
- op.aligned,
- oe - os,
- weakStdDev, overlappingSpan, spanBad);
- for (uint32 oo=os; oo<oe; oo++) {
- if ((ovlPlace[oo].position.bgn == 0) &&
- (ovlPlace[oo].position.end == 0))
- continue;
-
- writeLog("placeFragUsingOverlaps()-- %8u,%8u\n", ovlPlace[oo].position.bgn, ovlPlace[oo].position.end);
-
- }
+ //if (logFileFlagSet(LOG_PLACE_FRAG)) {
+ writeLog("placeFragUsingOverlaps()-- frag %d in unitig %d at %d,%d (+- %.2f,%.2f) -- cov %.2f (%d,%d) errors %.2f aligned %d novl %d -- INVALID stddev weak %d overlapping %d bad size %d\n",
+ op.frgID, op.tigID, op.position.bgn, op.position.end, op.bgnStdDev, op.endStdDev,
+ op.fCoverage, op.covered.bgn, op.covered.end,
+ op.errors,
+ op.aligned,
+ oe - os,
+ weakStdDev, overlappingSpan, spanBad);
+ for (uint32 oo=os; oo<oe; oo++) {
+ if ((ovlPlace[oo].position.bgn == 0) &&
+ (ovlPlace[oo].position.end == 0))
+ continue;
+
+ writeLog("placeFragUsingOverlaps()-- %8u,%8u\n", ovlPlace[oo].position.bgn, ovlPlace[oo].position.end);
}
+ //}
#endif
} else {
placements.push_back(op);
#ifdef VERBOSE_PLACEMENT
- if (logFileFlagSet(LOG_PLACE_FRAG))
- writeLog("placeFragUsingOverlaps()-- frag %d in unitig %d at %d,%d (+- %.2f,%.2f) -- cov %.2f (%d,%d) errors %.2f aligned %d novl %d\n",
- op.frgID, op.tigID, op.position.bgn, op.position.end, op.bgnStdDev, op.endStdDev,
- op.fCoverage, op.covered.bgn, op.covered.end,
- op.errors,
- op.aligned,
- oe - os);
+ //if (logFileFlagSet(LOG_PLACE_FRAG))
+ writeLog("placeFragUsingOverlaps()-- frag %d in unitig %d at %d,%d (+- %.2f,%.2f) -- cov %.2f (%d,%d) errors %.2f aligned %d novl %d\n",
+ op.frgID, op.tigID, op.position.bgn, op.position.end, op.bgnStdDev, op.endStdDev,
+ op.fCoverage, op.covered.bgn, op.covered.end,
+ op.errors,
+ op.aligned,
+ oe - os);
#endif
}
@@ -774,68 +598,3 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
return(true);
}
-
-
-
-
-void
-placeFragInBestLocation(UnitigVector &unitigs,
- double erate,
- uint32 fid) {
-
- if (Unitig::fragIn(fid) != 0)
- // Already placed.
- return;
-
- ufNode frg;
- vector<overlapPlacement> op;
-
- frg.ident = fid;
- frg.contained = 0;
- frg.parent = 0;
- frg.ahang = 0;
- frg.bhang = 0;
- frg.position.bgn = 0;
- frg.position.end = FI->fragmentLength(fid);
- frg.containment_depth = 0;
-
- placeFragUsingOverlaps(unitigs, erate, NULL, fid, op);
-
- // Pick the lowest error placement, and of those lowest, the least aligned region.
-
- double minError = DBL_MAX;
- uint32 minAlign = UINT32_MAX;
- uint32 bp = UINT32_MAX;
-
- for (uint32 pl=0; pl<op.size(); pl++) {
- if (op[pl].fCoverage < 0.99)
- continue;
-
- double e = op[pl].errors / op[pl].aligned;
-
- if ((e < minError) ||
- ((e <= minError) && (op[pl].aligned < minAlign))) {
- minError = e;
- minAlign = op[pl].aligned;
- bp = pl;
- }
- }
-
- // No placement? New unitig!
-
- if (bp == UINT32_MAX) {
- Unitig *sing = unitigs.newUnitig(false);
- sing->addFrag(frg, 0, false);
- return;
- }
-
- // Place the frag in the unitig at the spot.
-
- Unitig *tig = unitigs[op[bp].tigID];
-
- frg.position.bgn = op[bp].position.bgn;
- frg.position.end = op[bp].position.end;
-
- tig->addFrag(frg, 0, false);
- tig->bubbleSortLastFrag();
-}
diff --git a/src/bogart/AS_BAT_PlaceFragUsingOverlaps.H b/src/bogart/AS_BAT_PlaceFragUsingOverlaps.H
index 63e86f6..3067503 100644
--- a/src/bogart/AS_BAT_PlaceFragUsingOverlaps.H
+++ b/src/bogart/AS_BAT_PlaceFragUsingOverlaps.H
@@ -39,6 +39,10 @@
#define INCLUDE_AS_BAT_PLACEFRAGUSINGOVERLAPS
#include "AS_BAT_OverlapCache.H"
+#include "AS_BAT_BestOverlapGraph.H" // For FragmentEnd
+#include "AS_BAT_Unitig.H" // For SeqInterval
+#include "AS_BAT_UnitigVector.H"
+
class overlapPlacement {
public:
@@ -105,8 +109,8 @@ bool
overlapPlacement_byLocation(const overlapPlacement &A, const overlapPlacement &B) {
if (A.tigID != B.tigID)
return(A.tigID < B.tigID);
- if (isReverse(A.position) != isReverse(B.position))
- return(isReverse(A.position) < isReverse(B.position));
+ if (A.position.isReverse() != B.position.isReverse())
+ return(A.position.isReverse() < B.position.isReverse());
return(A.position < B.position);
}
@@ -130,10 +134,5 @@ placeFragUsingOverlaps(UnitigVector &unitigs,
uint32 fid,
vector<overlapPlacement> &placements);
-void
-placeFragInBestLocation(UnitigVector &unitigs,
- double erate,
- uint32 fid);
-
#endif // INCLUDE_AS_BAT_PLACEFRAGUSINGOVERLAPS
diff --git a/src/bogart/AS_BAT_PlaceZombies.C b/src/bogart/AS_BAT_PlaceZombies.C
deleted file mode 100644
index 48444d1..0000000
--- a/src/bogart/AS_BAT_PlaceZombies.C
+++ /dev/null
@@ -1,110 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_PlaceZombies.C
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-NOV-23 to 2013-AUG-01
- * are Copyright 2010-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2014-DEC-19 to 2015-APR-24
- * are Copyright 2014-2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
-#include "AS_BAT_BestOverlapGraph.H"
-
-#include "AS_BAT_PlaceZombies.H"
-
-
-// Zombies are caused by a fragment being contained in a fragment that is eventually contained in
-// the original fragment -- circular containmnents.
-//
-// Here we detect Zombies, and reset their best container to something that is already placed.
-
-void
-placeZombies(UnitigVector &unitigs, double erate) {
-
- writeLog("==> SEARCHING FOR ZOMBIES\n");
-
- uint32 *inUnitig = new uint32 [FI->numFragments()+1];
- int numZombies = 0;
-
- // Mark fragments as dead, then unmark them if they are in a real living unitig.
-
- for (uint32 i=0; i<FI->numFragments()+1; i++)
- inUnitig[i] = noUnitig;
-
- for (uint32 ti=0; ti<unitigs.size(); ti++) {
- Unitig *utg = unitigs[ti];
-
- if (utg == NULL)
- continue;
-
- for (uint32 fi=0; fi<utg->ufpath.size(); fi++)
- inUnitig[utg->ufpath[fi].ident] = utg->id();
- }
-
- // For anything not in a living unitig, reload the overlaps and find a new container.
- // (NOT IMPLEMENTED - for now we just move these to new singleton unitigs).
-
- for (uint32 i=0; i<FI->numFragments()+1; i++) {
- if (FI->fragmentLength(i) == 0)
- // Deleted fragment
- continue;
-
- if (inUnitig[i] != noUnitig)
- // Valid fragment in a unitig
- continue;
-
- Unitig *utg = unitigs.newUnitig(false);
- ufNode frg;
-
- frg.ident = i;
- frg.contained = 0;
- frg.parent = 0;
-
- frg.ahang = 0;
- frg.bhang = 0;
-
- frg.position.bgn = 0;
- frg.position.end = FI->fragmentLength(i);
-
- frg.containment_depth = 0;
-
- utg->addFrag(frg, 0, false);
-
- writeLog("placeZombies()-- unitig %d created from zombie fragment %d\n",
- utg->id(), i);
- numZombies++;
- }
-
- writeLog("RESURRECTED %d ZOMBIE FRAGMENT%s.\n", numZombies, (numZombies != 1) ? "s" : "");
-
- delete [] inUnitig;
-}
diff --git a/src/bogart/AS_BAT_PopBubbles.C b/src/bogart/AS_BAT_PopBubbles.C
new file mode 100644
index 0000000..17f0951
--- /dev/null
+++ b/src/bogart/AS_BAT_PopBubbles.C
@@ -0,0 +1,679 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-MAR-11
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#include "AS_BAT_FragmentInfo.H"
+#include "AS_BAT_OverlapCache.H"
+#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
+#include "AS_BAT_PlaceFragUsingOverlaps.H"
+
+#include "AS_BAT_Instrumentation.H"
+
+#include "intervalList.H"
+
+#include <vector>
+#include <set>
+#include <map>
+
+using namespace std;
+
+#define BUBBLE_READ_FRACTION 0.5 // Fraction of a tig's non-contained reads that must overlap one other tig for that tig to be recorded as a pop target.
+
+#undef SHOW_MULTIPLE_PLACEMENTS // Define to log each duplicate placement (a read placed more than once in a single target region) as it is discarded.
+
+// One candidate location for popping a bubble: the bubble tig, the target tig it might be
+// merged into, the bgn..end region of the target covered by the bubble's read placements,
+// and the placements that fall in that region.
+class candidatePop {
+public:
+  // An empty candidate: no tigs, zero-length region.  All members are given defined values
+  // instead of being left indeterminate.
+  candidatePop() : bubble(NULL), target(NULL), bgn(0), end(0) {
+  }
+
+  candidatePop(Unitig *bubble_, Unitig *target_, uint32 bgn_, uint32 end_)
+    : bubble(bubble_), target(target_), bgn(bgn_), end(end_) {
+  }
+
+  // Neither tig is deleted here; the candidate only refers to them.
+  ~candidatePop() {
+  }
+
+  Unitig  *bubble;   // Tig being considered for popping.
+  Unitig  *target;   // Tig the bubble's reads were placed in.
+
+  uint32   bgn;      // Low coordinate of the region on the target.
+  uint32   end;      // High coordinate of the region on the target.
+
+  vector<overlapPlacement>  placed;   // Placements of the bubble's reads assigned to this region.
+};
+
+
+// Maps a potential bubble unitig to the list of ids of the target unitigs it could be popped into.
+typedef map<uint32, vector<uint32> > BubTargetList;
+
+
+
+// Decide which unitigs can be bubbles, and which unitigs each could be popped into.
+//
+// Every non-singleton tig is scanned read by read.  Contained reads are skipped; for each
+// remaining read, the set of other tigs it overlaps is collected, ignoring overlaps to unplaced
+// reads, to singleton tigs, to the tig itself, and to tigs shorter than this one.  After each
+// read, the tig remains a candidate only if some other tig is overlapped by at least
+// BUBBLE_READ_FRACTION of the non-contained reads seen so far.  The first 15 non-contained reads
+// never disqualify a tig; scanning of a tig stops at the first read that does.
+//
+//  unitigs          - all tigs, indexed by tig id; entries may be NULL.
+//  potentialBubbles - output.  For each candidate tig (keyed by its index in 'unitigs'), the ids
+//                     of every tig that met the BUBBLE_READ_FRACTION threshold.
+
+void
+findPotentialBubbles(UnitigVector &unitigs,
+                     BubTargetList &potentialBubbles) {
+  uint32 tiLimit      = unitigs.size();
+  uint32 tiNumThreads = omp_get_max_threads();   // Only reported in the log; the loop below is serial.
+
+  writeLog("bubbleDetect()-- working on " F_U32 " unitigs, with " F_U32 " threads.\n", tiLimit, tiNumThreads);
+
+  for (uint32 ti=0; ti<tiLimit; ti++) {
+    Unitig *tig = unitigs[ti];
+
+    if ((tig == NULL) ||               // Not a tig, ignore it.
+        (tig->ufpath.size() == 1))     // Singleton, handled elsewhere.
+      continue;
+
+    uint32 nonContainedReads = 0;
+    bool   validBubble       = true;
+
+    map<uint32,uint32> tigOlapsTo;     // For each other tig, how many of our reads overlap it.
+
+    uint32 fiLimit = tig->ufpath.size();
+
+    for (uint32 fi=0; (validBubble == true) && (fi<fiLimit); fi++) {
+      uint32 rid = tig->ufpath[fi].ident;
+
+      if (OG->isContained(rid) == true)   // Don't need to check contained reads.  If their container
+        continue;                         // passes the tests below, the contained read will too.
+
+      nonContainedReads++;
+
+      uint32      ovlLen = 0;
+      BAToverlap *ovl    = OC->getOverlaps(rid, AS_MAX_ERATE, ovlLen);
+
+      set<uint32> readOlapsTo;            // Tigs this one read overlaps; a set, so each counts once.
+
+      for (uint32 oi=0; oi<ovlLen; oi++) {
+        uint32  ovlTigID = Unitig::fragIn(ovl[oi].b_iid);
+        Unitig *ovlTig   = unitigs[ovlTigID];
+
+        // Skip this overlap if it is to an unplaced read, to a singleton tig, to ourself,
+        // or to a unitig that is shorter than us.  We can not pop this tig as a bubble
+        // in any of those cases.
+
+        if ((ovlTigID == 0) ||
+            (ovlTig == NULL) ||
+            (ovlTig->ufpath.size() == 1) ||
+            (ovlTig->id() == tig->id()) ||
+            (ovlTig->getLength() < tig->getLength()))
+          continue;
+
+        // Otherwise, remember that we had an overlap to ovlTig.
+
+        //writeLog("tig %u read %u overlap to tig %u read %u\n",
+        //         tig->id(), rid, ovlTigID, ovl[oi].b_iid);
+
+        readOlapsTo.insert(ovlTigID);
+      }
+
+      //writeLog("tig %8u read %8u has %u olaps\n", tig->id(), rid, readOlapsTo.size());
+
+      // Transfer the per-read counts to the per-unitig counts: add one to the counter for each tig
+      // that we have overlaps to.
+
+      for (set<uint32>::iterator it=readOlapsTo.begin(); it != readOlapsTo.end(); ++it)
+        tigOlapsTo[*it]++;
+
+      // Decide if we're still a valid potential bubble.  To allow for differences in the bubble,
+      // we do not require every read to overlap the same tig: it is enough that some tig is
+      // overlapped by at least BUBBLE_READ_FRACTION of the non-contained reads seen so far.
+
+      validBubble = false;
+
+      for (map<uint32,uint32>::iterator it=tigOlapsTo.begin(); it != tigOlapsTo.end(); ++it)
+        if (it->second >= BUBBLE_READ_FRACTION * nonContainedReads)
+          validBubble = true;
+
+      // If we've not seen that many reads, pretend it's a valid bubble.  It'll get screened out later.
+
+      if (nonContainedReads < 16)
+        validBubble = true;
+    }
+
+    // If not validBubble, report.
+
+#if 0
+    if (validBubble == false) {
+      writeLog("notValidBubble tig %8d expects %6u reads\n", tig->id(), nonContainedReads);
+
+      for (map<uint32,uint32>::iterator it=tigOlapsTo.begin(); it != tigOlapsTo.end(); ++it)
+        writeLog("  to tig %8u overlaps %6u\n", it->first, it->second);
+    }
+#endif
+
+    // If validBubble, count the tigs that enough of our reads overlap.  Those are the tigs
+    // saved in potentialBubbles below.
+
+    uint32 nTigs = 0;
+
+    if (validBubble) {
+      for (map<uint32,uint32>::iterator it=tigOlapsTo.begin(); it != tigOlapsTo.end(); ++it)
+        if (it->second >= BUBBLE_READ_FRACTION * nonContainedReads)
+          nTigs++;
+    }
+
+    // ALWAYS log potential bubbles.
+
+    if (nTigs > 0) {
+      writeLog("\n");
+      writeLog("potential bubble tig %8u length %9u nReads %7u to %3u tigs:\n",
+               tig->id(), tig->getLength(), (uint32)tig->ufpath.size(), nTigs);   // size() is cast to match %u.
+
+      for (map<uint32,uint32>::iterator it=tigOlapsTo.begin(); it != tigOlapsTo.end(); ++it) {
+        if (it->second >= BUBBLE_READ_FRACTION * nonContainedReads) {
+          Unitig *dest = unitigs[it->first];
+
+          writeLog("                 tig %8u length %9u nReads %7u\n", dest->id(), dest->getLength(), (uint32)dest->ufpath.size());
+
+          potentialBubbles[ti].push_back(dest->id());
+        }
+      }
+    }
+  }
+
+  flushLog();
+}
+
+
+
+
+// Find filtered placements for all the reads in the potential bubble tigs.
+//
+// Each non-contained read that lives in a potential bubble is placed with
+// placeFragUsingOverlaps().  A placement is kept only if it is
+//   - to a different tig than the read's own,
+//   - to an existing, non-singleton tig, with fCoverage of at least 0.99,
+//   - to one of the target tigs recorded for the read's tig in potentialBubbles, and
+//   - scored at least 0.5 by the target's overlapConsistentWithTig().
+//
+// Returns a new[]-allocated array of numFragments()+1 vectors, indexed by read id.  The caller
+// must delete [] it.
+
+vector<overlapPlacement> *
+findBubbleReadPlacements(UnitigVector &unitigs,
+                         BubTargetList &potentialBubbles,
+                         double deviationBubble) {
+  uint32 fiLimit      = FI->numFragments();
+  uint32 fiNumThreads = omp_get_max_threads();
+  uint32 fiBlockSize  = (fiLimit < 1000 * fiNumThreads) ? fiNumThreads : fiLimit / 999;
+
+  vector<overlapPlacement> *placed = new vector<overlapPlacement> [fiLimit + 1];
+
+  // 'placed' has fiLimit+1 slots and is indexed by read id, so the loop must visit id fiLimit
+  // too; stopping at fi<fiLimit would never place the last read.
+
+#pragma omp parallel for schedule(dynamic, fiBlockSize)
+  for (uint32 fi=0; fi<fiLimit+1; fi++) {
+    uint32 rdAtigID = Unitig::fragIn(fi);
+
+    if ((rdAtigID == 0) ||              // Read not placed in a tig, ignore it.
+        (OG->isContained(fi)))          // Read is contained, ignore it.
+      continue;
+
+    // Look up the targets with find(), not operator[]: this runs concurrently in every thread,
+    // and find() never modifies the map.
+
+    BubTargetList::iterator candidates = potentialBubbles.find(rdAtigID);
+
+    if (candidates == potentialBubbles.end())   // Read isn't in a potential bubble, ignore it.
+      continue;
+
+    vector<uint32> &pbubbles = candidates->second;
+
+    Unitig *rdAtig = unitigs[rdAtigID];
+    ufNode *rdA    = &rdAtig->ufpath[ Unitig::pathPosition(fi) ];
+
+    // NOTE(review): the overlaps fetched here are not used in this function; the call is kept
+    // in case the cache lookup has side effects.  Confirm, then remove.
+
+    uint32      ovlLen = 0;
+    BAToverlap *ovl    = OC->getOverlaps(rdA->ident, AS_MAX_ERATE, ovlLen);
+
+    (void)ovl;
+
+    //if ((fi % 100) == 0)
+    //  fprintf(stderr, "findBubbleReadPlacements()-- read %8u with %6u overlaps - %6.2f%% finished.\r",
+    //          rdA->ident, ovlLen, 100.0 * fi / fiLimit);
+
+    // Compute all placements for this read.
+
+    vector<overlapPlacement> placements;
+
+    placeFragUsingOverlaps(unitigs, AS_MAX_ERATE, NULL, rdA->ident, placements);
+
+    // Weed out placements that aren't for bubbles, or that are for bubbles but are poor quality.  Or are to ourself!
+
+    for (uint32 pi=0; pi<placements.size(); pi++) {
+      uint32  rdBtigID = placements[pi].tigID;
+      Unitig *rdBtig   = unitigs[rdBtigID];
+
+      // Read count of the destination tig, zero if there is no such tig.  Computed once so the
+      // log lines below never dereference a NULL tig.
+
+      uint32 rdBreads = (rdBtig == NULL) ? 0 : (uint32)rdBtig->ufpath.size();
+
+      uint32 lo = (placements[pi].position.bgn < placements[pi].position.end) ? placements[pi].position.bgn : placements[pi].position.end;
+      uint32 hi = (placements[pi].position.bgn < placements[pi].position.end) ? placements[pi].position.end : placements[pi].position.bgn;
+
+      double erate = placements[pi].errors / placements[pi].aligned;
+
+      // Ignore the placement if it is to ourself.
+
+      if (rdAtigID == rdBtigID) {
+        //writeLog("tig %6u frag %8u -> tig %6u %6u reads at %8u-%8u (cov %7.5f erate %6.4f) - SAME TIG\n",
+        //         rdAtigID, placements[pi].frgID, placements[pi].tigID, rdBreads, placements[pi].position.bgn, placements[pi].position.end, placements[pi].fCoverage, erate);
+        continue;
+      }
+
+      // Ignore the placement if it is to a non-tig / singleton read, or if it didn't place the
+      // read fully.
+
+      if ((rdBtigID == 0) ||
+          (rdBtig == NULL) ||
+          (rdBreads == 1) ||
+          (placements[pi].fCoverage < 0.99)) {
+        if (logFileFlagSet(LOG_BUBBLE_DETAIL))
+          writeLog("tig %6u frag %8u -> tig %6u %6u reads at %8u-%8u (cov %7.5f erate %6.4f) - PARTIALLY PLACED\n",
+                   rdAtigID, placements[pi].frgID, placements[pi].tigID, rdBreads, placements[pi].position.bgn, placements[pi].position.end, placements[pi].fCoverage, erate);
+        continue;
+      }
+
+      // Ignore the placement if it isn't to one of our bubble-popping candidate unitigs.
+
+      bool dontcare = true;
+
+      for (uint32 pb=0; pb<pbubbles.size(); pb++) {
+        if (pbubbles[pb] == rdBtigID) {
+          dontcare = false;
+          break;
+        }
+      }
+
+      if (dontcare) {
+        if (logFileFlagSet(LOG_BUBBLE_DETAIL))
+          writeLog("tig %6u frag %8u -> tig %6u %6u reads at %8u-%8u (cov %7.5f erate %6.4f) - NOT CANDIDATE TIG\n",
+                   rdAtigID, placements[pi].frgID, placements[pi].tigID, rdBreads, placements[pi].position.bgn, placements[pi].position.end, placements[pi].fCoverage, erate);
+        continue;
+      }
+
+      // Ignore the placement if it is too diverged from the destination tig.
+
+      if (rdBtig->overlapConsistentWithTig(deviationBubble, lo, hi, erate) < 0.5) {
+        if (logFileFlagSet(LOG_BUBBLE_DETAIL))
+          writeLog("tig %6u frag %8u -> tig %6u %6u reads at %8u-%8u (cov %7.5f erate %6.4f) - HIGH ERROR\n",
+                   rdAtigID, placements[pi].frgID, placements[pi].tigID, rdBreads, placements[pi].position.bgn, placements[pi].position.end, placements[pi].fCoverage, erate);
+        continue;
+      }
+
+      // Good placement!
+
+      if (logFileFlagSet(LOG_BUBBLE_DETAIL))
+        writeLog("tig %6u frag %8u -> tig %6u %6u reads at %8u-%8u (cov %7.5f erate %6.4f)\n",
+                 rdAtigID, placements[pi].frgID, placements[pi].tigID, rdBreads, placements[pi].position.bgn, placements[pi].position.end, placements[pi].fCoverage, erate);
+
+      placed[fi].push_back(placements[pi]);
+    }
+  }
+
+  return(placed);
+}
+
+
+
+
+
+
+// Classify every potential bubble tig as an orphan, a bubble, or a repeat, and merge orphans
+// into their target tig.
+//
+// Bubble popping cannot be done in parallel -- there is a race condition when both unitigs
+// A and B are considering merging in unitig C.
+//
+// For each potential bubble, the read placements from findBubbleReadPlacements() are merged into
+// intervals on each target tig.  Intervals between 0.75x and 1.25x the bubble length become
+// candidate targets.  A candidate target holding a placement for every read of the bubble counts
+// as an orphan placement; otherwise, one holding placements for both the first and the last read
+// of the bubble counts as a bubble placement.  Then:
+//   - more than one orphan/bubble placement:  the tig is flagged _isRepeat;
+//   - exactly one, a bubble placement:        the tig is flagged _isBubble;
+//   - exactly one, an orphan placement:       its reads are added to the target, the tig is deleted.
+//
+// _isBubble and _isRepeat are cleared on every tig first, and every non-singleton tig is
+// re-sorted at the end.
+//
+//  unitigs         - all tigs; modified in place.
+//  deviationBubble - passed through to findBubbleReadPlacements().
+
+void
+popBubbles(UnitigVector &unitigs,
+           double deviationBubble) {
+
+  BubTargetList potentialBubbles;
+
+  findPotentialBubbles(unitigs, potentialBubbles);
+
+  writeLog("\n");
+  writeLog("Found " F_SIZE_T " potential bubbles.\n", potentialBubbles.size());
+  writeLog("\n");
+
+  vector<overlapPlacement> *placed = findBubbleReadPlacements(unitigs, potentialBubbles, deviationBubble);
+
+  // We now have, in 'placed', a list of all the places that each read could be placed.  Decide if there is a _single_
+  // place for each bubble to be popped.
+
+  uint32 tiLimit = unitigs.size();
+
+  // Clear flags.
+  for (uint32 ti=0; ti<tiLimit; ti++) {
+    if (unitigs[ti]) {
+      unitigs[ti]->_isBubble = false;
+      unitigs[ti]->_isRepeat = false;
+    }
+  }
+
+  // Process the placements, one potential bubble at a time.  This loop is serial on purpose.
+
+  for (uint32 ti=0; ti<tiLimit; ti++) {
+    if (potentialBubbles.count(ti) == 0)   // Not a potential bubble
+      continue;
+
+    // Scan the bubble, decide if there are _ANY_ read placements.  Log appropriately.
+
+    Unitig *bubble        = unitigs[ti];
+    bool    hasPlacements = false;
+
+    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++)
+      if (placed[bubble->ufpath[fi].ident].size() > 0)
+        hasPlacements = true;
+
+    if (hasPlacements == false)
+      writeLog("potential bubble %u had no valid placements (all were not contained in target tig)\n", ti);
+    else
+      writeLog("potential bubble %u\n", ti);
+
+    // Split the placements into piles for each target and build an interval list for each target.
+    // For each read in the tig, convert the vector of placements into interval lists, one list per target tig.
+
+    map<uint32, intervalList<uint32> *> targetIntervals;
+
+    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
+      uint32 readID = bubble->ufpath[fi].ident;
+
+      for (uint32 pp=0; pp<placed[readID].size(); pp++) {
+        const overlapPlacement &pl = placed[readID][pp];
+
+        uint32 tid = pl.tigID;
+
+        assert(pl.frgID > 0);
+
+        uint32 bgn = (pl.position.bgn < pl.position.end) ? pl.position.bgn : pl.position.end;
+        uint32 end = (pl.position.bgn < pl.position.end) ? pl.position.end : pl.position.bgn;
+
+        if (targetIntervals[tid] == NULL)
+          targetIntervals[tid] = new intervalList<uint32>;
+
+        //writeLog("read %u -> tig %u intervals %u-%u\n", readID, tid, bgn, end);
+
+        targetIntervals[tid]->add(bgn, end-bgn);
+      }
+    }
+
+    // The candidatePops in 'targets' are owned by this iteration and are deleted at its end.
+
+    vector<candidatePop *> targets;
+
+    // Squish the intervals.  Create new candidatePops for each interval that isn't too big or
+    // small.  Assign each overlapPlacements to the correct candidatePop.
+
+    for (map<uint32, intervalList<uint32> *>::iterator it=targetIntervals.begin(); it != targetIntervals.end(); ++it) {
+      uint32                targetID = it->first;
+      intervalList<uint32> *IL       = it->second;
+      Unitig               *target   = unitigs[targetID];
+
+      // 'placed' was computed before any tig was popped.  If this target has since been popped
+      // (and deleted) itself, there is nothing left to pop into.
+
+      if (target == NULL) {
+        delete IL;
+        continue;
+      }
+
+      IL->merge();
+
+      // Discard intervals that are significantly too small or large.  Save the ones that are
+      // nicely sized.  Logging here isn't terribly useful, it's just repeated (out of order) later
+      // when we try to make sense of the read alignments.
+
+      for (uint32 ii=0; ii<IL->numberOfIntervals(); ii++) {
+        if ((IL->hi(ii) - IL->lo(ii) < 0.75 * bubble->getLength()) ||   // Too small!
+            (1.25 * bubble->getLength() < IL->hi(ii) - IL->lo(ii))) {   // Too big!
+          writeLog("tig %8u length %9u -> target %8u piece %2u position %9u-%9u length %8u - size mismatch, discarded\n",
+                   bubble->id(), bubble->getLength(),
+                   targetID, ii, IL->lo(ii), IL->hi(ii), IL->hi(ii) - IL->lo(ii));
+          continue;
+        }
+
+        writeLog("tig %8u length %9u -> target %8u piece %2u position %9u-%9u length %8u\n",
+                 bubble->id(), bubble->getLength(),
+                 targetID, ii, IL->lo(ii), IL->hi(ii), IL->hi(ii) - IL->lo(ii));
+
+        targets.push_back(new candidatePop(bubble, target, IL->lo(ii), IL->hi(ii)));
+      }
+
+      delete IL;
+    }
+
+    targetIntervals.clear();
+
+    // If no targets, nothing to do.
+
+    if (targets.size() == 0)
+      continue;
+
+    // Run through the placements again, and assign them to the correct target.
+    //
+    // For each read:
+    //   For each acceptable placement:
+    //     For each target location:
+    //       If the placement is for this target, save it.
+
+    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
+      uint32 readID = bubble->ufpath[fi].ident;
+
+      for (uint32 pp=0; pp<placed[readID].size(); pp++) {
+        const overlapPlacement &pl = placed[readID][pp];
+
+        uint32 tid = pl.tigID;
+
+        uint32 bgn = (pl.position.bgn < pl.position.end) ? pl.position.bgn : pl.position.end;
+        uint32 end = (pl.position.bgn < pl.position.end) ? pl.position.end : pl.position.bgn;
+
+        for (uint32 tt=0; tt<targets.size(); tt++)
+          if ((targets[tt]->target->id() == tid) &&
+              (targets[tt]->bgn < end) && (bgn < targets[tt]->end))
+            targets[tt]->placed.push_back(pl);
+      }
+    }
+
+    // Count the number of targets that have all the reads (later: in the correct order, etc, etc).
+
+    set<uint32> tigReads;   // Reads in the bubble tig.
+    set<uint32> tgtReads;   // Reads in the bubble that have a placement in the target.
+
+    // Remove duplicate placements from each target.
+
+    for (uint32 tt=0; tt<targets.size(); tt++) {
+      candidatePop *t = targets[tt];
+
+      // Detect duplicates, keep the one with lower error.  There are a lot of duplicate
+      // placements, logging isn't terribly useful.  A discarded placement is overwritten with a
+      // default-constructed one, which the frgID == 0 tests then recognize as empty.
+
+      for (uint32 aa=0; aa<t->placed.size(); aa++) {
+        for (uint32 bb=0; bb<t->placed.size(); bb++) {
+          if ((aa == bb) ||
+              (t->placed[aa].frgID != t->placed[bb].frgID) ||
+              (t->placed[aa].frgID == 0) ||
+              (t->placed[bb].frgID == 0))
+            continue;
+
+          if (t->placed[aa].errors / t->placed[aa].aligned < t->placed[bb].errors / t->placed[bb].aligned) {
+#ifdef SHOW_MULTIPLE_PLACEMENTS
+            writeLog("duplicate read alignment for tig %u read %u - better %u-%u %.4f - worse %u-%u %.4f\n",
+                     t->placed[aa].tigID, t->placed[aa].frgID,
+                     t->placed[aa].position.bgn, t->placed[aa].position.end, t->placed[aa].errors / t->placed[aa].aligned,
+                     t->placed[bb].position.bgn, t->placed[bb].position.end, t->placed[bb].errors / t->placed[bb].aligned);
+#endif
+            t->placed[bb] = overlapPlacement();
+          } else {
+#ifdef SHOW_MULTIPLE_PLACEMENTS
+            writeLog("duplicate read alignment for tig %u read %u - better %u-%u %.4f - worse %u-%u %.4f\n",
+                     t->placed[aa].tigID, t->placed[aa].frgID,
+                     t->placed[bb].position.bgn, t->placed[bb].position.end, t->placed[bb].errors / t->placed[bb].aligned,
+                     t->placed[aa].position.bgn, t->placed[aa].position.end, t->placed[aa].errors / t->placed[aa].aligned);
+#endif
+            t->placed[aa] = overlapPlacement();
+          }
+        }
+      }
+
+      // Get rid of any now-empty entries.  Walking from the back, an empty slot is overwritten
+      // with the last element (already known to be non-empty, or the slot itself) and the vector
+      // is shortened by one.
+
+      for (uint32 aa=t->placed.size(); aa--; ) {
+        if (t->placed[aa].frgID == 0) {
+          t->placed[aa] = t->placed.back();
+          t->placed.pop_back();
+        }
+      }
+    }
+
+    // Make a set of the reads in the bubble.  It is only used to report the expected read count.
+
+    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++)
+      tigReads.insert(bubble->ufpath[fi].ident);
+
+    uint32 nOrphan      = 0;   // Full coverage; bubble can be popped.
+    uint32 orphanTarget = 0;
+
+    uint32 nBubble      = 0;   // Partial coverage, bubble cannot be popped.
+    uint32 bubbleTarget = 0;
+
+    for (uint32 tt=0; tt<targets.size(); tt++) {
+      tgtReads.clear();
+
+      for (uint32 op=0; op<targets[tt]->placed.size(); op++) {
+        if (logFileFlagSet(LOG_BUBBLE_DETAIL))
+          writeLog("tig %8u length %9u -> target %8u piece %2u position %9u-%9u length %8u - read %7u at %9u-%9u\n",
+                   bubble->id(), bubble->getLength(),
+                   targets[tt]->target->id(), tt, targets[tt]->bgn, targets[tt]->end, targets[tt]->end - targets[tt]->bgn,
+                   targets[tt]->placed[op].frgID,
+                   targets[tt]->placed[op].position.bgn, targets[tt]->placed[op].position.end);
+
+        assert(targets[tt]->placed[op].frgID > 0);
+        tgtReads.insert(targets[tt]->placed[op].frgID);
+      }
+
+      // Count the number of consecutive reads from the 5' or 3' end of the bubble that are placed
+      // in the target.
+      //
+      // Also, count the number of reads in the bubble that are placed in the target.  Likely the
+      // same as n5 + n3.
+
+      uint32 n5 = 0;
+      uint32 n3 = 0;
+      uint32 nt = 0;
+
+      for (uint32 fi=0; fi<bubble->ufpath.size(); fi++)
+        if (tgtReads.count(bubble->ufpath[fi].ident) > 0)
+          n5++;
+        else
+          break;
+
+      for (uint32 fi=bubble->ufpath.size(); fi-->0; )
+        if (tgtReads.count(bubble->ufpath[fi].ident) > 0)
+          n3++;
+        else
+          break;
+
+      for (uint32 fi=0; fi<bubble->ufpath.size(); fi++)
+        if (tgtReads.count(bubble->ufpath[fi].ident) > 0)
+          nt++;
+
+      // Report every target that will count as an orphan or bubble placement.
+
+      if ((nt == bubble->ufpath.size()) ||
+          ((n5 > 0) && (n3 > 0)))
+        writeLog("tig %8u length %9u -> target %8u piece %2u position %9u-%9u length %8u - expected %3" F_SIZE_TP " reads, had %3" F_SIZE_TP " reads.  n5=%3u n3=%3u nt=%3u\n",
+                 bubble->id(), bubble->getLength(),
+                 targets[tt]->target->id(), tt, targets[tt]->bgn, targets[tt]->end, targets[tt]->end - targets[tt]->bgn,
+                 tigReads.size(),
+                 tgtReads.size(), n5, n3, nt);
+
+      // Decide if this is a bubble, orphan from construction, or repeat.
+
+      if (nt == bubble->ufpath.size()) {
+        nOrphan++;
+        orphanTarget = tt;
+      }
+
+      else if ((n5 > 0) && (n3 > 0)) {
+        nBubble++;
+        bubbleTarget = tt;
+      }
+    }
+
+    (void)bubbleTarget;   // Recorded for symmetry with orphanTarget; not otherwise used.
+
+    // Act on the classification.  A single if/else chain, so that every path reaches the
+    // cleanup of 'targets' below.
+
+    if (nOrphan + nBubble == 0) {
+      // No placements, pbbbt.
+      //writeLog("tig %8u length %8u reads %6u had no bubble or orphan placements.\n", bubble->id(), bubble->getLength(), bubble->ufpath.size());
+    }
+
+    else if (nOrphan + nBubble > 1) {
+      // Multiple orphan and/or bubble placements, it's a repeat.
+      writeLog("tig %8u length %8u reads %6u - repeat - %u orphan %u bubble placements.\n",
+               bubble->id(), bubble->getLength(), (uint32)bubble->ufpath.size(),
+               nOrphan, nBubble);
+      writeLog("\n");
+      bubble->_isRepeat = true;
+    }
+
+    else if (nBubble > 0) {
+      // A bubble placement, mark it as a bubble so it can be skipped during repeat detection.
+      writeLog("tig %8u length %8u reads %6u - bubble\n",
+               bubble->id(), bubble->getLength(), (uint32)bubble->ufpath.size());
+      writeLog("\n");
+      bubble->_isBubble = true;
+    }
+
+    else {
+      // Otherwise, it's an orphan, move the reads to the proper place.
+      candidatePop *dest = targets[orphanTarget];
+
+      writeLog("tig %8u length %8u reads %6u - orphan\n", bubble->id(), bubble->getLength(), (uint32)bubble->ufpath.size());
+
+      for (uint32 op=0; op<dest->placed.size(); op++) {
+        ufNode frg;
+
+        frg.ident        = dest->placed[op].frgID;
+        frg.contained    = 0;
+        frg.parent       = 0;
+        frg.ahang        = 0;
+        frg.bhang        = 0;
+        frg.position.bgn = dest->placed[op].position.bgn;
+        frg.position.end = dest->placed[op].position.end;
+
+        writeLog("move read %u from tig %u to tig %u %u-%u\n",
+                 frg.ident,
+                 bubble->id(),
+                 dest->target->id(), frg.position.bgn, frg.position.end);
+
+        dest->target->addFrag(frg, 0, false);
+      }
+
+      writeLog("\n");
+
+      unitigs[bubble->id()] = NULL;
+      delete bubble;
+    }
+
+    // Release this bubble's candidates.  They only refer to tigs, so this frees no tig.
+
+    for (uint32 tt=0; tt<targets.size(); tt++)
+      delete targets[tt];
+
+    targets.clear();
+  }  // Over all bubbles
+
+  writeLog("\n");   // Needed if no bubbles are popped.
+
+  delete [] placed;
+
+  // Sort reads in all the tigs.  Overkill, but correct.
+
+  for (uint32 ti=0; ti<tiLimit; ti++) {
+    Unitig *tig = unitigs[ti];
+
+    if ((tig == NULL) ||               // Not a tig, ignore it.
+        (tig->ufpath.size() == 1))     // Singleton, already sorted.
+      continue;
+
+    tig->sort();
+  }
+}
diff --git a/src/bogart/AS_BAT_Joining.H b/src/bogart/AS_BAT_PopBubbles.H
similarity index 58%
rename from src/bogart/AS_BAT_Joining.H
rename to src/bogart/AS_BAT_PopBubbles.H
index 917849e..8ab8db6 100644
--- a/src/bogart/AS_BAT_Joining.H
+++ b/src/bogart/AS_BAT_PopBubbles.H
@@ -13,21 +13,9 @@
* Canu branched from Celera Assembler at its revision 4587.
* Canu branched from the kmer project at its revision 1994.
*
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_Joining.H
- *
* Modifications by:
*
- * Brian P. Walenz from 2010-DEC-06 to 2013-AUG-01
- * are Copyright 2010,2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz on 2014-DEC-19
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
+ * Brian P. Walenz beginning on 2016-MAR-11
* are a 'United States Government Work', and
* are released in the public domain
*
@@ -35,11 +23,16 @@
* full conditions and disclaimers for each license.
*/
-#ifndef INCLUDE_AS_BAT_JOINING
-#define INCLUDE_AS_BAT_JOINING
+#ifndef INCLUDE_AS_BAT_BUBBLEPOPPING
+#define INCLUDE_AS_BAT_BUBBLEPOPPING
+#include "AS_global.H"
+#include "AS_BAT_BestOverlapGraph.H"
#include "AS_BAT_Unitig.H"
-void joinUnitigs(UnitigVector &unitigs, bool enableJoining);
+void
+popBubbles(UnitigVector &unitigs,
+ double deviationBubble);
+
-#endif // INCLUDE_AS_BAT_JOINING
+#endif // INCLUDE_AS_BAT_BUBBLEPOPPING
diff --git a/src/bogart/AS_BAT_PopBubbles.txt b/src/bogart/AS_BAT_PopBubbles.txt
new file mode 100644
index 0000000..ac90b0e
--- /dev/null
+++ b/src/bogart/AS_BAT_PopBubbles.txt
@@ -0,0 +1,87 @@
+
+
+findPotentialBubbles()
+ - any unitig where at least half the reads have an overlap to some other unitig is a candidate.
+ - returns a map of unitig id (the bubble) to a vector of unitig ids (the potential poppers).
+
+findBubbleReadPlacements()
+ - threaded on the reads
+ - for reads in potential bubbles, uses placeFragUsingOverlaps() to find high-quality
+ alignments to unitigs that can pop the bubble.
+ - returns an array of vector<overlapPlacement> - one vector per read - of the placements
+ for this read. Placements are high quality and to popper tigs only.
+
+popBubbles()
+ - findPotentialBubbles()
+ - findBubbleReadPlacements()
+
+ - for each candidate tig:
+ - build a map of unitig id (target) to an intervalList (targetIntervals)
+ - add to the corresponding intervalList each bubble read placement, squish to intervals when done
+ - filter out intervals that are too short (0.75x) or too long (1.25x) the bubble tig size
+ - save size-consistent intervals to a vector of candidatePop (targets)
+ - clear targetIntervals list (it's no longer needed)
+ - for each read in the candidate tig
+ - assign placements (from findBubbleReadPlacements()) to targets. some placements have no target
+ - we now have a list of targets[] with: bubble*, target*, target bgn/end, vector of placed bubble reads in this region
+ - decide if the candidate is a bubble, a repeat or an orphan
+ - a bubble has the 5' and 3' most reads aligned, and only one target
+ - a repeat has all reads aligned, and multiple targets
+ - an orphan has all reads aligned, and one target
+
+
+----------------------------------------
+OLD-STYLE BUBBLE POPPING
+
+
+mergeSplitJoin()
+
+new intersectionList(unitigs)
+
+foreach unitig (NOT parallel)
+ skip if fewer than 15 reads or 300 bases
+ mergeBubbles() - based on previously discovered intersections
+ stealBubbles() - nothing here, not implemented
+
+for each unitig (parallel) - unitigs created here are not reprocessed
+ skip if fewer than 15 reads or 300 bases
+ markRepeats()
+ markChimera()
+
+----------------------------------------
+
+mergeBubbles(unitigs, erateBubble, targetUnitig, intersectionList)
+
+The intersection list is a 'reverse mapping of all BestEdges between unitigs'.
+For each read, a list of the incoming edges from other unitigs.
+
+foreach intersection point
+ get potential bubble unitig
+ if bubble unitig doesn't exist, it was popped already
+ if bubble unitig is more than 500k, it is skipped
+ if bubble unitig is the current unitig, it is skipped
+ findEnds(), skip if none found
+ checkEnds(), skip if bad
+ checkFrags(), skip if fails
+ bubble is merged, remove it
+
+findEnds() - return value is first/last reads
+ find the first/last non-contained read
+ get the correct edge
+ get the unitig that edge points to
+ discard the edge if the unitig it points to is the bubble (??)
+ if both unitigs are null, return false
+
+checkEnds() - computes placement of first/last reads, false if inconsistent
+ place both reads using overlaps
+ find min/max coords of suspected correct placement
+ if placedLength < bubbleLength / 2 -> return false, bubble shrank too much
+ if placedLength > bubbleLength * 2 -> return false, bubble grew too much
+ if first/last reads are the same, return true
+ check order and orientation between bubble placement and popped placement
+ bubble placed forward - reads have same orient and same order
+ bubble placed reverse - reads have diff orient and diff order
+ if so, return true
+ return false
+
+checkFrags() - based on edges, we think the bubble goes here, try to place all the reads
diff --git a/src/bogart/AS_BAT_PopulateUnitig.C b/src/bogart/AS_BAT_PopulateUnitig.C
index 204217f..4e0c505 100644
--- a/src/bogart/AS_BAT_PopulateUnitig.C
+++ b/src/bogart/AS_BAT_PopulateUnitig.C
@@ -35,9 +35,11 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
#include "AS_BAT_PopulateUnitig.H"
@@ -53,11 +55,13 @@ populateUnitig(Unitig *unitig,
// Nothing to add!
return;
- ufNode frag = unitig->ufpath.back();
+ ufNode frag = unitig->ufpath.back();
// The ID of the last fragment in the unitig, and the end we should walk off of it.
- int32 lastID = frag.ident;
- bool last3p = (frag.position.bgn < frag.position.end);
+ int32 lastID = frag.ident;
+ bool last3p = (frag.position.bgn < frag.position.end);
+
+ uint32 nAdded = 0;
// While there are fragments to add AND those fragments to add are not already in a unitig,
// construct a reverse-edge, and add the fragment.
@@ -71,23 +75,18 @@ populateUnitig(Unitig *unitig,
// innie/outtie, we need to reverse the overlap to maintain that the A fragment is forward.
if (last3p == bestnext->frag3p())
- bestprev.set(lastID, last3p, bestnext->bhang(), bestnext->ahang());
+ bestprev.set(lastID, last3p, bestnext->bhang(), bestnext->ahang(), bestnext->evalue());
else
- bestprev.set(lastID, last3p, -bestnext->ahang(), -bestnext->bhang());
-
- // Call the usual placement routine to place the next fragment relative to the last one. This
- // call depends on which end of the frag-to-be-added we are working with.
+ bestprev.set(lastID, last3p, -bestnext->ahang(), -bestnext->bhang(), bestnext->evalue());
- frag.ident = bestnext->fragId();
+ // We just made 'bestprev' pointing from read 'bestnext->fragId()' end 'bestnext->frag3p()'
+ // back to read 'lastID' end 'last3p'. Compute the placement.
- int32 bidx5 = -1, bidx3 = -1;
-
- if (unitig->placeFrag(frag, bidx5, (bestnext->frag3p() ? NULL : &bestprev),
- frag, bidx3, (bestnext->frag3p() ? &bestprev : NULL))) {
- unitig->addFrag(frag, 0, logFileFlagSet(LOG_POPULATE_UNITIG));
+ if (unitig->placeFrag(frag, bestnext->fragId(), bestnext->frag3p(), &bestprev)) {
+ unitig->addFrag(frag, 0, false);
+ nAdded++;
} else {
-
writeLog("ERROR: Failed to place frag %d into BOG path.\n", frag.ident);
assert(0);
}
@@ -99,6 +98,18 @@ populateUnitig(Unitig *unitig,
bestnext = OG->getBestEdgeOverlap(lastID, last3p);
}
+
+ if (logFileFlagSet(LOG_BUILD_UNITIG))
+ if (bestnext->fragId() == 0)
+ writeLog("Stopped adding at frag %u/%c' because no next best edge. Added %u reads.\n",
+ lastID, (last3p) ? '3' : '5',
+ nAdded);
+ else
+ writeLog("Stopped adding at frag %u/%c' beacuse next best frag %u/%c' is in unitig %u. Added %u reads.\n",
+ lastID, (last3p) ? '3' : '5',
+ bestnext->fragId(), bestnext->frag3p() ? '3' : '5',
+ Unitig::fragIn(bestnext->fragId()),
+ nAdded);
}
@@ -113,7 +124,7 @@ populateUnitig(UnitigVector &unitigs,
(OG->isContained(fi) == true)) // Skip contained
return;
- Unitig *utg = unitigs.newUnitig(logFileFlagSet(LOG_POPULATE_UNITIG));
+ Unitig *utg = unitigs.newUnitig(logFileFlagSet(LOG_BUILD_UNITIG));
// Add a first fragment -- to be 'compatable' with the old code, the first fragment is added
// reversed, we walk off of its 5' end, flip it, and add the 3' walk.
@@ -127,9 +138,8 @@ populateUnitig(UnitigVector &unitigs,
frag.bhang = 0;
frag.position.bgn = FI->fragmentLength(fi);
frag.position.end = 0;
- frag.containment_depth = 0;
- utg->addFrag(frag, 0, logFileFlagSet(LOG_POPULATE_UNITIG));
+ utg->addFrag(frag, 0, logFileFlagSet(LOG_BUILD_UNITIG));
// Add fragments as long as there is a path to follow...from the 3' end of the first fragment.
@@ -169,7 +179,7 @@ populateUnitig(UnitigVector &unitigs,
}
#endif
- if (logFileFlagSet(LOG_POPULATE_UNITIG))
+ if (logFileFlagSet(LOG_BUILD_UNITIG))
writeLog("Adding 5' edges off of frag %d in unitig %d\n",
utg->ufpath.back().ident, utg->id());
@@ -178,7 +188,7 @@ populateUnitig(UnitigVector &unitigs,
utg->reverseComplement(false);
- if (logFileFlagSet(LOG_POPULATE_UNITIG))
+ if (logFileFlagSet(LOG_BUILD_UNITIG))
writeLog("Adding 3' edges off of frag %d in unitig %d\n",
utg->ufpath.back().ident, utg->id());
diff --git a/src/bogart/AS_BAT_PromoteToSingleton.C b/src/bogart/AS_BAT_PromoteToSingleton.C
index c96010b..ec9af9a 100644
--- a/src/bogart/AS_BAT_PromoteToSingleton.C
+++ b/src/bogart/AS_BAT_PromoteToSingleton.C
@@ -35,7 +35,8 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
+#include "AS_global.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_Unitig.H"
// If we are not reconstructing repeats, promote all the unplaced fragments to new unitigs.
@@ -64,7 +65,6 @@ promoteToSingleton(UnitigVector &unitigs) {
frag.bhang = 0;
frag.position.bgn = 0;
frag.position.end = FI->fragmentLength(fi);
- frag.containment_depth = 0;
utg->addFrag(frag, 0, false);
}
diff --git a/src/AS_UTL/stddev.C b/src/bogart/AS_BAT_PromoteToSingleton.H
similarity index 64%
rename from src/AS_UTL/stddev.C
rename to src/bogart/AS_BAT_PromoteToSingleton.H
index 2b587b7..29a2b18 100644
--- a/src/AS_UTL/stddev.C
+++ b/src/bogart/AS_BAT_PromoteToSingleton.H
@@ -15,19 +15,11 @@
*
* This file is derived from:
*
- * src/AS_TER/analyzePosMap-libraryFate.C
+ * bogart/AS_BAT_BreakRepeats.H
*
* Modifications by:
*
- * Brian P. Walenz from 2012-DEC-04 to 2013-SEP-23
- * are Copyright 2012-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2015-APR-10 to 2015-AUG-18
- * are Copyright 2015 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
+ * Brian P. Walenz beginning on 2016-APR-13
* are a 'United States Government Work', and
* are released in the public domain
*
@@ -35,7 +27,9 @@
* full conditions and disclaimers for each license.
*/
-#include "stddev.H"
+#ifndef INCLUDE_AS_BAT_PROMOTE_TO_SINGLETON
-#include <algorithm>
+void
+promoteToSingleton(UnitigVector &unitigs);
+#endif // INCLUDE_AS_BAT_PROMOTE_TO_SINGLETON
diff --git a/src/bogart/AS_BAT_ReconstructRepeats.C b/src/bogart/AS_BAT_ReconstructRepeats.C
index c6c8d46..bc010bf 100644
--- a/src/bogart/AS_BAT_ReconstructRepeats.C
+++ b/src/bogart/AS_BAT_ReconstructRepeats.C
@@ -35,14 +35,18 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
#include "AS_BAT_ChunkGraph.H"
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
+
+#include "AS_BAT_PlaceFragUsingOverlaps.H"
#include "AS_BAT_PopulateUnitig.H"
#include "AS_BAT_PlaceContains.H"
-#include "AS_BAT_Unitig.H"
// estimate read error rate from best overlaps (per library?)
@@ -50,7 +54,8 @@
void
reconstructRepeats(UnitigVector &unitigs,
- double erateGraph) {
+ double erateGraph,
+ double deviationGraph) {
// Build a set<> of all the unplaced fragments, then construct a new BOG and CG from which we
// construct unitigs.
@@ -64,7 +69,7 @@ reconstructRepeats(UnitigVector &unitigs,
if (Unitig::fragIn(fi) == 0)
unplaced.insert(fi);
- OG = new BestOverlapGraph(erateGraph / 2.0, &unplaced);
+ OG = new BestOverlapGraph(erateGraph / 2.0, deviationGraph, &unplaced);
CG = new ChunkGraph(&unplaced);
writeLog("==> BUILDING REPEAT UNITIGS from %d fragments.\n", unplaced.size());
@@ -79,7 +84,7 @@ reconstructRepeats(UnitigVector &unitigs,
writeLog("==> BUILDING REPEAT UNITIGS placing contained fragments.\n");
- placeContainsUsingBestOverlaps(unitigs);
+ placeUnplacedUsingAllOverlaps(unitigs, "PREFIX");
delete OG;
delete CG;
diff --git a/src/bogart/AS_BAT_RepeatJunctionEvidence.H b/src/bogart/AS_BAT_RepeatJunctionEvidence.H
deleted file mode 100644
index da86fb5..0000000
--- a/src/bogart/AS_BAT_RepeatJunctionEvidence.H
+++ /dev/null
@@ -1,232 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_RepeatJunctionEvidence.H
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2012-FEB-14 to 2013-SEP-05
- * are Copyright 2012-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz from 2014-DEC-19 to 2014-DEC-22
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-class repeatJunctionEvidence {
-public:
- repeatJunctionEvidence(Unitig *target, overlapPlacement &op);
-
- bool operator<(repeatJunctionEvidence const that) const {
- return(point < that.point);
- };
-
- // Placed position in the unitig. The fragment has overlap evidence that covers
- // 'coveredbgn-coveredend' and the unaligned hangs of the fragment cover the remaining.
- // Orientation of the fragment is not tracked in this position.
-
- int32 uncovered5bgn;
- int32 uncovered5end;
-
- int32 coveredbgn;
- int32 coveredend;
-
- int32 uncovered3bgn;
- int32 uncovered3end;
-
- bool is3;
-
- int32 point;
-
- uint32 eviFrag; // Evidence fragment (orientation is meaningless)
- FragmentEnd tigFrag; // Unitig fragment
-
- uint32 numOvl3p; // Weight/score of the incoming fragment
- uint32 numOvl5p;
-
- overlapPlacement place; // Possibly unused
-};
-
-
-class repeatUniqueBreakPoint {
-public:
- repeatUniqueBreakPoint() {
- point = 0;
- breakFrag = FragmentEnd();
- rptLeft = false;
- };
- repeatUniqueBreakPoint(uint32 point_, FragmentEnd breakFrag_, bool rptLeft_) {
- point = point_;
- breakFrag = breakFrag_;
- rptLeft = rptLeft_;
- };
-
- bool operator<(repeatUniqueBreakPoint const that) const {
- return(point < that.point);
- };
-
- uint32 point; // Position in the unitig where we want to break
- FragmentEnd breakFrag; // Fragment end that position is
- bool rptLeft; // Repeat is to the left of the point
-};
-
-
-
-class repeatRegion {
-public:
- repeatRegion(uint32 bgn_, uint32 end_) {
- bgn = bgn_;
- end = end_;
- };
-
- uint32 bgn;
- uint32 end;
-
- repeatUniqueBreakPoint rujBgn;
- repeatUniqueBreakPoint rujEnd;
-};
-
-
-
-
-
-
-
-
-repeatJunctionEvidence::repeatJunctionEvidence(Unitig *target, overlapPlacement &op) {
- FragmentEnd *end3;
- FragmentEnd *end5;
-
- if (op.position.bgn < op.position.end) {
- // Fragment is placed forward.
- uncovered5bgn = op.position.bgn;
- uncovered5end = op.verified.bgn;
-
- coveredbgn = op.verified.bgn;
- coveredend = op.verified.end;
-
- uncovered3bgn = op.verified.end;
- uncovered3end = op.position.end;
-
- end5 = &op.frag5p;
- end3 = &op.frag3p;
-
- } else {
- // Fragment is placed reverse.
- uncovered5bgn = op.position.end;
- uncovered5end = op.verified.end;
-
- coveredbgn = op.verified.end;
- coveredend = op.verified.bgn;
-
- uncovered3bgn = op.verified.bgn;
- uncovered3end = op.position.bgn;
-
- end5 = &op.frag3p;
- end3 = &op.frag5p;
- }
-
- if (uncovered5bgn < 0) {
- uncovered5bgn = 0; // Ignore ends that extend past the edge of the unitig
- uncovered5end = 0;
- }
-
- if (target->getLength() <= uncovered3end) {
- uncovered3bgn = 0;
- uncovered3end = 0;
- }
-
- // Set flags indicating (true) if the unaligned portion on that end is significant enough to
- // be trusted as a real unaligned piece.
- //
- // If both are true, this looks like a short repeat in a long fragment. The only way (I can
- // think of anyway) this could occur is from an alignment to a short contained fragment.
- //
- // If both are false, the fragment has aligned fully.
-
-#define UNCOVERED_NOISE_FILTER 10
-
- bool save5 = (uncovered5bgn + UNCOVERED_NOISE_FILTER < uncovered5end);
- bool save3 = (uncovered3bgn + UNCOVERED_NOISE_FILTER < uncovered3end);
-
- eviFrag = op.frgID;
-
- numOvl3p = 0;
- numOvl5p = 0;
-
- if (save5 == save3) {
- tigFrag = FragmentEnd();
- uncovered5bgn = 0;
- uncovered5end = 0;
- uncovered3bgn = 0;
- uncovered3end = 0;
-
- return;
- }
-
- place = op;
-
- if (save5) {
- tigFrag = *end5;
- point = coveredbgn;
- is3 = false;
- uncovered3bgn = 0;
- uncovered3end = 0;
-
- } else {
- tigFrag = *end3;
- point = coveredend;
- is3 = true;
- uncovered5bgn = 0;
- uncovered5end = 0;
- }
-
- // When the fragment is placed on the unitig, it returns the first/last fragment aligning
- // to each end, and the (dovetail) end it aligns to. If the fragment aligns with a spur
- // (as here) the end computation is backwards.
- //
- // ------------ aligned fragment
- // 5--------3 unitig fragment -- placement returns 3' end
- //
- // \----------- aligned fragment with spur
- // 5--------------3 unitig fragment -- placement return 3' end also
- //
- // We reverse these ends to show the end with the break point.
- //
- tigFrag = FragmentEnd(tigFrag.fragId(), !tigFrag.frag3p());
-
-#if 0
- writeLog("markRepeats()-- tig frag %8d ovl frag %8d %6d-%6d %5.2f%% tig pos %8d/%c' (%4d) %6d-%6d (%4d) %8d/%c'\n",
- op[pl].refID, op[pl].frgID,
- op[pl].covered.bgn,
- op[pl].covered.end,
- op[pl].fCoverage * 100.0,
- end5->fragId(), (end5->frag5p() ? '5' : '3'),
- uncovered5end - uncovered5bgn,
- coveredbgn, coveredend,
- uncovered3end - uncovered3bgn,
- end3->fragId(), (end3->frag5p() ? '5' : '3'));
-#endif
-}
diff --git a/src/bogart/AS_BAT_SetParentAndHang.C b/src/bogart/AS_BAT_SetParentAndHang.C
index e0b6ce6..6f336e2 100644
--- a/src/bogart/AS_BAT_SetParentAndHang.C
+++ b/src/bogart/AS_BAT_SetParentAndHang.C
@@ -35,208 +35,148 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
+#include "AS_BAT_FragmentInfo.H"
+#include "AS_BAT_OverlapCache.H"
+
#include "AS_BAT_Unitig.H"
-#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_UnitigVector.H"
#include "AS_BAT_SetParentAndHang.H"
void
setParentAndHang(UnitigVector &unitigs) {
+ return;
+
+ map<uint32,bool> forward;
+ map<uint32,bool> allreads;
+
+ // Just for stats, build a map of the reads in the unitig.
+
+
for (uint32 ti=0; ti<unitigs.size(); ti++) {
- Unitig *utg = unitigs[ti];
+ Unitig *tig = unitigs[ti];
- if (utg == NULL)
+ if (tig == NULL)
continue;
- if (utg->ufpath.size() == 0)
+ if (tig->ufpath.size() == 0)
continue;
- // Reset parent and hangs for everything.
+ // Reset parent and hangs, build a map of the reads in the unitig.
- for (uint32 fi=1; fi<utg->ufpath.size(); fi++) {
- ufNode *frg = &utg->ufpath[fi];
+ for (uint32 fi=1; fi<tig->ufpath.size(); fi++) {
+ ufNode *frg = &tig->ufpath[fi];
- frg->parent = 0;
- frg->ahang = 0;
- frg->bhang = 0;
+ frg->parent = 0;
+ frg->ahang = 0;
+ frg->bhang = 0;
+
+ allreads[frg->ident] = true;
}
// For each fragment, set parent/hangs using the edges.
- for (uint32 fi=0; fi<utg->ufpath.size(); fi++) {
- ufNode *frg = &utg->ufpath[fi];
+ for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
+ ufNode *frg = &tig->ufpath[fi];
+
+
+
+ // Remember that we've placed this read, and if it was forward or reverse.
+ forward[frg->ident] = (frg->position.bgn < frg->position.end);
- // If we're contained, gee, I sure hope the container is here!
+ // If the first read, there is no parent possible.
+ if (ti == 0)
+ continue;
- BestContainment *bestcont = OG->getBestContainer(frg->ident);
+ // Otherwise, find the thickest overlap to any read already placed in the unitig.
- if (bestcont->isContained == true) {
- if (utg->fragIn(bestcont->container) == utg->id()) {
- int32 pi = utg->pathPosition(bestcont->container);
- ufNode *par = &utg->ufpath[pi];
+ uint32 olapsLen = 0;
+ BAToverlap *olaps = OC->getOverlaps(frg->ident, AS_MAX_EVALUE, olapsLen);
- assert(par->ident == bestcont->container);
+ uint32 tt = UINT32_MAX;
+ uint32 ttLen = 0;
+ double ttErr = DBL_MAX;
- frg->parent = par->ident;
+ int32 ah = 0;
+ int32 bh = 0;
- // The hangs assume the container is forward; adjust if not so.
- if (par->position.bgn < par->position.end) {
- frg->ahang = bestcont->a_hang;
- frg->bhang = bestcont->b_hang;
- } else {
- frg->ahang = -bestcont->b_hang;
- frg->bhang = -bestcont->a_hang;
- }
+ uint32 notPresent = 0; // Potential parent isn't in the unitig
+ uint32 notPlaced = 0; // Potential parent isn't placed yet
+ uint32 negHang = 0; // Potential parent has a negative hang to a placed read
+ uint32 goodOlap = 0;
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog("setParentAndHang()-- CONTAINED - frag %d at %d,%d edge to cont frag %d at %d,%d -- hang %d,%d\n",
- frg->ident, frg->position.bgn, frg->position.end,
- par->ident, par->position.bgn, par->position.end,
- frg->ahang, frg->bhang);
- } else {
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog("setParentAndHang()-- CONTAINED - frag %d at %d,%d edge to cont frag %d IN DIFFERENT UNITIG %d\n",
- frg->ident, frg->position.bgn, frg->position.end,
- bestcont->container, utg->fragIn(bestcont->container));
- }
+ for (uint32 oo=0; oo<olapsLen; oo++) {
+ if (allreads.count(olaps[oo].b_iid) == 0) {
+ notPresent++;
continue;
}
- // Nope, not contained. If we don't have a parent set, see if one of our best overlaps
- // can set it.
-
- BestEdgeOverlap *bestedge5 = OG->getBestEdgeOverlap(frg->ident, false);
- BestEdgeOverlap *bestedge3 = OG->getBestEdgeOverlap(frg->ident, true);
-
- // Consensus is expected parent/hangs to be relative to the parent fragment. This is used
- // ONLY to place the fragment, not to orient the fragment. Orientation comes from the
- // absolute positioning coordinates.
- //
- // Interestingly, all four overlap transformations are used here.
- //
- // The inner if tests (on fragment orientation) should be asserts, but due to imprecise
- // layouts, they are sometimes violated:
- // A fragment from 271-547 had a 5'overlap to something after it;
- // the frag after was at 543-272, close enough to a tie to screw up placements
- //
-
- if (bestedge5->fragId() > 0) {
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog("setParentAndHang()-- BEST5 - frag %d in unitig %d 5' to %d/%c' in unitig %d\n",
- frg->ident, utg->id(),
- bestedge5->fragId(), bestedge5->frag3p() ? '3' : '5',
- utg->fragIn(bestedge5->fragId()));
-
- if (utg->fragIn(bestedge5->fragId()) == utg->id()) {
- uint32 pi5 = utg->pathPosition(bestedge5->fragId());
- ufNode *oth = &utg->ufpath[pi5];
-
- assert(oth->ident == bestedge5->fragId());
-
- if ((pi5 < fi) && (isReverse(frg->position) == false)) {
- // Edge is to a fragment before us, off our 5' end, and we are forward.
- frg->parent = bestedge5->fragId();
- frg->ahang = -bestedge5->ahang();
- frg->bhang = -bestedge5->bhang();
- assert(frg->ahang >= 0);
-
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog(" - -> frag %d at %d,%d 5' edge to prev frag %d at %d,%d -- hang %d,%d\n",
- frg->ident, frg->position.bgn, frg->position.end,
- oth->ident, oth->position.bgn, oth->position.end,
- frg->ahang, frg->bhang);
-
- } else if ((pi5 > fi) && (isReverse(frg->position) == true)) {
- // Edge is to a fragment after us, off our 5' end, and we are reverse.
- // Use this edge to set the other fragment parent and hang.
- // That fragment must pass the same order/orient tests.
- // Off the others 3' end, fragment must be reverse.
- // Off the others 5' end, fragment must be forward.
- if (((bestedge5->frag3p() == true) && (isReverse(oth->position) == true)) ||
- ((bestedge5->frag3p() == false) && (isReverse(oth->position) == false))) {
- oth->parent = frg->ident;
- oth->ahang = -bestedge5->bhang();
- oth->bhang = -bestedge5->ahang();
- assert(oth->ahang >= 0);
-
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog(" - <- frag %d at %d,%d %c' edge fr prev frag %d at %d,%d -- hang %d,%d\n",
- oth->ident, oth->position.bgn, oth->position.end, bestedge5->frag3p() ? '3' : '5',
- frg->ident, frg->position.bgn, frg->position.end,
- frg->ahang, frg->bhang);
- } else {
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog(" - <- frag %d at %d,%d %c' edge fr prev frag %d at %d,%d -- NOT VALID\n",
- oth->ident, oth->position.bgn, oth->position.end, bestedge5->frag3p() ? '3' : '5',
- frg->ident, frg->position.bgn, frg->position.end);
- }
-
- } else {
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog(" - -- frag %d at %d,%d 5' edge to prev frag %d at %d,%d -- NOT VALID\n",
- frg->ident, frg->position.bgn, frg->position.end,
- oth->ident, oth->position.bgn, oth->position.end);
- }
- }
+ if (forward.count(olaps[oo].b_iid) == 0) { // Potential parent not placed yet
+ notPlaced++;
+ continue;
}
- if (bestedge3->fragId() > 0) {
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog("setParentAndHang()-- BEST3 - frag %d in unitig %d 3' to %d/%c' in unitig %d\n",
- frg->ident, utg->id(),
- bestedge3->fragId(), bestedge3->frag3p() ? '3' : '5',
- utg->fragIn(bestedge5->fragId()));
-
- if (utg->fragIn(bestedge3->fragId()) == utg->id()) {
- uint32 pi3 = utg->pathPosition(bestedge3->fragId());
- ufNode *oth = &utg->ufpath[pi3];
-
- assert(oth->ident == bestedge3->fragId());
-
- // Edge is to a fragment before us, off our 3' end, and we are reverse.
- if ((pi3 < fi) && (isReverse(frg->position) == true)) {
- frg->parent = oth->ident;
- frg->ahang = bestedge3->bhang();
- frg->bhang = bestedge3->ahang();
- assert(frg->ahang >= 0);
-
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog(" - -> frag %d at %d,%d 3' edge to prev frag %d at %d,%d -- hang %d,%d\n",
- frg->ident, frg->position.bgn, frg->position.end,
- oth->ident, oth->position.bgn, oth->position.end,
- frg->ahang, frg->bhang);
-
- } else if ((pi3 > fi) && (isReverse(frg->position) == false)) {
- if (((bestedge3->frag3p() == true) && (isReverse(oth->position) == true)) ||
- ((bestedge3->frag3p() == false) && (isReverse(oth->position) == false))) {
- oth->parent = frg->ident;
- oth->ahang = bestedge3->ahang();
- oth->bhang = bestedge3->bhang();
- assert(oth->ahang >= 0);
-
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog(" - <- frag %d at %d,%d %c' edge fr prev frag %d at %d,%d -- hang %d,%d\n",
- oth->ident, oth->position.bgn, oth->position.end, bestedge5->frag3p() ? '3' : '5',
- frg->ident, frg->position.bgn, frg->position.end,
- frg->ahang, frg->bhang);
- } else {
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog(" - <- frag %d at %d,%d %c' edge fr prev frag %d at %d,%d -- NOT VALID\n",
- oth->ident, oth->position.bgn, oth->position.end, bestedge5->frag3p() ? '3' : '5',
- frg->ident, frg->position.bgn, frg->position.end);
- }
-
- } else {
- if (logFileFlags & LOG_SET_PARENT_AND_HANG)
- writeLog(" - -- frag %d at %d,%d 3' edge to prev frag %d at %d,%d -- NOT VALID\n",
- frg->ident, frg->position.bgn, frg->position.end,
- oth->ident, oth->position.bgn, oth->position.end);
- }
- }
+ uint32 l = FI->overlapLength(olaps[oo].a_iid, olaps[oo].b_iid, olaps[oo].a_hang, olaps[oo].b_hang);
+
+ // Compute the hangs, so we can ignore those that would place this read before the parent.
+ // This is a flaw somewhere in bogart, and should be caught and fixed earlier.
+
+ // Consensus is expecting to have the hangs for the parent read, not this read, and some
+ // fiddling is needed to flip the overlap for this:
+ // First, swap the reads so it's b-vs-a.
+ // Then, flip the overlap if the b read is in the unitig flipped.
+
+ int32 ah = (olaps[oo].flipped == false) ? (-olaps[oo].a_hang) : (olaps[oo].b_hang);
+ int32 bh = (olaps[oo].flipped == false) ? (-olaps[oo].b_hang) : (olaps[oo].a_hang);
+
+ if (forward[olaps[oo].b_iid] == false) {
+ swap(ah, bh);
+ ah = -ah;
+ bh = -bh;
}
- } // Over all fragment
+
+ // If the ahang is negative, we flubbed up somewhere, and want to place this read before
+ // the parent (even though positions say to place it after, because we sorted by position).
+
+ if (ah < 0) {
+ //fprintf(stderr, "ERROR: read %u in tig %u has negative ahang from parent read %u, ejected.\n",
+ // frg->ident, ti, olaps[oo].b_iid);
+ negHang++;
+ continue;
+ }
+
+ // The overlap is good. Count it as such.
+
+ goodOlap++;
+
+ // If the overlap is worse than the one we already have, we don't care.
+
+ if ((l < ttLen) || // Too short
+ (ttErr < olaps[oo].erate)) { // Too noisy
+ continue;
+ }
+
+ tt = oo;
+ ttLen = l;
+ ttErr = olaps[oo].erate;
+ }
+
+ // If no thickest overlap, we screwed up somewhere. Complain and eject the read.
+
+ if (tt == UINT32_MAX) {
+ fprintf(stderr, "ERROR: read %u in tig %u has no overlap to any previous read, ejected. %u overlaps total. %u negative hang. %u to read not in tig. %u to read later in tig. %u good overlaps.\n",
+ frg->ident, tig->tigID(), olapsLen, negHang, notPresent, notPlaced, goodOlap);
+ continue;
+ }
+
+ frg->parent = olaps[tt].b_iid;
+ frg->ahang = ah;
+ frg->bhang = bh;
+
+
+
+ } // Over all fragments
} // Over all unitigs
}
diff --git a/src/bogart/AS_BAT_SetParentAndHang.H b/src/bogart/AS_BAT_SetParentAndHang.H
index cd1fa14..5fb13ea 100644
--- a/src/bogart/AS_BAT_SetParentAndHang.H
+++ b/src/bogart/AS_BAT_SetParentAndHang.H
@@ -38,6 +38,8 @@
#ifndef INCLUDE_AS_BAT_SETPARENTANDHANG
#define INCLUDE_AS_BAT_SETPARENTANDHANG
+#include "AS_BAT_UnitigVector.H"
+
void setParentAndHang(UnitigVector &unitigs);
#endif // INCLUDE_AS_BAT_SETPARENTANDHANG
diff --git a/src/bogart/AS_BAT_SplitDiscontinuous.C b/src/bogart/AS_BAT_SplitDiscontinuous.C
index 7e0e24d..210ea69 100644
--- a/src/bogart/AS_BAT_SplitDiscontinuous.C
+++ b/src/bogart/AS_BAT_SplitDiscontinuous.C
@@ -35,10 +35,10 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
+#include "AS_BAT_Unitig.H"
static
@@ -168,48 +168,14 @@ void splitDiscontinuousUnitigs(UnitigVector &unitigs, uint32 minOverlap) {
continue;
}
- // No thick overlap found. We need to break right here before the current fragment.
-
- // If there is exactly one fragment, and it's contained, move it to the
- // container. (This has a small positive benefit over just making every read a singleton).
- //
- if ((splitFragsLen == 1) &&
- (splitFrags[0].contained != 0)) {
- Unitig *dangler = unitigs[tig->fragIn(splitFrags[0].contained)];
-
- // If the parent isn't in a unitig, we must have shattered the repeat unitig it was in.
- // Do the same here.
-
- if (dangler == NULL) {
- if (logFileFlagSet(LOG_SPLIT_DISCONTINUOUS))
- writeLog("splitDiscontinuous()-- singleton frag "F_U32" shattered.\n",
- splitFrags[0].ident);
- Unitig::removeFrag(splitFrags[0].ident);
-
- } else {
- assert(dangler->id() == tig->fragIn(splitFrags[0].contained));
-
- if (logFileFlagSet(LOG_SPLIT_DISCONTINUOUS))
- writeLog("splitDiscontinuous()-- old tig "F_U32" with "F_SIZE_T" fragments (contained frag "F_U32" moved here).\n",
- dangler->id(), dangler->ufpath.size() + 1, splitFrags[0].ident);
-
- BestContainment *bestcont = OG->getBestContainer(splitFrags[0].ident);
+ // No thick overlap found. We need to break right here before the current fragment. We used
+ // to try to place contained reads with their container. For simplicity, we instead just
+ // make a new unitig, letting the main() decide what to do with them (e.g., bubble pop or try
+ // to place all reads in singleton unitigs as contained reads again).
- assert(bestcont->isContained == true);
-
- dangler->addContainedFrag(splitFrags[0].ident, bestcont, false);
- dangler->bubbleSortLastFrag();
-
- assert(dangler->id() == Unitig::fragIn(splitFrags[0].ident));
- }
- }
-
- // Otherwise, make an entirely new unitig for these fragments.
- else {
- numCreated++;
- makeNewUnitig(unitigs, splitFragsLen, splitFrags);
- tig = unitigs[ti];
- }
+ numCreated++;
+ makeNewUnitig(unitigs, splitFragsLen, splitFrags);
+ tig = unitigs[ti];
// Done with the split, save the current fragment. This resets everything.
diff --git a/src/bogart/AS_BAT_Unitig.C b/src/bogart/AS_BAT_Unitig.C
index 81696d7..d345120 100644
--- a/src/bogart/AS_BAT_Unitig.C
+++ b/src/bogart/AS_BAT_Unitig.C
@@ -35,8 +35,9 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
+#include "AS_global.H"
#include "AS_BAT_Unitig.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
static std::map<uint32,int>* containPartialOrder;
@@ -45,53 +46,11 @@ uint32* Unitig::_inUnitig = NULL;
uint32* Unitig::_pathPosition = NULL;
-#warning WHAT REALLLY HAPPENS IF NO BACKBONE NODE, OR NO PREVIOUS BACKBONE NODE
+#undef SHOW_PROFILE_CONSTRUCTION
+#undef SHOW_PROFILE_CONSTRUCTION_DETAILS
-ufNode Unitig::getLastBackboneNode(void) {
- for (int32 fi=ufpath.size()-1; fi >= 0; fi--) {
- ufNode &node = ufpath[fi];
-
- if (node.contained)
- continue;
-
- return(node);
- }
-
- writeLog("Unitig::getLastBackboneNode()-- WARNING: unitig %d has no backbone nodes, all contained!\n", id());
- ufNode last;
- memset(&last, 0, sizeof(ufNode));
- return(last);
-}
-
-
-ufNode Unitig::getLastBackboneNode(uint32 &prevID) {
- ufNode last;
-
- memset(&last, 0, sizeof(ufNode));
-
- prevID = 0;
-
- for (int32 fi=ufpath.size()-1; (fi >= 0) && (prevID == 0); fi--) {
- ufNode *node = &ufpath[fi];
-
- if (node->contained)
- continue;
-
- if (last.ident == 0)
- // Save the last dovetail node, but keep looking....
- last = *node;
- else
- // ...for the next to last ID.
- prevID = node->ident;
- }
-
- return(last);
-}
-
-
-
-
-void Unitig::reverseComplement(bool doSort) {
+void
+Unitig::reverseComplement(bool doSort) {
// If there are contained fragments, we need to sort by position to place them correctly after
// their containers. If there are no contained fragments, sorting can break the initial unitig
@@ -127,69 +86,433 @@ void Unitig::reverseComplement(bool doSort) {
-int
-ufNodeCmp(const void *a, const void *b){
- ufNode *impa = (ufNode *)a;
- ufNode *impb = (ufNode *)b;
- int32 abgn = (impa->position.bgn < impa->position.end) ? impa->position.bgn : impa->position.end;
- int32 aend = (impa->position.bgn < impa->position.end) ? impa->position.end : impa->position.bgn;
+class epOlapDat {
+public:
+ epOlapDat(uint32 p, bool o, double e) {
+ pos = p;
+ open = o;
+ erate = e;
+ };
- int32 bbgn = (impb->position.bgn < impb->position.end) ? impb->position.bgn : impb->position.end;
- int32 frag3p = (impb->position.bgn < impb->position.end) ? impb->position.end : impb->position.bgn;
+ bool operator<(const epOlapDat &that) const { return(pos < that.pos); };
- // NEWSORT does not work. When bubbles are popped, we add non-contained fragments to
- // a unitig, but just stick them at the end of the list. NEWSORT would then maintain
- // this ordering, which is an error.
- //
-#undef NEWSORT
+ uint32 pos;
+ bool open;
+ double erate;
+};
+
+
+
+
+
+void
+Unitig::computeArrivalRate(const char *UNUSED(prefix),
+ const char *UNUSED(label),
+ vector<int32> *hist) {
+
+ sort();
+
+ for (uint32 fi=0; fi<ufpath.size(); fi++) {
+ ufNode *rdA = &ufpath[fi];
+ bool rdAfwd = (rdA->position.bgn < rdA->position.end);
+ int32 rdAlo = (rdAfwd) ? rdA->position.bgn : rdA->position.end;
+ int32 rdAhi = (rdAfwd) ? rdA->position.end : rdA->position.bgn;
+
+ for (uint32 fj=1; fj<6; fj++) {
+ if (fi + fj < ufpath.size()) {
+ ufNode *rdB = &ufpath[fi+fj];
+ bool rdBfwd = (rdB->position.bgn < rdB->position.end);
+ int32 rdBlo = (rdBfwd) ? rdB->position.bgn : rdB->position.end;
+ int32 rdBhi = (rdBfwd) ? rdB->position.end : rdB->position.bgn;
+
+ uint32 dist = rdBlo - rdAlo;
+
+ hist[fj].push_back(dist);
+ }
+ }
+ }
+}
+
+
+
+
+
+
+#if 1
+void
+Unitig::computeErrorProfileApproximate(const char *UNUSED(prefix), const char *UNUSED(label)) {
+}
+#endif
+
+
+
+void
+Unitig::computeErrorProfile(const char *UNUSED(prefix), const char *UNUSED(label)) {
+
+#ifdef SHOW_PROFILE_CONSTRUCTION
+ writeLog("Find error profile for tig "F_U32" of length "F_U32" with "F_SIZE_T" reads.\n",
+ id(), getLength(), ufpath.size());
+#endif
+
+ errorProfile.clear();
+ errorProfileIndex.clear();
+
+ vector<epOlapDat> olaps;
+
+
+
+ // Pick a set of reads to use. We need full coverage in overlaps.
+
+
+
+
+ // Scan overlaps to find those that we care about, and save their endpoints.
+
+ for (uint32 fi=0; fi<ufpath.size(); fi++) {
+ ufNode *rdA = &ufpath[fi];
+ bool rdAfwd = (rdA->position.bgn < rdA->position.end);
+ int32 rdAlo = (rdAfwd) ? rdA->position.bgn : rdA->position.end;
+ int32 rdAhi = (rdAfwd) ? rdA->position.end : rdA->position.bgn;
+
+ uint32 ovlLen = 0;
+ BAToverlap *ovl = OC->getOverlaps(rdA->ident, AS_MAX_ERATE, ovlLen);
+
+ uint32 nDiffTig = 0;
+ uint32 nDiffPos = 0;
+ uint32 nIsect = 0;
+
+ for (uint32 oi=0; oi<ovlLen; oi++) {
+
+ // Reads in different tigs? Don't care about this overlap.
+
+ if (id() != Unitig::fragIn(ovl[oi].b_iid)) {
+ nDiffTig++;
+ continue;
+ }
+
+ // Reads in same tig but not overlapping? Don't care about this overlap.
+
+ ufNode *rdB = &ufpath[ Unitig::pathPosition(ovl[oi].b_iid) ];
+ bool rdBfwd = (rdB->position.bgn < rdB->position.end);
+ int32 rdBlo = (rdBfwd) ? rdB->position.bgn : rdB->position.end;
+ int32 rdBhi = (rdBfwd) ? rdB->position.end : rdB->position.bgn;
+
+ if ((rdAhi < rdBlo) || (rdBhi < rdAlo)) {
+ nDiffPos++;
+#ifdef SHOW_PROFILE_CONSTRUCTION_DETAILS
+ writeLog("diffPos rdA %u=%u %u-%u rdB %u=%u %u-%u\n",
+ ovl[oi].a_iid, rdA->ident, rdAlo, rdAhi,
+ ovl[oi].b_iid, rdB->ident, rdBlo, rdBhi);
+#endif
+ continue;
+ }
+
+ // Now figure out what region is covered by the overlap.
+
+ int32 tiglo = 0;
+ int32 tighi = FI->fragmentLength(rdA->ident);
+
+ if (ovl[oi].a_hang > 0)
+ tiglo += ovl[oi].a_hang; // Positive hang!
+
+ if (ovl[oi].b_hang < 0)
+ tighi += ovl[oi].b_hang; // Negative hang!
+
+ assert(0 <= tiglo);
+ assert(0 <= tighi);
+ assert(tiglo <= tighi);
+ assert(tiglo <= FI->fragmentLength(rdA->ident));
+ assert(tighi <= FI->fragmentLength(rdA->ident));
+
+ // Offset and adjust to tig coordinates
+
+ // Because the read is placed with a lot of fudging in the positions, we need
+ // to scale the coordinates we compute here.
+ double sc = (rdAhi - rdAlo) / (double)FI->fragmentLength(rdA->ident);
+
+ uint32 bgn = (uint32)floor(rdAlo + sc * tiglo);
+ uint32 end = (uint32)floor(rdAlo + sc * tighi);
+
+ nIsect++;
+
+ olaps.push_back(epOlapDat(bgn, true, ovl[oi].erate));
+ olaps.push_back(epOlapDat(end, false, ovl[oi].erate));
+ }
+
+#ifdef SHOW_PROFILE_CONSTRUCTION_DETAILS
+ writeLog("tig %u read %u with %u overlaps - diffTig %u diffPos %u intersect %u\n",
+ id(), rdA->ident, ovlLen, nDiffTig, nDiffPos, nIsect);
+#endif
+ }
-#ifdef NEWSORT
- bool aIsCont = OG->isContained(impa->ident);
- bool bIsCont = OG->isContained(impb->ident);
+#ifdef SHOW_PROFILE_CONSTRUCTION
+ writeLog("tig %u generated "F_SIZE_T" olaps.\n", id(), olaps.size());
+#endif
+
+ // Sort.
+
+ std::sort(olaps.begin(), olaps.end());
- if ((aIsCont == false) && (bIsCont == false))
- // Both dovetail nodes, keep same order
- return((int)impa->containment_depth - (int)impb->containment_depth);
+ // Convert coordinates into intervals. Conceptually, squish out the duplicate numbers, then
+ // create an interval for every adjacent pair. We need to add intervals for the first and last
+ // region. And one more, for convenience, to hold the final 'close' values on intervals that
+ // extend to the end of the unitig.
+
+ if (olaps[0].pos != 0)
+ errorProfile.push_back(epValue(0, olaps[0].pos));
+
+ for (uint32 bb=0, ii=1; ii<olaps.size(); ii++) {
+ if (olaps[bb].pos == olaps[ii].pos)
+ continue;
+
+ errorProfile.push_back(epValue(olaps[bb].pos, olaps[ii].pos));
+
+#ifdef SHOW_PROFILE_CONSTRUCTION_DETAILS
+ writeLog("tig %u make region bb=%u ii=%i - %u %u\n", id(), bb, ii, olaps[bb].pos, olaps[ii].pos);
#endif
- if (abgn != bbgn)
- // Return negative for the one that starts first.
- return(abgn - bbgn);
+ bb = ii;
+ }
- if (aend != frag3p)
- // Return negative for the one that ends last.
- return(frag3p - aend);
+ if (olaps[olaps.size()-1].pos != getLength())
+ errorProfile.push_back(epValue(olaps[olaps.size()-1].pos, getLength()));
-#ifdef NEWSORT
- if (bIsCont == true)
- // b is contained in a, so it comes after a.
- return(-1);
+ errorProfile.push_back(epValue(getLength(), getLength()+1));
- if (aIsCont == true)
- // a is contained in b, so it comes after b.
- return(1);
+
+#ifdef SHOW_PROFILE_CONSTRUCTION
+ writeLog("tig %u generated "F_SIZE_T" profile regions.\n", id(), errorProfile.size());
#endif
- // Both contained, fallback on depth added, negative for earliest added
- return((int)impa->containment_depth - (int)impb->containment_depth);
+ // Walk both lists, adding positive erates and removing negative erates.
+
+ stdDev<double> curDev;
+
+ for (uint32 oo=0, ee=0; oo<olaps.size(); oo++) {
+ if (olaps[oo].pos != errorProfile[ee].bgn) // Move to the next profile if the pos is different.
+ ee++; // By construction, this single step should be all we need.
+
+#ifdef SHOW_PROFILE_CONSTRUCTION_DETAILS
+ writeLog("oo=%u bgn=%u -- ee=%u bgn=%u -- olaps.size "F_SIZE_T" errorProfile.size "F_SIZE_T" -- insert %d erate %f\n",
+ oo, olaps[oo].pos,
+ ee, errorProfile[ee].bgn,
+ olaps.size(), errorProfile.size(),
+ olaps[oo].open, olaps[oo].erate);
+#endif
+
+ assert(olaps[oo].pos == errorProfile[ee].bgn);
+ assert(oo < olaps.size());
+ assert(ee < errorProfile.size());
+
+ if (olaps[oo].open == true)
+ curDev.insert(olaps[oo].erate);
+ else
+ curDev.remove(olaps[oo].erate);
+
+ errorProfile[ee].dev = curDev;
+ }
+
+ // Build an index.
+ // bi - base we are indexing.
+ // pi - profile
+ //
+ for (uint32 bi=0, pi=0; bi<getLength(); bi += 1000) {
+ while ((pi < errorProfile.size()) && (errorProfile[pi].end <= bi))
+ pi++;
+
+ if (pi < errorProfile.size()) {
+ assert(errorProfile[pi].bgn <= bi);
+ assert(bi < errorProfile[pi].end);
+
+ errorProfileIndex.push_back(pi);
+ }
+ }
+
+ // Finalize the values.
+
+ for (uint32 bi=0; bi<errorProfile.size(); bi++)
+ errorProfile[bi].dev.finalize();
+
+ //writeLog("tig %u generated "F_SIZE_T" profile regions with "F_U64" overlap pieces.\n",
+ // id(), errorProfile.size(), nPieces);
}
-void
-Unitig::sort(void) {
+// For the tig range bgn..end, returns the fraction of bases (0.0 to 1.0) that lie
+// in profile regions whose limit - mean + 'deviations' * stddev of the region's
+// error rates, see epValue::max() - is at or above the supplied 'erate'.
+//
+// When bgn..end is the span of an overlap with error rate 'erate', a low return
+// value means most of the span has a limit below 'erate' - the overlap is too
+// noisy to be placed here. A return value of 1.0 means 'erate' is within the
+// limit everywhere along the span.
+//
+double
+Unitig::overlapConsistentWithTig(double deviations,
+ uint32 bgn, uint32 end,
+ double erate) {
+ int32 nBelow = 0; // bases in regions whose limit is below erate
+ int32 nAbove = 0; // bases in regions whose limit is at or above erate
+
+ assert(bgn < end);
+ assert(bgn < getLength());
+ assert(end <= getLength());
+
+ // Coarse search to find the first index that is after our region.
+
+#undef BINARY_SEARCH
+ // BINARY_SEARCH is undefined just above, so the #else (index based) branch is the one compiled.
+#ifdef BINARY_SEARCH
-#ifdef NEWSORT
- for (int fi=0; fi<ufpath.size(); fi++) {
- ufNode *f = &(ufpath[fi]);
+ uint32 min = 0;
+ uint32 max = errorProfile.size();
+ uint32 pb = min + (max - min) / 2;
- if (OG->isContained(f->ident) == false)
- f->containment_depth = fi;
+ while ((bgn < errorProfile[pb].bgn) ||
+ (errorProfile[pb].end <= bgn)) {
+
+ if (bgn < errorProfile[pb].bgn)
+ max = pb;
+
+ if (errorProfile[pb].end <= bgn)
+ min = pb;
+
+ assert(min < max);
+
+ pb = min + (max - min) / 2;
+ }
+
+#else
+
+ uint32 pbi = bgn / 1000; // errorProfileIndex is built with one entry per 1000 bases of tig
+
+ if (errorProfileIndex.size() <= pbi)
+ fprintf(stderr, "errorProfileIndex.size() = "F_SIZE_T"\n", errorProfileIndex.size());
+ assert(pbi < errorProfileIndex.size());
+
+ while ((0 < pbi) && (errorProfile[errorProfileIndex[pbi]].bgn > bgn)) {
+ fprintf(stderr, "BAD ESTIMATE for bgn=%u end=%u\n", bgn, end);
+ pbi--;
}
+
+ while ((pbi < errorProfileIndex.size()) && (errorProfile[errorProfileIndex[pbi]].end <= bgn))
+ pbi++;
+
+ if (pbi == errorProfileIndex.size()) {
+ //fprintf(stderr, "Fell off loop for bgn=%u end=%u last ep bgn=%u end=%u\n",
+ // bgn, end, errorProfile.back().bgn, errorProfile.back().end);
+ pbi--;
+ }
+
+ // The region pb points to will contain bgn.
+
+ uint32 pb = errorProfileIndex[pbi];
+
+ //fprintf(stderr, "For bgn=%u end=%u - stopped at pbi=%u errorProfile[%u] = %u-%u (1)\n",
+ // bgn, end, pbi, pb, errorProfile[pb].bgn, errorProfile[pb].end);
+
+ // Fine tune search to find the exact first region.
+
+ while ((0 < pb) && (bgn < errorProfile[pb].bgn))
+ pb--;
+ while ((pb < errorProfile.size()) && (errorProfile[pb].end <= bgn))
+ pb++;
+
#endif
- qsort( &(ufpath.front()), getNumFrags(), sizeof(ufNode), &ufNodeCmp );
+ if ((errorProfile[pb].bgn > bgn) ||
+ (bgn >= errorProfile[pb].end))
+ fprintf(stderr, "For bgn=%u end=%u - stopped at errorProfile[%u] = %u-%u BOOM\n",
+ bgn, end, pb, errorProfile[pb].bgn, errorProfile[pb].end);
+ assert(errorProfile[pb].bgn <= bgn);
+ assert(bgn < errorProfile[pb].end);
+
+ // Sum whole-region lengths: into nAbove when the region's limit is at or above erate, else into nBelow.
+
+ uint32 pe = pb;
+
+ while ((pe < errorProfile.size()) && (errorProfile[pe].bgn < end)) {
+ if (erate <= errorProfile[pe].max(deviations))
+ nAbove += errorProfile[pe].end - errorProfile[pe].bgn;
+ else
+ nBelow += errorProfile[pe].end - errorProfile[pe].bgn;
+
+ pe++;
+ }
+
+ // Adjust for the bits we overcounted in the first and last regions.
+
+ if (pe > 0) // Argh. If this read is fully in the first region (where there
+ pe--; // is only 1x coverage) then pe==0.
+
+ // bb = bases of the first region that precede bgn; be = bases of the last region that follow end.
+ uint32 bb = bgn - errorProfile[pb].bgn;
+ uint32 be = errorProfile[pe].end - end;
+
+ assert(bgn >= errorProfile[pb].bgn);
+ assert(errorProfile[pe].end >= end);
+
+ if (erate <= errorProfile[pb].max(deviations))
+ nAbove -= bb;
+ else
+ nBelow -= bb;
+
+ if (erate <= errorProfile[pe].max(deviations))
+ nAbove -= be;
+ else
+ nBelow -= be;
- for (uint32 fi=0; fi<ufpath.size(); fi++)
- _pathPosition[ufpath[fi].ident] = fi;
+ assert(nAbove >= 0);
+ assert(nBelow >= 0);
+
+ return((double)nAbove / (nBelow + nAbove));
+}
+
+
+
+
+
+
+//  Writes '<prefix>.<label>.<tig id>.profile', one line per profile region: bgn, end,
+//  mean +- stddev, and dev.size().  Does nothing if the file can't be opened.
+void
+Unitig::reportErrorProfile(const char *prefix, const char *label) {
+  char   N[FILENAME_MAX];
+  FILE  *F;
+
+  //  snprintf() truncates an overly long name instead of writing past the end of N.
+  snprintf(N, FILENAME_MAX, "%s.%s.%08u.profile", prefix, label, id());
+
+  F = fopen(N, "w");
+  if (F) {
+    for (uint32 ii=0; ii<errorProfile.size(); ii++)
+      fprintf(F, "%u %u %f +- %f (%u overlaps)\n",
+              errorProfile[ii].bgn, errorProfile[ii].end,
+              errorProfile[ii].dev.mean(), errorProfile[ii].dev.stddev(),
+              errorProfile[ii].dev.size());
+    fclose(F);
+  }
+
+  // Reporting the index isn't generally useful, only for debugging.
+#if 0
+  snprintf(N, FILENAME_MAX, "%s.%s.%08u.profile.index", prefix, label, id());
+
+  F = fopen(N, "w");
+  if (F) {
+    for (uint32 ii=0; ii<errorProfileIndex.size(); ii++) {
+      uint32 xx = errorProfileIndex[ii];
+
+      fprintf(F, "index[%u] = %u -- errorProfile[] = %u-%u %.6f +- %.6f (%u values)\n",
+              ii,
+              xx,
+              errorProfile[xx].bgn,
+              errorProfile[xx].end,
+              errorProfile[xx].dev.mean(),
+              errorProfile[xx].dev.stddev(),
+              errorProfile[xx].dev.size());
+    }
+    fclose(F);
+  }
+#endif
}
diff --git a/src/bogart/AS_BAT_Unitig.H b/src/bogart/AS_BAT_Unitig.H
index 9b246ae..432ced4 100644
--- a/src/bogart/AS_BAT_Unitig.H
+++ b/src/bogart/AS_BAT_Unitig.H
@@ -38,7 +38,68 @@
#ifndef INCLUDE_AS_BAT_UNITIG
#define INCLUDE_AS_BAT_UNITIG
-#include "AS_BAT_Datatypes.H"
+#include "AS_global.H"
+#include "AS_BAT_UnitigVector.H"
+
+#include "stddev.H"
+
+#include <vector>
+#include <algorithm>
+
+class BestEdgeOverlap;
+
+
+
+//  A pair of coordinates.  bgn < end is a forward interval, bgn > end a reverse
+//  one; min() and max() give the extent regardless of orientation.  Coordinates
+//  are signed because placement can run off the start of a tig.
+class SeqInterval {
+public:
+  SeqInterval() : bgn(0), end(0) {
+  };
+  ~SeqInterval() {
+  };
+
+
+  //  Low and high coordinate.
+  int32 min(void) const { return(::min(bgn, end)); };
+  int32 max(void) const { return(::max(bgn, end)); };
+
+  //  An interval with bgn == end is neither forward nor reverse.
+  bool isForward(void) const { return(bgn < end); };
+  bool isReverse(void) const { return(bgn > end); };
+
+  //  Equal when both endpoints match, in the same or in the flipped
+  //  orientation: (2,7) == (7,2).
+  bool operator==(SeqInterval const that) const {
+    bool sameOrient = (bgn == that.bgn) && (end == that.end);
+    bool flipOrient = (bgn == that.end) && (end == that.bgn);
+
+    return(sameOrient || flipOrient);
+  };
+
+  //  Exact negation of operator==.
+  bool operator!=(SeqInterval const that) const {
+    return(!operator==(that));
+  };
+
+  //  Orders by low coordinate only; orientation and the high coordinate
+  //  do not participate.
+  bool operator<(SeqInterval const that) const {
+    return(min() < that.min());
+  };
+
+
+public:
+  int32 bgn; // MUST be signed! Read placement needs to set coordinates to negative
+  int32 end; // coordinates to indicate the read extends off the start of the tig.
+};
+
+
+
+
+
+
// Derived from IntMultiPos, but removes some of the data (48b in IntMultiPos, 32b in struct
// ufNode). The minimum size (bit fields, assuming maximum limits, not using the contained
@@ -47,7 +108,8 @@
//
// ufNode is, of course, 'unitig fragment node'.
//
-struct ufNode {
+class ufNode {
+public:
uint32 ident;
uint32 contained;
uint32 parent; // IID of the fragment we align to
@@ -57,17 +119,37 @@ struct ufNode {
SeqInterval position;
- uint32 containment_depth;
+  //  Orientation is that of the placement.
+  bool isForward(void) const { return(position.isForward()); };
+  bool isReverse(void) const { return(position.isReverse()); };
+
+  //  Sort order for reads in a tig: by low coordinate; among reads that start
+  //  at the same place, the one that ends last (the container) comes first.
+  //  Reads with identical extents compare as equivalent.
+  bool operator<(ufNode const &that) const {
+    int32 thisLo = position.min();
+    int32 thisHi = position.max();
+
+    int32 thatLo = that.position.min();
+    int32 thatHi = that.position.max();
+
+    if (thisLo != thatLo)
+      return(thisLo < thatLo);  //  Whichever starts first.
+    return(thisHi > thatHi);    //  Same start: whichever ends last.
+  };
};
-struct Unitig {
+
+
+class Unitig {
private:
Unitig() {
_length = 0;
_id = 0;
_tigID = 0;
+
_isUnassembled = false;
_isBubble = false;
_isRepeat = false;
@@ -80,7 +162,13 @@ public:
friend class UnitigVector;
- void sort(void);
+  //  Orders the reads with ufNode::operator<, then rebuilds _pathPosition so that
+  //  each read's ident maps to its new index in ufpath.
+  void sort(void) {
+    std::sort(ufpath.begin(), ufpath.end());
+    for (uint32 idx=0; idx<ufpath.size(); idx++) _pathPosition[ufpath[idx].ident] = idx;
+  };
+ //void bubbleSortLastFrag(void);
void reverseComplement(bool doSort=true);
// getNumRandomFrags() is a placeholder, random frags should not
@@ -94,24 +182,79 @@ public:
uint32 getNumFrags(void) { return(ufpath.size()); };
uint32 getNumRandomFrags(void) { return(getNumFrags()); };
- ufNode getLastBackboneNode(void);
- ufNode getLastBackboneNode(uint32 &);
+ // Place 'frag' using an edge to some read in this tig. The edge is from 'frag3p' end.
+ //
+ bool placeFrag(ufNode &frag, // resulting placement
+ uint32 fragId, // read we want to place
+ bool frag3p, // end that the edge is from
+ BestEdgeOverlap *edge); // edge to something in this tig
+
+ void addFrag(ufNode node, int offset=0, bool report=false);
- void placeFrag_computePlacement(ufNode &frag,
- int32 &bidx,
- BestEdgeOverlap *bestedge,
- bool bestIs3);
- bool placeFrag(ufNode &place5, int32 &fidx5, BestEdgeOverlap *bestedge5,
- ufNode &place3, int32 &fidx3, BestEdgeOverlap *bestedge3);
+public:
+ class epValue { // One region of the tig's error profile, and the error rates collected for it.
+ public:
+ epValue(uint32 b, uint32 e) { // dev is left default constructed.
+ bgn = b;
+ end = e;
+ };
- bool placeFrag(ufNode &frag, BestContainment *bestcont);
+ double max(double deviations) { // Limit for this region: mean plus 'deviations' standard deviations.
+ return(dev.mean() + deviations * dev.stddev());
+ };
- void addFrag(ufNode node, int offset=0, bool report=false);
- bool addContainedFrag(int32 fid, BestContainment *bestcont, bool report=false);
- bool addAndPlaceFrag(int32 fid, BestEdgeOverlap *bestedge5, BestEdgeOverlap *bestedge3, bool report=false);
+ bool operator<(const epValue &that) const { return(bgn < that.bgn); }; // Order by start coordinate.
+ bool operator<(const uint32 &that) const { return(bgn < that); }; // Same, against a bare coordinate.
+
+ // bgn, end - tig coordinates of the region; the index builder treats it as bgn <= base < end.
+ // dev - statistics over the error rates of the overlaps currently open in this region.
+ uint32 bgn;
+ uint32 end;
- void bubbleSortLastFrag(void);
+ stdDev<double> dev;
+ };
+
+ static size_t epValueSize(void) { return(sizeof(epValue)); }; // Bytes per profile region; presumably for memory reporting by callers - confirm.
+
+ void computeArrivalRate(const char *prefix,
+ const char *label,
+ vector<int32> *hist);
+
+ void computeErrorProfileApproximate(const char *prefix, const char *label);
+ void computeErrorProfile(const char *prefix, const char *label);
+ void reportErrorProfile(const char *prefix, const char *label);
+  void clearErrorProfile(void) { errorProfile.clear(); errorProfileIndex.clear(); };  //  The index holds positions in errorProfile, so both go together.
+
+ double overlapConsistentWithTig(double deviations,
+ uint32 bgn, uint32 end,
+ double erate);
+
+
+  //  Scans ufpath from the front for the first read whose low coordinate is
+  //  zero, i.e., that touches the start of the tig.  Asserts one was found.
+  ufNode *firstRead(void) {
+    ufNode *candidate = &ufpath.front();
+    uint32  next      = 1;
+
+    while ((next < ufpath.size()) && (candidate->position.min() != 0))
+      candidate = &ufpath[next++];
+    assert(candidate->position.min() == 0);
+    return(candidate);
+  };
+
+
+  //  Scans ufpath from the back for the first read whose high coordinate is
+  //  the tig length, i.e., that touches the end of the tig.  Asserts one was found.
+  ufNode *lastRead(void) {
+    ufNode *candidate = &ufpath.back();
+    uint32  prev      = ufpath.size() - 1;
+
+    while ((prev > 0) && (candidate->position.max() != getLength()))
+      candidate = &ufpath[--prev];
+    assert(candidate->position.max() == getLength());
+    return(candidate);
+  };
static void removeFrag(int32 fid) {
@@ -143,12 +286,14 @@ public:
// Public Member Variables
vector<ufNode> ufpath;
+ vector<epValue> errorProfile;
+ vector<uint32> errorProfileIndex;
private:
- int32 _length;
- uint32 _id;
+ int32 _length;
+ uint32 _id;
public:
- uint32 _tigID;
+ uint32 _tigID;
private:
static uint32 *_inUnitig; // Maps a fragment iid to a unitig id.
@@ -162,93 +307,14 @@ public:
uint32 _isBubble; // Annotation: from a failed bubble pop
uint32 _isRepeat; // Annotation: from an identified repeat region
uint32 _isCircular; // Annotation: has overlap to self
-};
-
-
-
-class UnitigVector {
-public:
- UnitigVector() {
- _blockSize = 1048576;
- _numBlocks = 1;
- _maxBlocks = 1024;
- _blocks = new Unitig ** [_maxBlocks];
- _blocks[0] = new Unitig * [_blockSize];
- _blocks[0][0] = NULL; // No first unitig.
- _blockNext = 1;
- _totalUnitigs = 1;
- };
- ~UnitigVector() {
- };
-
- Unitig *newUnitig(bool verbose) {
- Unitig *u = new Unitig();
-
-#pragma omp critical
- {
- u->_id = _totalUnitigs++;
-
- if (verbose)
- writeLog("Creating Unitig %d\n", u->_id);
-
- if (_blockNext >= _blockSize) {
- assert(_numBlocks < _maxBlocks);
-
- _blocks[_numBlocks] = new Unitig * [_blockSize];
-
- memset(_blocks[_numBlocks], 0, sizeof(Unitig **) * _blockSize);
-
- _numBlocks++;
- _blockNext = 0;
- }
-
- _blocks[_numBlocks-1][_blockNext++] = u;
-
- // The rest are just sanity checks.
-
- assert((u->id() / _blockSize) == (_numBlocks - 1));
- assert((u->id() % _blockSize) == (_blockNext - 1));
-
- assert(operator[](u->id()) == u);
- }
-
- return(u);
- };
-
- size_t size(void) {
- return(_totalUnitigs);
- };
-
- Unitig *&operator[](uint32 i) {
- uint32 idx = i / _blockSize;
- uint32 pos = i % _blockSize;
-
-#ifdef CHECK_UNITIG_ARRAY_INDEXING
- if (((i >= _totalUnitigs)) ||
- ((idx >= _numBlocks)) ||
- (((pos >= _blockNext) && (idx >= _numBlocks - 1)))) {
- fprintf(stderr, "UnitigVector::operator[]()-- i="F_U32" with totalUnitigs="F_U64"\n", i, _totalUnitigs);
- fprintf(stderr, "UnitigVector::operator[]()-- blockSize="F_U64"\n", _blockSize);
- fprintf(stderr, "UnitigVector::operator[]()-- idx="F_U32" numBlocks="F_U64"\n", idx, _numBlocks);
- fprintf(stderr, "UnitigVector::operator[]()-- pos="F_U32" blockNext="F_U64"\n", pos, _blockNext);
- }
- assert(i < _totalUnitigs);
- assert((idx < _numBlocks));
- assert((pos < _blockNext) || (idx < _numBlocks - 1));
-#endif
-
- return(_blocks[idx][pos]);
- };
-
-private:
- uint64 _blockSize;
-
- uint64 _numBlocks;
- uint64 _maxBlocks;
- Unitig ***_blocks;
- uint64 _blockNext;
- uint64 _totalUnitigs;
+  char type(void) {
+    //  One-letter annotation code.  The first flag set wins; 'N' if none are set.
+    return((_isUnassembled) ? 'U' :
+           (_isBubble)      ? 'B' :
+           (_isRepeat)      ? 'R' :
+           (_isCircular)    ? 'C' : 'N');
+  }
};
diff --git a/src/bogart/AS_BAT_UnitigVector.C b/src/bogart/AS_BAT_UnitigVector.C
new file mode 100644
index 0000000..1fc99d6
--- /dev/null
+++ b/src/bogart/AS_BAT_UnitigVector.C
@@ -0,0 +1,218 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-APR-06
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
+#include "AS_BAT_UnitigVector.H"
+
+
+
+//  Starts with one block.  Slot 0 is reserved (stays NULL), so the first unitig gets id 1.
+UnitigVector::UnitigVector() {
+  _blockSize    = 1048576;
+  _numBlocks    = 1;
+  _maxBlocks    = 1024;
+  _blocks       = new Unitig ** [_maxBlocks];
+  _blocks[0]    = new Unitig *  [_blockSize]();  //  () sets every slot to NULL, so no slot is ever an uninitialized pointer.
+  _blockNext    = 1;
+  _totalUnitigs = 1;
+}
+
+
+
+UnitigVector::~UnitigVector() {
+  //  Delete the unitigs.  Only slots handed out by newUnitig() (and slot 0, set
+  //  to NULL by the constructor) hold valid pointers; the unused tail of the last
+  //  block is skipped.  Then delete each block, and finally the block pointers.
+  for (uint64 ii=0; ii<_numBlocks; ii++) {
+    uint64 used = (ii + 1 < _numBlocks) ? _blockSize : _blockNext;
+
+    for (uint64 jj=0; jj<used; jj++)
+      delete _blocks[ii][jj];
+
+    delete [] _blocks[ii];
+  }
+  delete [] _blocks;
+}
+
+
+
+//  Allocates a new Unitig, assigns it the next id, and stores it in the slot for
+//  that id, adding one block to the table when the current block is full.
+Unitig *
+UnitigVector::newUnitig(bool verbose) {
+  Unitig *tig = new Unitig();
+
+  //  The critical section serializes id assignment and slot bookkeeping.
+#pragma omp critical
+  {
+    tig->_id = _totalUnitigs++;
+
+    if (verbose)
+      writeLog("Creating Unitig %d\n", tig->_id);
+
+    //  Current block is full: start a new one with every slot NULL.
+    if (_blockNext >= _blockSize) {
+      assert(_numBlocks < _maxBlocks);
+
+      _blocks[_numBlocks] = new Unitig * [_blockSize]();
+
+      _numBlocks++;
+      _blockNext = 0;
+    }
+
+    _blocks[_numBlocks-1][_blockNext++] = tig;
+
+    //  Sanity checks: the slot just filled is the one the id maps to.
+    assert((tig->id() / _blockSize) == (_numBlocks - 1));
+    assert((tig->id() % _blockSize) == (_blockNext - 1));
+    assert(operator[](tig->id()) == tig);
+  }
+
+  return(tig);
+}
+
+
+
+void
+UnitigVector::deleteUnitig(uint32 i) {
+  Unitig *&slot = _blocks[i / _blockSize][i % _blockSize];  //  Same addressing as operator[].
+  delete slot;  slot = NULL;  //  Leave NULL behind so loops that test for NULL skip this id.
+}
+
+
+
+#ifdef CHECK_UNITIG_ARRAY_INDEXING
+Unitig *&operator[](uint32 i) {
+ uint32 idx = i / _blockSize; // which block
+ uint32 pos = i % _blockSize; // slot within that block
+ // NOTE(review): this definition has no UnitigVector:: qualifier, and AS_BAT_UnitigVector.H already defines operator[] inline - it looks like defining the macro cannot compile as is; confirm.
+ if (((i >= _totalUnitigs)) ||
+ ((idx >= _numBlocks)) ||
+ (((pos >= _blockNext) && (idx >= _numBlocks - 1)))) {
+ fprintf(stderr, "UnitigVector::operator[]()-- i="F_U32" with totalUnitigs="F_U64"\n", i, _totalUnitigs);
+ fprintf(stderr, "UnitigVector::operator[]()-- blockSize="F_U64"\n", _blockSize);
+ fprintf(stderr, "UnitigVector::operator[]()-- idx="F_U32" numBlocks="F_U64"\n", idx, _numBlocks);
+ fprintf(stderr, "UnitigVector::operator[]()-- pos="F_U32" blockNext="F_U64"\n", pos, _blockNext);
+ }
+ assert(i < _totalUnitigs);
+ assert((idx < _numBlocks));
+ assert((pos < _blockNext) || (idx < _numBlocks - 1));
+ // Same lookup as the unchecked operator[] in the header.
+ return(_blocks[idx][pos]);
+};
+#endif
+
+
+
+
+
+
+
+//  Calls Unitig::computeArrivalRate() on every tig except empty slots and tigs
+//  with exactly one read, then writes hist[1]..hist[5] (hist[0] is not written)
+//  to '<prefix>.arrivalRate.<ii>.dat', one value per line.
+void
+UnitigVector::computeArrivalRate(const char *prefix, const char *label) {
+  uint32 tiLimit    = size();
+  uint32 numThreads = omp_get_max_threads();
+  uint32 blockSize  = (tiLimit < 100000 * numThreads) ? numThreads : tiLimit / 99999;
+
+  fprintf(stderr, "Computing arrival rates for %u unitigs using %u threads.\n", tiLimit, numThreads);
+  vector<int32> hist[6];
+
+  //#pragma omp parallel for schedule(dynamic, blockSize)
+  for (uint32 ti=0; ti<tiLimit; ti++) {
+    Unitig *tig = operator[](ti);
+
+    if ((tig != NULL) && (tig->ufpath.size() != 1))
+      tig->computeArrivalRate(prefix, label, hist);
+  }
+
+  for (uint32 ii=1; ii<6; ii++) {
+    char N[FILENAME_MAX];
+    snprintf(N, FILENAME_MAX, "%s.arrivalRate.%u.dat", prefix, ii);   //  Bounded by the size of N.
+    FILE *F = fopen(N, "w");
+    if (F == NULL) {  //  Report and skip this file; fprintf() to a NULL stream is undefined.
+      fprintf(stderr, "Failed to open '%s' for writing; histogram %u not saved.\n", N, ii);
+      continue;
+    }
+    for (uint32 jj=0; jj<hist[ii].size(); jj++)
+      fprintf(F, "%d\n", hist[ii][jj]);
+    fclose(F);
+  }
+}
+
+
+
+
+
+
+void
+UnitigVector::computeErrorProfiles(const char *prefix, const char *label) {
+ uint32 tiLimit = size();
+ uint32 numThreads = omp_get_max_threads();
+ uint32 blockSize = (tiLimit < 100000 * numThreads) ? numThreads : tiLimit / 99999;
+
+ fprintf(stderr, "Computing error profiles for %u unitigs using %u threads.\n", tiLimit, numThreads);
+
+ //#pragma omp parallel for schedule(dynamic, blockSize)
+ for (uint32 ti=0; ti<tiLimit; ti++) {
+ Unitig *tig = operator[](ti);
+
+ if (tig == NULL)
+ continue;
+
+ if (tig->ufpath.size() == 1)
+ continue;
+
+ tig->computeErrorProfile(prefix, label);
+ }
+
+ fprintf(stderr, "Computing error profiles - FINISHED.\n");
+}
+
+
+
+void
+UnitigVector::reportErrorProfiles(const char *prefix, const char *label) {
+ uint32 tiLimit = size();
+ uint32 numThreads = omp_get_max_threads();
+ uint32 blockSize = (tiLimit < 100000 * numThreads) ? numThreads : tiLimit / 99999;
+
+ for (uint32 ti=0; ti<tiLimit; ti++) {
+ Unitig *tig = operator[](ti);
+
+ if (tig == NULL)
+ continue;
+
+ if (tig->ufpath.size() == 1)
+ continue;
+
+ tig->reportErrorProfile(prefix, label);
+ }
+}
+
diff --git a/src/bogart/AS_BAT_UnitigVector.H b/src/bogart/AS_BAT_UnitigVector.H
new file mode 100644
index 0000000..5f12276
--- /dev/null
+++ b/src/bogart/AS_BAT_UnitigVector.H
@@ -0,0 +1,61 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-APR-06
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#ifndef INCLUDE_AS_BAT_UNITIG_VECTOR
+#define INCLUDE_AS_BAT_UNITIG_VECTOR
+
+#include "AS_global.H"
+
+class Unitig;
+
+class UnitigVector { // Growable table of Unitig pointers, addressed by unitig id and stored in fixed-size blocks.
+public:
+ UnitigVector();
+ ~UnitigVector();
+
+ Unitig *newUnitig(bool verbose); // Creates a unitig with the next id and stores it.
+ void deleteUnitig(uint32 i); // Deletes unitig i and sets its slot to NULL.
+
+ size_t size(void) { return(_totalUnitigs); }; // Number of ids issued so far, counting the reserved id 0.
+ Unitig *&operator[](uint32 i) { return(_blocks[i / _blockSize][i % _blockSize]); }; // No bounds check; the slot may hold NULL.
+
+ void computeArrivalRate(const char *prefix, const char *label);
+
+ void computeErrorProfiles(const char *prefix, const char *label);
+ void reportErrorProfiles(const char *prefix, const char *label);
+
+private:
+ uint64 _blockSize; // Slots (Unitig pointers) per block.
+
+ uint64 _numBlocks; // Blocks allocated so far.
+ uint64 _maxBlocks; // Length of the _blocks array.
+ Unitig ***_blocks; // _blocks[b][s] is the slot for id b * _blockSize + s.
+ uint64 _blockNext; // Next unused slot in the last allocated block.
+
+ uint64 _totalUnitigs; // Id the next new unitig will get.
+};
+
+
+#endif // INCLUDE_AS_BAT_UNITIG_VECTOR
diff --git a/src/bogart/AS_BAT_Unitig_AddAndPlaceFrag.C b/src/bogart/AS_BAT_Unitig_AddAndPlaceFrag.C
deleted file mode 100644
index afba69b..0000000
--- a/src/bogart/AS_BAT_Unitig_AddAndPlaceFrag.C
+++ /dev/null
@@ -1,144 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_Unitig_AddAndPlaceFrag.C
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2010-NOV-23 to 2013-AUG-01
- * are Copyright 2010,2012-2013 J. Craig Venter Institute, and
- * are subject to the GNU General Public License version 2
- *
- * Brian P. Walenz on 2014-DEC-19
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * Brian P. Walenz beginning on 2016-JAN-11
- * are a 'United States Government Work', and
- * are released in the public domain
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
-#include "AS_BAT_BestOverlapGraph.H"
-
-
-
-// Given two edges, place fragment node.ident into this unitig using the thickest edge to decide on
-// the placement. At least one of the edges must be from the node to a fragment in the target
-// unitig.
-//
-// Returns true if placement was successful.
-//
-bool
-Unitig::addAndPlaceFrag(int32 fid, BestEdgeOverlap *bestedge5, BestEdgeOverlap *bestedge3, bool report) {
- int32 bidx5 = -1, bidx3 = -1;
- int32 blen5 = 0, blen3 = 0;
- ufNode frag;
-
- frag.ident = fid;
- frag.contained = 0;
- frag.parent = 0;
- frag.ahang = 0;
- frag.bhang = 0;
- frag.position.bgn = 0;
- frag.position.end = 0;
- frag.containment_depth = 0;
-
- // The length of the overlap depends only on the length of the a frag and the hangs. We don't
- // actually care about the real length (except for logging), only which is thicker.
-
- if ((bestedge5) && (bestedge5->fragId() == 0))
- bestedge5 = NULL;
-
- if ((bestedge3) && (bestedge3->fragId() == 0))
- bestedge3 = NULL;
-
- if ((bestedge5) && (fragIn(bestedge5->fragId()) == id())) {
- bidx5 = pathPosition(bestedge5->fragId());
- blen5 = FI->fragmentLength(fid) + ((bestedge5->ahang() < 0) ? bestedge5->bhang() : -bestedge5->ahang());
-#ifdef DEBUG_PLACEMENT
- writeLog("addAndPlaceFrag()-- bestedge5: %d,%d,%d,%d len %d\n",
- bestedge5->fragId(), bestedge5->frag3p, bestedge5->ahang(), bestedge5->bhang(), blen5);
-#endif
- assert(bestedge5->fragId() == ufpath[bidx5].ident);
- }
-
- if ((bestedge3) && (fragIn(bestedge3->fragId()) == id())) {
- bidx3 = pathPosition(bestedge3->fragId());;
- blen3 = FI->fragmentLength(fid) + ((bestedge3->ahang() < 0) ? bestedge3->bhang() : -bestedge3->ahang());
-#ifdef DEBUG_PLACEMENT
- writeLog("addAndPlaceFrag()-- bestedge3: %d,%d,%d,%d len %d\n",
- bestedge3->fragId(), bestedge3->frag3p, bestedge3->ahang(), bestedge3->bhang(), blen3);
-#endif
- assert(bestedge3->fragId() == ufpath[bidx3].ident);
- }
-
- // Use the longest that exists -- an alternative would be to take the average position, but that
- // could get messy if the placements are different. Picking one or the other has a better chance
- // of working, though it'll fail if the fragment is chimeric or spans something it shouldn't,
- // etc.
-
- if ((blen5 == 0) && (blen3 == 0)) {
- writeLog("Unitig::addAndPlaceFrag()-- WARNING: Failed to place frag %d into unitig %d; no edges to the unitig.\n",
- fid, id());
- return(false);
- }
-
- if (blen5 < blen3)
- bestedge5 = NULL;
- else
- bestedge3 = NULL;
-
- // Compute the placement -- a little scary, as we stuff both placements into the same frag, but
- // we guarantee only one placement is computed.
-
- if (placeFrag(frag, bidx5, bestedge5,
- frag, bidx3, bestedge3) == false)
- return(false);
-
- // If we just computed a placement before the start of the unitig, we need to shift the unitig to
- // make space.
-
- int32 frgBgn = MIN(frag.position.bgn, frag.position.end);
-
- if (frgBgn < 0) {
- frgBgn = -frgBgn;
-
- frag.position.bgn += frgBgn;
- frag.position.end += frgBgn;
-
- _length += frgBgn;
-
- for (uint32 fi=0; fi<ufpath.size(); fi++) {
- ufNode *tfrg = &ufpath[fi];
-
- tfrg->position.bgn += frgBgn;
- tfrg->position.end += frgBgn;
- }
- }
-
- // Finally, add the fragment.
-
- addFrag(frag, 0, report);
-
- return(true);
-}
-
diff --git a/src/bogart/AS_BAT_Unitig_AddFrag.C b/src/bogart/AS_BAT_Unitig_AddFrag.C
index 49e9258..4fc0cc6 100644
--- a/src/bogart/AS_BAT_Unitig_AddFrag.C
+++ b/src/bogart/AS_BAT_Unitig_AddFrag.C
@@ -35,9 +35,11 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
@@ -86,32 +88,9 @@ Unitig::addFrag(ufNode node, int offset, bool report) {
}
-// This will add a contained fragment to a unitig, adjusting the position as needed. It is only
-// needed when moving a contained read from unitig A to unitig B. It is NOT needed when rebuilding
-// a unitig.
-//
-bool
-Unitig::addContainedFrag(int32 fid, BestContainment *bestcont, bool report) {
- ufNode frag;
-
- assert(bestcont->isContained);
-
- frag.ident = fid;
-
- if (placeFrag(frag, bestcont) == false) {
- writeLog("addContainedFrag()-- Failed to place contained frag %d using bestcont %d (hang %d,%d same orient %d).\n",
- fid, bestcont->container, bestcont->a_hang, bestcont->b_hang, bestcont->sameOrientation);
- return(false);
- }
-
- addFrag(frag, 0, report);
-
- return(true);
-}
-
-
// Percolate the last fragment to the correct spot in the list.
+#if 0
void
Unitig::bubbleSortLastFrag(void) {
uint32 previd = ufpath.size() - 2;
@@ -135,3 +114,4 @@ Unitig::bubbleSortLastFrag(void) {
if (lastid < ufpath.size() - 1)
ufpath[lastid] = last;
}
+#endif
diff --git a/src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C b/src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C
index e8ca47a..c08643b 100644
--- a/src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C
+++ b/src/bogart/AS_BAT_Unitig_PlaceFragUsingEdges.C
@@ -35,476 +35,241 @@
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
-#include "AS_BAT_Unitig.H"
+#include "AS_BAT_FragmentInfo.H"
#include "AS_BAT_BestOverlapGraph.H"
+#include "AS_BAT_Logging.H"
-// This provides low level (and usually too much) detail on placing a read using an edge.
-#undef DEBUG_PLACE_FRAG
+#include "AS_BAT_Unitig.H"
+#undef DEBUG_PLACE_FRAG
-void
-Unitig::placeFrag_computePlacement(ufNode &frag,
- int32 &bidx,
- BestEdgeOverlap *bestedge,
- bool bestIs3) {
- ufNode *parent = &ufpath[bidx];
- assert(parent->ident == bestedge->fragId());
+ufNode
+placeFrag_contained(uint32 fragId,
+ ufNode &parent,
+ BestEdgeOverlap *edge) {
- // Scale the hangs based on the placement of the parent read. This isn't perfect; we should really only
- // scale the hang that is into the parent read (either positive A or negative B) and let the other
- // hang be based on the scaling for this read -- but we don't know the scaling for this read.
+ bool pFwd = (parent.position.bgn < parent.position.end) ? true : false;
+ int32 pMin = (parent.position.bgn < parent.position.end) ? parent.position.bgn : parent.position.end;
+ int32 pMax = (parent.position.bgn < parent.position.end) ? parent.position.end : parent.position.bgn;
- uint32 parentPlacedLen = (parent->position.bgn < parent->position.end) ? (parent->position.end - parent->position.bgn) : (parent->position.bgn - parent->position.end);
- uint32 parentRealLen = FI->fragmentLength(parent->ident);
+ assert(pMin < pMax);
- double intraScale = (double)parentPlacedLen / parentRealLen; // Within the parent read overlap
- double interScale = 1.0; // Outside the parent read overlap
+ // Reverse the overlap. frag3p here means the overlap is flipped.
+ int32 ahang = (edge->frag3p() == false) ? -edge->ahang() : edge->bhang();
+ int32 bhang = (edge->frag3p() == false) ? -edge->bhang() : edge->ahang();
- // Overlap is stored using 'node' as the A frag, and we negate the hangs to make them relative
- // to the 'parent'. (This is opposite from how containment edges are saved.) A special case
- // exists when we overlap to the 5' end of the other fragment; we need to flip the overlap to
- // ensure the (new) A frag is forward.
+ // Depending on the parent orientation...
+ //
+ // pMin pMax pMin pMax
+ // ----------------> <----------------
+ // ahang ----- bhang bhang ----- ahang
+ // > 0 < 0 < 0 > 0
- int32 ahang = -bestedge->ahang();
- int32 bhang = -bestedge->bhang();
+ int32 fMin = (pFwd == true) ? pMin + ahang : pMin - bhang;
+ int32 fMax = (pFwd == true) ? pMax + bhang : pMax - ahang;
- if (bestedge->frag3p() == bestIs3) {
- ahang = bestedge->bhang();
- bhang = bestedge->ahang();
- }
+ //int32 fMin = pMin + ((frag3p == false) ? -edge->ahang() : edge->bhang()); // * intraScale
+ //int32 fMax = pMax + ((frag3p == false) ? -edge->bhang() : edge->ahang()); // * interScale
- int32 bgnhang = 0;
- int32 endhang = 0;
+ assert(fMin < fMax);
- int32 pbgn, pend;
- int32 fbgn, fend;
+ // We don't know the true length of the overlap, and our hang-based math tends to shrink reads.
+ // Reset the end coordinate using the actual length of the read.
- bool adjustBgn = false;
+ fMax = fMin + FI->fragmentLength(fragId);
- // Place the new fragment using the overlap. We don't worry about the orientation of the new
- // fragment, only the location. Orientation of the parent fragment matters (1) to know which
- // coordinate is the lower, and (2) to decide if the overlap needs to be flipped (again).
+ // Orientation is straightforward, based on the orient of the parent, and the flipped flag.
- if (parent->position.bgn < parent->position.end) {
- pbgn = parent->position.bgn;
- pend = parent->position.end;
+ bool fFwd = (((pFwd == true) && (edge->frag3p() == false)) || // parent is fwd, olap is not flipped
+ ((pFwd == false) && (edge->frag3p() == true))); // parent is rev, olap is flipped
- bgnhang = ahang;
- endhang = bhang;
+ ufNode frag;
- } else {
- pbgn = parent->position.end;
- pend = parent->position.bgn;
+ frag.ident = fragId;
+ frag.contained = 0;
+ frag.parent = edge->fragId(); // == parent->ident
+ frag.ahang = 0; // Not used in bogart, set on output
+ frag.bhang = 0; // Not used in bogart, set on output
+ frag.position.bgn = (fFwd) ? fMin : fMax;
+ frag.position.end = (fFwd) ? fMax : fMin;
- bgnhang = -bhang;
- endhang = -ahang;
- }
+#ifdef DEBUG_PLACE_FRAG
+ writeLog("placeCont()-- parent %7d pos %7d,%7d -- edge to %7d %c' hangs %7d %7d -- frag %7d C' -- placed %7d-%7d oriented %s %7d-%7d\n",
+ parent.ident, parent.position.bgn, parent.position.end,
+ edge->fragId(), (edge->frag3p()) ? '3' : '5', edge->ahang(), edge->bhang(),
+ fragId,
+ fMin, fMax, (fFwd) ? "fwd" : "rev", frag.position.bgn, frag.position.end); // fFwd: bgn=fMin, end=fMax
+#endif
- assert((bgnhang >=0) == (endhang >= 0));
+ return(frag);
+}
- if (bgnhang > 0) {
- fbgn = pbgn + bgnhang * intraScale; // hang is moving low to the right, inside the parent
- fend = pend + endhang * interScale; // hang is moving high to the right, outside the parent
- } else {
- fbgn = pbgn + bgnhang * interScale; // hang is moving low to the left, outside the parent
- fend = pend + endhang * intraScale; // hang is moving high to the left, inside the parent
- }
- // Since we don't know the true length of the overlap, if we use just the hangs to place a
- // fragment, we typically shrink fragments well below their actual length. In one case, we
- // shrank a container enough that the containee was placed in the unitig backwards.
- //
- // We now revert back to placing the end based on the actual length, but will
- // adjust to maintain a dovetail relationship.
- //
- // See comments on other instances of this warning.
-#warning not knowing the overlap length really hurts.
-#if 1
+ufNode
+placeFrag_dovetail(uint32 fragId,
+ bool frag3p,
+ ufNode &parent,
+ BestEdgeOverlap *edge) {
- // If true, we've moved fend outside the parent range, so that can be adjusted.
- // If false, the begin point can be adjusted.
+ // We have an 'edge' from 'fragId' end 'frag3p' back to 'parent'.
+ // Use that to compute the placement of 'frag'.
- if (bgnhang > 0)
- fend = fbgn + FI->fragmentLength(frag.ident);
- else
- fbgn = fend - FI->fragmentLength(frag.ident);
+ bool pFwd = (parent.position.bgn < parent.position.end) ? true : false;
+ int32 pMin = (parent.position.bgn < parent.position.end) ? parent.position.bgn : parent.position.end;
+ int32 pMax = (parent.position.bgn < parent.position.end) ? parent.position.end : parent.position.bgn;
+
+ assert(pMin < pMax);
+
+ // Scale the hangs based on the placed versus actual length of the parent read.
-#else
- // This was an attempt to adjust position to better capture the length of the read. It bombed
- // because it violates hang restrictions - for example, when building the initial unitig, the
- // path is from positive ahang overlaps, but this change can result in negative hang
- // postioning.
+ //double intraScale = (double)(pMax - pMin) / FI->fragmentLength(parent.ident); // Within the parent read overlap
+ //double interScale = 1.0; // Outside the parent read overlap
+
+ // We're given an edge from the read-to-place back to the parent. Reverse the edge so it points
+ // from the parent to the read-to-place.
+ //
+ // The canonical edge is from a forward parent to the child.
+ //
+ // -P----\--> +b
+ // +a ---v--------C-
//
- // This typically fails with
- // AS_BAT_Unitig_AddFrag.C:68:
- // void Unitig::addFrag(ufNode, int, bool):
- // Assertion `node.position.end >= 0' failed
+ // To reverse the edge:
//
- int fpos = (fbgn + fend) / 2;
+ // If child is forward, swapping the order of the reads results in a canonical overlap. The
+ // hangs become negative.
+ //
+ // -P----\--> +b ----> -a ---/--------C>
+ // +a ---v--------C> ----> -P----v--> -b
+ //
+ // If child is reverse, swapping the order of the reads results in a backwards canonical
+ // overlap, and we need to flip end-to-end also. The hangs are swapped.
+ //
+ // -P----\--> +b ----> -C--------\--> +a
+ // +a <--v--------C- ----> +b <--v----P-
+ //
+ int32 ahang = (frag3p == false) ? -edge->ahang() : edge->bhang();
+ int32 bhang = (frag3p == false) ? -edge->bhang() : edge->ahang();
- fbgn = fpos - FI->fragmentLength(frag.ident) / 2;
- fend = fpos + FI->fragmentLength(frag.ident) / 2;
-#endif
+ // The read is placed 'to the right' of the parent if
+ // pFwd == true and edge points to 3' end
+ // pFwd == false and edge points to 5' end
+ //
+ bool toRight = (pFwd == edge->frag3p());
- // Make sure that we didn't just make a contained fragment out of a dovetail. There are two
- // cases here, either the fragment is before or after the parent. We'll compare fbgn to the
- // parent position. We could use orientations and ends, but this is easier.
+ // If placing 'to the right', we add hangs. Else, subtract the swapped hangs.
- if (fbgn < pbgn) {
- if (fend >= pend) {
- fend = pend - 1;
- }
+ int32 fMin = 0;
+ int32 fMax = 0;
+ if (toRight) {
+ fMin = pMin + ahang;
+ fMax = pMax + bhang;
} else {
- if (fend <= pend) {
- fend = pend + 1;
- }
+ fMin = pMin - bhang;
+ fMax = pMax - ahang;
}
- if (pbgn >= pend)
- writeLog("placeFrag()-- ERROR: %c' parent placement inconsistent iid=%d %d,%d\n",
- (bestIs3) ? '3' : '5', parent->ident, parent->position.bgn, parent->position.end);
- if (fbgn >= fend)
- writeLog("placeFrag()-- ERROR: %c' placement inconsistent parent=%d %d,%d hang %d,%d this %d %d,%d\n",
- (bestIs3) ? '3' : '5', parent->ident, parent->position.bgn, parent->position.end,
- ahang, bhang,
- frag.ident, fbgn, fend);
+ //int32 fMin = pMin + ((frag3p == false) ? -edge->ahang() : edge->bhang()); // * intraScale
+ //int32 fMax = pMax + ((frag3p == false) ? -edge->bhang() : edge->ahang()); // * interScale
- //if ((pbgn >= pend) || (fbgn >= fend))
- // return(false);
+ assert(fMin < fMax);
- assert(pbgn < pend);
- assert(fbgn < fend);
+ // We don't know the true length of the overlap, and our hang-based math tends to shrink reads.
+ // Reset the end coordinate using the actual length of the read.
+ fMax = fMin + FI->fragmentLength(fragId);
- // The new frag is reverse if:
- // the old frag is forward and we hit its 5' end, or
- // the old frag is reverse and we hit its 3' end.
+
+ // Orientation is a bit more complicated, with eight cases (drawing pictures helps).
+ //
+ // edge from frag3p=true to forward parent 3p -> reverse
+ // edge from frag3p=false to reverse parent 3p -> reverse
+ // edge from frag3p=false to forward parent 5p -> reverse
+ // edge from frag3p=true to reverse parent 5p -> reverse
//
- // The new frag is forward if:
- // the old frag is forward and we hit its 3' end, or
- // the old frag is reverse and we hit its 5' end.
+ // edge from frag3p=true to reverse parent 3p -> forward
+ // edge from frag3p=false to forward parent 3p -> forward
+ // edge from frag3p=false to reverse parent 5p -> forward
+ // edge from frag3p=true to forward parent 5p -> forward
//
- bool flip = (((parent->position.bgn < parent->position.end) && (bestedge->frag3p() == bestIs3)) ||
- ((parent->position.end < parent->position.bgn) && (bestedge->frag3p() != bestIs3)));
+ bool fFwd = (((frag3p == true) && (pFwd == true) && (edge->frag3p() == true)) ||
+ ((frag3p == false) && (pFwd == false) && (edge->frag3p() == true)) ||
+ ((frag3p == false) && (pFwd == true) && (edge->frag3p() == false)) ||
+ ((frag3p == true) && (pFwd == false) && (edge->frag3p() == false))) ? false : true;
+
+ ufNode frag;
+
+ frag.ident = fragId;
+ frag.contained = 0;
+ frag.parent = edge->fragId(); // == parent->ident
+ frag.ahang = 0; // Not used in bogart, set on output
+ frag.bhang = 0; // Not used in bogart, set on output
+ frag.position.bgn = (fFwd) ? fMin : fMax;
+ frag.position.end = (fFwd) ? fMax : fMin;
#ifdef DEBUG_PLACE_FRAG
- writeLog("placeFrag()-- bestedge: parent iid %d pos %d,%d b_iid %d ovl %d,%d,%d pos %d,%d flip %d\n",
- parent->ident, parent->position.bgn, parent->position.end,
- bestedge->fragId(), bestedge->frag3p(), bestedge->ahang(), bestedge->bhang(), fbgn, fend, flip);
+ writeLog("placeDove()-- parent %7d pos %7d,%7d -- edge to %7d %c' hangs %7d %7d -- frag %7d %c' -- placed %7d-%7d oriented %s %7d-%7d\n",
+ parent.ident, parent.position.bgn, parent.position.end,
+ edge->fragId(), (edge->frag3p()) ? '3' : '5', edge->ahang(), edge->bhang(),
+ fragId, (frag3p) ? '3' : '5',
+ fMin, fMax, (fFwd) ? "fwd" : "rev", frag.position.bgn, frag.position.end); // fFwd: bgn=fMin, end=fMax
#endif
- frag.contained = 0;
- frag.parent = bestedge->fragId();
- frag.ahang = ahang;
- frag.bhang = bhang;
- frag.position.bgn = (flip) ? fend : fbgn;
- frag.position.end = (flip) ? fbgn : fend;
+ return(frag);
}
-// Given an implicit fragment -- a ufNode with only the 'ident' set -- and at least one best edge
-// to a fragment in this unitig, compute the position of the fragment in this unitig. If both
-// edges are given, both will independently compute a placement, which might disagree. It is up to
-// the client to figure out what to do in this case.
-//
-// If a placement is not found for an edge, the corresponding bidx value is set to -1. Otherwise,
-// it is set to the position in the fragment list of the fragment in this unitig (from above).
-//
-// Returns true if any placement is found, false otherwise.
+// Place a read into this tig using an edge from the read to some read in this tig.
//
bool
-Unitig::placeFrag(ufNode &frag5, int32 &bidx5, BestEdgeOverlap *bestedge5,
- ufNode &frag3, int32 &bidx3, BestEdgeOverlap *bestedge3) {
-
- bidx5 = -1;
- bidx3 = -1;
-
- assert(frag5.ident > 0);
- assert(frag5.ident <= FI->numFragments());
-
- frag5.contained = 0;
- frag5.parent = 0;
- frag5.ahang = 0;
- frag5.bhang = 0;
- frag5.position.bgn = 0;
- frag5.position.end = 0;
- frag5.containment_depth = 0;
-
- assert(frag3.ident > 0);
- assert(frag3.ident <= FI->numFragments());
-
- frag3.contained = 0;
- frag3.parent = 0;
- frag3.ahang = 0;
- frag3.bhang = 0;
- frag3.position.bgn = 0;
- frag3.position.end = 0;
- frag3.containment_depth = 0;
-
- if ((bestedge5) && (bestedge5->fragId() == 0))
- bestedge5 = NULL;
-
- if ((bestedge3) && (bestedge3->fragId() == 0))
- bestedge3 = NULL;
-
- // If we have an incoming edge, AND the fragment for that edge is in this unitig, look up its
- // index. Otherwise, discard the edge to prevent placement.
-
- if ((bestedge5) && (fragIn(bestedge5->fragId()) == id())) {
- bidx5 = pathPosition(bestedge5->fragId());
- assert(bestedge5->fragId() == ufpath[bidx5].ident);
- } else {
- bestedge5 = NULL;
- bidx5 = -1;
- }
-
- if ((bestedge3) && (fragIn(bestedge3->fragId()) == id())) {
- bidx3 = pathPosition(bestedge3->fragId());;
- assert(bestedge3->fragId() == ufpath[bidx3].ident);
- } else {
- bestedge3 = NULL;
- bidx3 = -1;
- }
+Unitig::placeFrag(ufNode &frag, // output placement
+ uint32 fragId, // id of read we want to place
+ bool frag3p, // end of read 'edge' is from, meaningless if contained
+ BestEdgeOverlap *edge) { // edge to read in this tig
- // Now, just compute the placement based on edges that exist.
-
- if ((bestedge5) && (bidx5 != -1))
- placeFrag_computePlacement(frag5, bidx5, bestedge5, false);
-
- if ((bestedge3) && (bidx3 != -1))
- placeFrag_computePlacement(frag3, bidx3, bestedge3, true);
-
- // Return success if we computed.
-
- return((bidx5 != -1) || (bidx3 != -1));
-}
-
-
-
-bool
-Unitig::placeFrag(ufNode &frag, BestContainment *bestcont) {
+ assert(fragId > 0);
+ assert(fragId <= FI->numFragments());
+ frag.ident = fragId;
frag.contained = 0;
frag.parent = 0;
frag.ahang = 0;
frag.bhang = 0;
frag.position.bgn = 0;
frag.position.end = 0;
- frag.containment_depth = 0;
-
- ufNode *parent = &ufpath[pathPosition(bestcont->container)];
-#if 0
- // This block is useful for debugging (maybe). It is usually triggered only during popBubbles(),
- // when we try to place a contained fragment into a fragment that has not been moved into the new
- // unitig yet. It might be useful if pathPosition ever gets messed up.
- //
- if ((parent == NULL) || (parent->ident != bestcont->container)) {
- ufNode *found = parent;
-
- for (int fi=0; fi<ufpath.size(); fi++)
- if (ufpath[fi].ident == bestcont->container)
- parent = &ufpath[fi];
-
- if (parent) {
- writeLog("placeFrag()-- WARNING: Didn't find the correct parent frag (%d) for contained frag %d -- pathPosition screwed up.\n",
- bestcont->container, frag.ident);
- writeLog("placeFrag()-- Found frag %d instead.\n", (parent == NULL) ? -1 : parent->ident);
-
- for (int fi=0; fi<ufpath.size(); fi++) {
- ufNode *ix = &ufpath[fi];
-
- writeLog("placeFrag()-- path[%4d,%4d] is frag %d %s\n",
- fi, pathPosition(ix->ident),
- ix->ident,
- (ix->ident == bestcont->container) ? " CORRECT PARENT!" : "");
- }
- }
- }
-#endif
-
- if ((parent == NULL) || (parent->ident != bestcont->container)) {
-#ifdef DEBUG_PLACE_FRAG
- writeLog("placeFrag()-- WARNING: Failed to place frag %d into unitig %d; parent not here.\n",
- frag.ident, id());
-#endif
+ if (edge == NULL)
+ // No best edge? Hard to place without one.
return(false);
- }
-
- // Adjust orientation.
- //
- // NOTE! The hangs are from the (parent) container to the (child)
- // containee. This is opposite as to how dovetail edges are stored.
-
- // isContained
- // If true, this is a true BestContainment; this fragment is contained in bestcont->container.
- //
- // If false, the containment relationship is inverted; this fragment contains
- // bestcont->contaier. This is used when placing fragments by overlaps,
- // and cannot be added to a unitig directly.
-
- if (bestcont->isContained) {
- assert(bestcont->a_hang >= 0);
- assert(bestcont->b_hang <= 0);
- } else {
- assert(bestcont->a_hang <= 0);
- assert(bestcont->b_hang >= 0);
- }
-
- // Scale the hangs to the current placed read length. int32 will overflow for reads > 15 bits, so we use a double.
-
- if (parent->position.bgn < parent->position.end) {
- // Container is forward.
- frag.contained = bestcont->container;
- frag.parent = bestcont->container;
- frag.ahang = bestcont->a_hang;
- frag.bhang = bestcont->b_hang;
-
- double scale = (double)(parent->position.end - parent->position.bgn) / FI->fragmentLength(parent->ident);
-
- if ((scale < 0.75) ||
- (1.25 < scale))
- writeLog("placeFrag()-- extreme scaling FWD %d at %d,%d (%d) len %d by %f\n",
- parent->ident, parent->position.bgn, parent->position.end, parent->position.end - parent->position.bgn, FI->fragmentLength(parent->ident), scale);
-
- if (bestcont->sameOrientation) {
- // ...and so is containee.
- frag.position.bgn = parent->position.bgn + frag.ahang * scale;
- frag.position.end = parent->position.end + frag.bhang * scale;
- } else {
- // ...but containee is reverse.
- frag.position.bgn = parent->position.end + frag.bhang * scale;
- frag.position.end = parent->position.bgn + frag.ahang * scale;
- }
-
- } else {
- // Container is reverse.
- frag.contained = bestcont->container;
- frag.parent = bestcont->container;
- frag.ahang = -bestcont->b_hang;
- frag.bhang = -bestcont->a_hang;
-
- double scale = (double)(parent->position.bgn - parent->position.end) / FI->fragmentLength(parent->ident);
-
- if ((scale < 0.75) ||
- (1.25 < scale))
- writeLog("placeFrag()-- extreme scaling REV %d at %d,%d (%d) len %d by %f\n",
- parent->ident, parent->position.end, parent->position.bgn, parent->position.bgn - parent->position.end, FI->fragmentLength(parent->ident), scale);
-
- if (bestcont->sameOrientation) {
- // ...and so is containee.
- frag.position.bgn = parent->position.bgn + frag.bhang * scale;
- frag.position.end = parent->position.end + frag.ahang * scale;
- } else {
- // ...but containee is forward.
- frag.position.bgn = parent->position.end + frag.ahang * scale;
- frag.position.end = parent->position.bgn + frag.bhang * scale;
- }
- }
-
-
- if (bestcont->isContained == false)
- // If we're a false containment overlap, skip the adjustment below.
- return(true);
-
-
-#ifdef DEBUG_PLACE_FRAG
- writeLog("placeFrag()-- contained frag %u at %d,%d -- hangs %d,%d\n",
- frag.ident, frag.position.bgn, frag.position.end, frag.ahang, frag.bhang);
-#endif
-
- // Reset the position. Try to accomodate the hangs and full read length.
- // Note that this CAN break containment relationships.
- int32 fragPos = (frag.position.bgn + frag.position.end) / 2;
- int32 placedLen = 0;
-
- if (frag.position.bgn < frag.position.end) {
- int32 placedLen = frag.position.end - frag.position.bgn;
- int32 aveLen = (placedLen + FI->fragmentLength(frag.ident)) / 2;
-
- frag.position.bgn = fragPos - aveLen / 2;
- frag.position.end = fragPos + aveLen / 2;
-
-#ifdef DEBUG_PLACE_FRAG
- writeLog("placeFrag()-- contained frag %u at %d,%d fwd from parent frag %u at %d,%d placedLen %d readLen %d aveLen %d\n",
- frag.ident, frag.position.bgn, frag.position.end,
- parent->ident, parent->position.bgn, parent->position.end,
- placedLen, FI->fragmentLength(frag.ident), aveLen);
-#endif
-
- } else {
- int32 placedLen = frag.position.bgn - frag.position.end;
- int32 aveLen = (placedLen + FI->fragmentLength(frag.ident)) / 2;
-
- frag.position.bgn = fragPos + aveLen / 2;
- frag.position.end = fragPos - aveLen / 2;
-
-#ifdef DEBUG_PLACE_FRAG
- writeLog("placeFrag()-- contained frag %u at %d,%d rev from parent frag %u at %d,%d placedLen %d readLen %d aveLen %d\n",
- frag.ident, frag.position.bgn, frag.position.end,
- parent->ident, parent->position.bgn, parent->position.end,
- placedLen, FI->fragmentLength(frag.ident), aveLen);
-#endif
- }
-
- // If we're pushed outside the container, adjust.
-
- int32 minParent = MIN(parent->position.bgn, parent->position.end);
- int32 maxParent = MAX(parent->position.bgn, parent->position.end);
-
- //writeLog("min/max %d %d frag %d %d\n", minParent, maxParent, frag.position.bgn, frag.position.end);
-
- if (frag.position.bgn < minParent) frag.position.bgn = minParent;
- if (frag.position.end < minParent) frag.position.end = minParent;
+ if (edge->fragId() == 0)
+ // Empty best edge? Still hard to place.
+ return(false);
- if (frag.position.bgn > maxParent) frag.position.bgn = maxParent;
- if (frag.position.end > maxParent) frag.position.end = maxParent;
+ if (fragIn(edge->fragId()) != id())
+ // Edge not pointing to a read in this tig?
+ return(false);
- //writeLog("min/max %d %d frag %d %d\n", minParent, maxParent, frag.position.bgn, frag.position.end);
+ // Grab the index of the parent read.
- assert(frag.position.bgn >= 0);
- assert(frag.position.end >= 0);
- assert(frag.position.bgn <= getLength());
- assert(frag.position.end <= getLength());
+ uint32 bidx = pathPosition(edge->fragId());
+ assert(edge->fragId() == ufpath[bidx].ident);
- // Containments are particularily painful. A beautiful example: a fragment of length 253bp is
- // contained in a fragment of length 251bp (both hangs are zero). In this case, the
- // "ahang+length" method fails, placing the contained fragment outside the container (and if
- // backwards oriented, _BEFORE_ the contained fragment). The "ahang,bhang" method works here,
- // but fails on other instances, shrinking deep containments to nothing.
- //
- // We can use either method first, then adjust using the other method.
- //
- // We'll use 'ahang,bhang' first (mostly because it was already done, and we need to compute
- // those values anyway) then reset the end based on the length, limited to maintain a containment
- // relationship.
- //
-#if 0
-#warning not knowing the overlap length really hurts.
- if (frag.position.bgn < frag.position.end) {
- frag.position.end = frag.position.bgn + FI->fragmentLength(frag.ident);
- if (frag.position.end > MAX(parent->position.bgn, parent->position.end))
- frag.position.end = MAX(parent->position.bgn, parent->position.end);
- } else {
- frag.position.bgn = frag.position.end + FI->fragmentLength(frag.ident);
- if (frag.position.bgn > MAX(parent->position.bgn, parent->position.end))
- frag.position.bgn = MAX(parent->position.bgn, parent->position.end);
- }
-#endif
+ // Now, just compute the placement and return success!
- // So we can sort properly, set the depth of this contained fragment.
- frag.containment_depth = parent->containment_depth + 1;
+ if (((edge->ahang() >= 0) && (edge->bhang() <= 0)) ||
+ ((edge->ahang() <= 0) && (edge->bhang() >= 0)))
+ frag = placeFrag_contained(fragId, ufpath[bidx], edge);
+ else
+ frag = placeFrag_dovetail(fragId, frag3p, ufpath[bidx], edge);
return(true);
}
diff --git a/src/bogart/AS_BAT_findEdges.C b/src/bogart/AS_BAT_findEdges.C
deleted file mode 100644
index ddc144d..0000000
--- a/src/bogart/AS_BAT_findEdges.C
+++ /dev/null
@@ -1,178 +0,0 @@
-
-/******************************************************************************
- *
- * This file is part of canu, a software program that assembles whole-genome
- * sequencing reads into contigs.
- *
- * This software is based on:
- * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
- * the 'kmer package' (http://kmer.sourceforge.net)
- * both originally distributed by Applera Corporation under the GNU General
- * Public License, version 2.
- *
- * Canu branched from Celera Assembler at its revision 4587.
- * Canu branched from the kmer project at its revision 1994.
- *
- * This file is derived from:
- *
- * src/AS_BAT/AS_BAT_findEdges.C
- *
- * Modifications by:
- *
- * Brian P. Walenz from 2014-NOV-14 to 2014-DEC-19
- * are Copyright 2014 Battelle National Biodefense Institute, and
- * are subject to the BSD 3-Clause License
- *
- * File 'README.licenses' in the root directory of this distribution contains
- * full conditions and disclaimers for each license.
- */
-
-// Given two fragments that share at least one edge, this will find that edge and construct a new
-// edge to make it mutual.
-//
-// For example, if there is a best edge from aFrg 3' to bFrg 5', this will return that edge in a3,
-// and also create the symmetric edge in b5.
-//
-static
-bool
-findEdges(ufNode *aFrg, BestEdgeOverlap &a5, BestEdgeOverlap &a3,
- ufNode *bFrg, BestEdgeOverlap &b5, BestEdgeOverlap &b3) {
-
- if (OG->isContained(aFrg->ident) ||
- OG->isContained(bFrg->ident))
- return(false);
-
- // Grab what edges we have.
-
- a5 = *OG->getBestEdgeOverlap(aFrg->ident, false);
- a3 = *OG->getBestEdgeOverlap(aFrg->ident, true);
- b5 = *OG->getBestEdgeOverlap(bFrg->ident, false);
- b3 = *OG->getBestEdgeOverlap(bFrg->ident, true);
-
- // Erase things that aren't correct
-
- if (a5.fragId() != bFrg->ident) a5 = BestEdgeOverlap();
- if (a3.fragId() != bFrg->ident) a3 = BestEdgeOverlap();
- if (b5.fragId() != aFrg->ident) b5 = BestEdgeOverlap();
- if (b3.fragId() != aFrg->ident) b3 = BestEdgeOverlap();
-
- // If we have no edges left, there are no edges!
-
- if ((b5.fragId() != aFrg->ident) && (b3.fragId() != aFrg->ident) &&
- (a5.fragId() != bFrg->ident) && (a3.fragId() != bFrg->ident))
- return(false);
-
- // If we found TWO edges for any single fragment....that's madness! That means the fragment
- // had best dovetail overlaps to the same other fragment off of both ends. We'll complain
- // and return failure. Ideally, data like this will be cleaned up by OBT, or filtered from
- // our input.
- //
- if (a5.fragId() == a3.fragId()) {
- writeLog("findEdges()-- frag %d has multiple edges to frag %d - a5 %d/%d' a3 %d/%d'\n",
- aFrg->ident, a5.fragId(),
- a5.fragId(), a5.frag3p() ? 3 : 5,
- a5.fragId(), a5.frag3p() ? 3 : 5);
- }
-
- if (b5.fragId() == b3.fragId()) {
- writeLog("findEdges()-- frag %d has multiple edges to frag %d - b5 %d/%d' b3 %d/%d'\n",
- bFrg->ident, b5.fragId(),
- b5.fragId(), b5.frag3p() ? 3 : 5,
- b5.fragId(), b5.frag3p() ? 3 : 5);
- }
-
- if (((a5.fragId() != 0) && (a5.fragId() == a3.fragId())) ||
- ((b5.fragId() != 0) && (b5.fragId() == b3.fragId()))) {
- a5 = BestEdgeOverlap();
- a3 = BestEdgeOverlap();
- b5 = BestEdgeOverlap();
- b3 = BestEdgeOverlap();
- return(false);
- }
-
- // Now, populate the other edges using whatever we have. Best case is that we have two edges
- // (because we're done).
-
- assert(((a5.fragId() == bFrg->ident) +
- (a3.fragId() == bFrg->ident) +
- (b5.fragId() == aFrg->ident) +
- (b3.fragId() == aFrg->ident)) <= 2);
-
- if (((a5.fragId() == bFrg->ident) || (a3.fragId() == bFrg->ident)) &&
- ((b5.fragId() == aFrg->ident) || (b3.fragId() == aFrg->ident)))
- return(true);
-
- // Otherwise, we have exactly one edge, and the other one needs to be created.
-
- assert(((a5.fragId() == bFrg->ident) +
- (a3.fragId() == bFrg->ident) +
- (b5.fragId() == aFrg->ident) +
- (b3.fragId() == aFrg->ident)) == 1);
-
- if (a5.fragId() == bFrg->ident) {
- //assert(a5.fragId() == 0);
- assert(a3.fragId() == 0);
- assert(b5.fragId() == 0);
- assert(b3.fragId() == 0);
-
- // Edge off of A's 5' end ('false' below)...
- // ...to B's 3' end (so ANTI or NORMAL -- negate the hangs)
- // ...to B's 5' end (so INNIE or OUTTIE -- swap the hangs)
- if (a5.frag3p())
- b3.set(aFrg->ident, false, -a5.ahang(), -a5.bhang());
- else
- b5.set(aFrg->ident, false, a5.bhang(), a5.ahang());
-
- } else if (a3.fragId() == bFrg->ident) {
- assert(a5.fragId() == 0);
- //assert(a3.fragId() == 0);
- assert(b5.fragId() == 0);
- assert(b3.fragId() == 0);
-
- // Edge off of A's 3' end ('true' below)...
- // ...to B's 3' end (so INNIE or OUTTIE -- swap the hangs)
- // ...to B's 5' end (so ANTI or NORMAL -- negate the hangs)
- if (a3.frag3p())
- b3.set(aFrg->ident, true, a3.bhang(), a3.ahang());
- else
- b5.set(aFrg->ident, true, -a3.ahang(), -a3.bhang());
-
- } else if (b5.fragId() == aFrg->ident) {
- assert(a5.fragId() == 0);
- assert(a3.fragId() == 0);
- //assert(b5.fragId() == 0);
- assert(b3.fragId() == 0);
-
- if (b5.frag3p())
- a3.set(bFrg->ident, false, -b5.ahang(), -b5.bhang());
- else
- a5.set(bFrg->ident, false, b5.bhang(), b5.ahang());
-
-
- } else if (b3.fragId() == aFrg->ident) {
- assert(a5.fragId() == 0);
- assert(a3.fragId() == 0);
- assert(b5.fragId() == 0);
- //assert(b3.fragId() == 0);
-
- if (b3.frag3p())
- a3.set(bFrg->ident, true, b3.bhang(), b3.ahang());
- else
- a5.set(bFrg->ident, true, -b3.ahang(), -b3.bhang());
-
- } else {
- fprintf(stderr, "findEdges()-- Logically impossible!\n");
- assert(0);
- }
-
- // And now we should have exactly two edges.
-
- assert(((a5.fragId() == bFrg->ident) +
- (a3.fragId() == bFrg->ident) +
- (b5.fragId() == aFrg->ident) +
- (b3.fragId() == aFrg->ident)) == 2);
-
- return(true);
-}
-
-
diff --git a/src/bogart/bogart.C b/src/bogart/bogart.C
index f3f10c9..02af654 100644
--- a/src/bogart/bogart.C
+++ b/src/bogart/bogart.C
@@ -31,26 +31,35 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-11
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
-#include "AS_BAT_Datatypes.H"
+#include "AS_BAT_FragmentInfo.H"
+#include "AS_BAT_OverlapCache.H"
#include "AS_BAT_BestOverlapGraph.H"
#include "AS_BAT_ChunkGraph.H"
-#include "AS_BAT_Unitig.H"
-#include "AS_BAT_OverlapCache.H"
+#include "AS_BAT_Logging.H"
+
+#include "AS_BAT_Unitig.H"
#include "AS_BAT_PopulateUnitig.H"
#include "AS_BAT_Instrumentation.H"
#include "AS_BAT_PlaceContains.H"
-#include "AS_BAT_PlaceZombies.H"
-#include "AS_BAT_Joining.H"
-#include "AS_BAT_MergeSplitJoin.H"
+#include "AS_BAT_MergeUnitigs.H"
+#include "AS_BAT_PopBubbles.H"
+#include "AS_BAT_MarkRepeatReads.H"
+
#include "AS_BAT_SplitDiscontinuous.H"
+#include "AS_BAT_PromoteToSingleton.H"
+
#include "AS_BAT_SetParentAndHang.H"
#include "AS_BAT_Outputs.H"
@@ -60,13 +69,6 @@ OverlapCache *OC = 0L;
BestOverlapGraph *OG = 0L;
ChunkGraph *CG = 0L;
-// HACK
-extern uint32 examineOnly;
-
-extern uint32 SPURIOUS_COVERAGE_THRESHOLD;
-extern uint32 ISECT_NEEDED_TO_BREAK;
-extern uint32 REGION_END_WEIGHT;
-
int
main (int argc, char * argv []) {
char *gkpStorePath = NULL;
@@ -75,10 +77,7 @@ main (int argc, char * argv []) {
char *tigStorePath = NULL;
double erateGraph = 0.030;
- double erateBubble = 0.035;
- double erateMerge = 0.025;
- double erateRepeat = 0.030;
- double erateMax = 0.0; // Computed
+ double erateMax = 0.050;
uint64 genomeSize = 0;
@@ -88,6 +87,13 @@ main (int argc, char * argv []) {
double lowcovFraction = 0.75;
uint32 lowcovDepth = 2;
+ double deviationGraph = 5.0;
+ double deviationBubble = 5.0;
+ double deviationRepeat = 3.0;
+
+ uint32 confusedAbsolute = 250;
+ double confusedPercent = 100.0;
+
int32 numThreads = 0;
uint64 ovlCacheMemory = UINT64_MAX;
@@ -97,21 +103,15 @@ main (int argc, char * argv []) {
bool doSave = false;
int fragment_count_target = 0;
- char *output_prefix = NULL;
+ char *prefix = NULL;
- bool removeSpur = false;
- double removeWeak = 0.0;
- bool removeSuspicious = false;
- bool noContainsInSingletons = false;
bool enableJoining = false;
- bool placeContainsUsingBest = true; // MUST be true; alternate doesn't work.
-
bool enableShatterRepeats = false;
bool enableReconstructRepeats = false;
uint32 minReadLen = 0;
- uint32 minOverlap = 40;
+ uint32 minOverlap = 500;
argc = AS_configure(argc, argv);
@@ -122,7 +122,7 @@ main (int argc, char * argv []) {
fragment_count_target = atoi(argv[++arg]);
} else if (strcmp(argv[arg], "-o") == 0) {
- output_prefix = argv[++arg];
+ prefix = argv[++arg];
} else if (strcmp(argv[arg], "-G") == 0) {
gkpStorePath = argv[++arg];
@@ -138,18 +138,6 @@ main (int argc, char * argv []) {
} else if (strcmp(argv[arg], "-gs") == 0) {
genomeSize = strtoull(argv[++arg], NULL, 10);
- } else if (strcmp(argv[arg], "-RS") == 0) {
- removeSpur = true;
-
- } else if (strcmp(argv[arg], "-RW") == 0) {
- removeWeak = atof(argv[++arg]);
-
- } else if (strcmp(argv[arg], "-NS") == 0) {
- removeSuspicious = true;
-
- } else if (strcmp(argv[arg], "-CS") == 0) {
- noContainsInSingletons = true;
-
} else if (strcmp(argv[arg], "-J") == 0) {
enableJoining = true;
@@ -163,16 +151,6 @@ main (int argc, char * argv []) {
enableShatterRepeats = true;
enableReconstructRepeats = true;
- } else if (strcmp(argv[arg], "-examineonly") == 0) {
- // HACK
- examineOnly = atoi(argv[++arg]);
-
- } else if (strcmp(argv[arg], "-repeatdetect") == 0) {
- // HACK
- SPURIOUS_COVERAGE_THRESHOLD = atoi(argv[++arg]);
- ISECT_NEEDED_TO_BREAK = atoi(argv[++arg]);
- REGION_END_WEIGHT = atoi(argv[++arg]);
-
} else if (strcmp(argv[arg], "-unassembled") == 0) {
fewReadsNumber = atoi(argv[++arg]);
tooShortLength = atoi(argv[++arg]);
@@ -188,22 +166,24 @@ main (int argc, char * argv []) {
} else if (strcmp(argv[arg], "-eg") == 0) {
erateGraph = atof(argv[++arg]);
-
- } else if (strcmp(argv[arg], "-eb") == 0) {
- erateBubble = atof(argv[++arg]);
-
- } else if (strcmp(argv[arg], "-em") == 0) {
- erateMerge = atof(argv[++arg]);
-
- } else if (strcmp(argv[arg], "-er") == 0) {
- erateRepeat = atof(argv[++arg]);
-
} else if (strcmp(argv[arg], "-eM") == 0) {
erateMax = atof(argv[++arg]);
} else if (strcmp(argv[arg], "-el") == 0) {
minOverlap = atoi(argv[++arg]);
+ } else if (strcmp(argv[arg], "-ca") == 0) { // Edge confused, based on absolute difference
+ confusedAbsolute = atoi(argv[++arg]);
+ } else if (strcmp(argv[arg], "-cp") == 0) { // Edge confused, based on percent difference
+ confusedPercent = atof(argv[++arg]);
+
+ } else if (strcmp(argv[arg], "-dg") == 0) { // Deviations, graph
+ deviationGraph = atof(argv[++arg]);
+ } else if (strcmp(argv[arg], "-db") == 0) { // Deviations, bubble
+ deviationBubble = atof(argv[++arg]);
+ } else if (strcmp(argv[arg], "-dr") == 0) { // Deviations, repeat
+ deviationRepeat = atof(argv[++arg]);
+
} else if (strcmp(argv[arg], "-M") == 0) {
ovlCacheMemory = (uint64)(atof(argv[++arg]) * 1024 * 1024 * 1024);
@@ -236,10 +216,8 @@ main (int argc, char * argv []) {
if (strcasecmp("most", argv[arg]) == 0) {
for (flg=1, opt=0; logFileFlagNames[opt]; flg <<= 1, opt++)
if ((strcasecmp(logFileFlagNames[opt], "stderr") != 0) &&
- (strcasecmp(logFileFlagNames[opt], "overlapQuality") != 0) &&
- (strcasecmp(logFileFlagNames[opt], "overlapsUsed") != 0) &&
+ (strcasecmp(logFileFlagNames[opt], "overlapScoring") != 0) &&
(strcasecmp(logFileFlagNames[opt], "chunkGraph") != 0) &&
- (strcasecmp(logFileFlagNames[opt], "happiness") != 0) &&
(strcasecmp(logFileFlagNames[opt], "setParentAndHang") != 0))
logFileFlags |= flg;
fnd = true;
@@ -277,13 +255,10 @@ main (int argc, char * argv []) {
if (erateGraph < 0.0)
err.push_back(NULL);
- if (erateBubble < 0.0)
- err.push_back(NULL);
- if (erateMerge < 0.0)
- err.push_back(NULL);
- if (erateRepeat < 0.0)
+ if (erateMax < 0.0)
err.push_back(NULL);
- if (output_prefix == NULL)
+
+ if (prefix == NULL)
err.push_back(NULL);
if (gkpStorePath == NULL)
err.push_back(NULL);
@@ -306,11 +281,8 @@ main (int argc, char * argv []) {
fprintf(stderr, "\n");
fprintf(stderr, " -gs Genome size in bases.\n");
fprintf(stderr, "\n");
- fprintf(stderr, " -RS Remove edges to spur reads from best overlap graph.\n");
- fprintf(stderr, " -NS Don't seed promiscuous unitigs with suspicious reads.\n");
- fprintf(stderr, " -CS Don't place contained reads in singleton unitigs.\n");
- fprintf(stderr, " -RW t Remove weak overlaps, those in the lower t fraction of erates per overlap end.\n");
fprintf(stderr, " -J Join promiscuous unitigs using unused best edges.\n");
+ fprintf(stderr, "\n");
fprintf(stderr, " -SR Shatter repeats, don't rebuild.\n");
fprintf(stderr, " -R Shatter repeats (-SR), then rebuild them\n");
fprintf(stderr, " -RL len Force reads below 'len' bases to be singletons.\n");
@@ -324,23 +296,14 @@ main (int argc, char * argv []) {
fprintf(stderr, " the following conditions:\n");
fprintf(stderr, "\n");
fprintf(stderr, " When constructing the Best Overlap Graph and Promiscuous Unitigs ('g'raph):\n");
- fprintf(stderr, " -eg 0.020 no more than 0.020 fraction (2.0%%) error\n");
- fprintf(stderr, "\n");
- fprintf(stderr, " When popping bubbles ('b'ubbles):\n");
- fprintf(stderr, " -eb 0.045 no more than 0.045 fraction (4.5%%) error when bubble popping\n");
- fprintf(stderr, "\n");
- fprintf(stderr, " When merging unitig ends ('m'erging):\n");
- fprintf(stderr, " -em 0.045 no more than 0.045 fraction (4.5%%) error when merging unitig ends\n");
- fprintf(stderr, "\n");
- fprintf(stderr, " When detecting repeats ('r'epeats):\n");
- fprintf(stderr, " -er 0.045 no more than 0.045 fraction (4.5%%) error when detecting repeats\n");
+ fprintf(stderr, " -eg 0.020 no more than 0.020 fraction (2.0%%) error ** DEPRECATED **\n");
fprintf(stderr, "\n");
fprintf(stderr, " When loading overlaps, an inflated maximum (to allow reruns with different error rates):\n");
fprintf(stderr, " -eM 0.05 no more than 0.05 fraction (5.0%%) error in any overlap loaded into bogart\n");
fprintf(stderr, " the maximum used will ALWAYS be at leeast the maximum of the four error rates\n");
fprintf(stderr, "\n");
fprintf(stderr, " For all, the lower limit on overlap length\n");
- fprintf(stderr, " -el 40 no shorter than 40 bases\n");
+ fprintf(stderr, " -el 500 no shorter than 500 bases\n");
fprintf(stderr, "\n");
fprintf(stderr, "Overlap Storage\n");
fprintf(stderr, "\n");
@@ -360,17 +323,10 @@ main (int argc, char * argv []) {
if (erateGraph < 0.0)
fprintf(stderr, "Invalid overlap error threshold (-eg option); must be at least 0.0.\n");
+ if (erateMax < 0.0)
+ fprintf(stderr, "Invalid overlap error threshold (-eM option); must be at least 0.0.\n");
- if (erateBubble < 0.0)
- fprintf(stderr, "Invalid overlap error threshold (-eb option); must be at least 0.0.\n");
-
- if (erateMerge < 0.0)
- fprintf(stderr, "Invalid overlap error threshold (-em option); must be at least 0.0.\n");
-
- if (erateRepeat < 0.0)
- fprintf(stderr, "Invalid overlap error threshold (-er option); must be at least 0.0.\n");
-
- if (output_prefix == NULL)
+ if (prefix == NULL)
fprintf(stderr, "No output prefix name (-o option) supplied.\n");
if (gkpStorePath == NULL)
@@ -394,16 +350,10 @@ main (int argc, char * argv []) {
fprintf(stderr, "\n");
fprintf(stderr, "Graph error threshold = %.3f (%.3f%%)\n", erateGraph, erateGraph * 100);
- fprintf(stderr, "Bubble error threshold = %.3f (%.3f%%)\n", erateBubble, erateBubble * 100);
- fprintf(stderr, "Merge error threshold = %.3f (%.3f%%)\n", erateMerge, erateMerge * 100);
- fprintf(stderr, "Repeat error threshold = %.3f (%.3f%%)\n", erateRepeat, erateRepeat * 100);
+ fprintf(stderr, "Max error threshold = %.3f (%.3f%%)\n", erateMax, erateMax * 100);
fprintf(stderr, "\n");
fprintf(stderr, "Minimum overlap length = %u bases\n", minOverlap);
fprintf(stderr, "\n");
- fprintf(stderr, "SPURIOUS_COVERAGE_THRESHOLD "F_U32"\n", SPURIOUS_COVERAGE_THRESHOLD);
- fprintf(stderr, "ISECT_NEEDED_TO_BREAK "F_U32"\n", ISECT_NEEDED_TO_BREAK);
- fprintf(stderr, "REGION_END_WEIGHT "F_U32"\n", REGION_END_WEIGHT);
- fprintf(stderr, "\n");
if (numThreads > 0) {
omp_set_num_threads(numThreads);
@@ -424,21 +374,16 @@ main (int argc, char * argv []) {
UnitigVector unitigs;
- setLogFile(output_prefix, NULL);
+ setLogFile(prefix, NULL);
- FI = new FragmentInfo(gkpStore, output_prefix, minReadLen);
+ FI = new FragmentInfo(gkpStore, prefix, minReadLen);
// Initialize where we've been to nowhere
Unitig::resetFragUnitigMap(FI->numFragments());
- erateMax = MAX(erateMax, erateGraph);
- erateMax = MAX(erateMax, erateBubble);
- erateMax = MAX(erateMax, erateMerge);
- erateMax = MAX(erateMax, erateRepeat);
-
- OC = new OverlapCache(ovlStoreUniq, ovlStoreRept, output_prefix, erateMax, minOverlap, ovlCacheMemory, ovlCacheLimit, onlySave, doSave);
- OG = new BestOverlapGraph(erateGraph, output_prefix, removeWeak, removeSuspicious, removeSpur);
- CG = new ChunkGraph(output_prefix);
+ OC = new OverlapCache(ovlStoreUniq, ovlStoreRept, prefix, MAX(erateMax, erateGraph), minOverlap, ovlCacheMemory, ovlCacheLimit, onlySave, doSave);
+ OG = new BestOverlapGraph(erateGraph, deviationGraph, prefix);
+ CG = new ChunkGraph(prefix);
delete ovlStoreUniq; ovlStoreUniq = NULL;
delete ovlStoreRept; ovlStoreRept = NULL;
@@ -452,7 +397,7 @@ main (int argc, char * argv []) {
// through all fragments and place whatever isn't already placed.
//
- setLogFile(output_prefix, "buildUnitigs");
+ setLogFile(prefix, "buildUnitigs");
writeLog("==> BUILDING UNITIGS from %d fragments.\n", FI->numFragments());
for (uint32 fi=CG->nextFragByChunkLength(); fi>0; fi=CG->nextFragByChunkLength())
@@ -461,95 +406,111 @@ main (int argc, char * argv []) {
delete CG;
CG = NULL;
- writeLog("==> BUILDING UNITIGS catching missed fragments.\n");
-
- for (uint32 fi=1; fi <= FI->numFragments(); fi++)
- populateUnitig(unitigs, fi);
+ breakSingletonTigs(unitigs);
- reportOverlapsUsed(unitigs, output_prefix, "buildUnitigs");
- reportUnitigs(unitigs, output_prefix, "buildUnitigs", genomeSize);
+ reportOverlaps(unitigs, prefix, "buildUnitigs");
+ reportUnitigs(unitigs, prefix, "buildUnitigs", genomeSize);
-#if 0
//
- // Join unitigs using not-best edges.
+ // Place contained reads.
//
- setLogFile(output_prefix, "joinUnitigs");
+#if 1
+ setLogFile(prefix, "placeContains");
- if (enableJoining) {
- setLogFile(output_prefix, "joining");
+ unitigs.computeArrivalRate(prefix, "initial");
+ unitigs.computeErrorProfiles(prefix, "initial");
+ //unitigs.reportErrorProfiles(prefix, "initial");
- joinUnitigs(unitigs, enableJoining);
+ placeUnplacedUsingAllOverlaps(unitigs, prefix);
- reportOverlapsUsed(unitigs, output_prefix, "joining");
- reportUnitigs(unitigs, output_prefix, "joining", genomeSize);
- }
+ reportOverlaps(unitigs, prefix, "placeContains");
+ reportUnitigs(unitigs, prefix, "placeContains", genomeSize);
#endif
//
- // Place contained reads.
+ // Merge tigs (and detect circular ones too). Contained reads need to be placed to 'clean up'
+ // the error rate. Dovetail alone is too 'clean' for circular to be detected (in ecoli).
//
- setLogFile(output_prefix, "placeContains");
+#if 0
+ setLogFile(prefix, "merge");
+
+ computeErrorProfiles(unitigs, prefix, "merge");
+ //reportErrorProfiles(unitigs, prefix, "merge");
- if (noContainsInSingletons)
- OG->rebuildBestContainsWithoutSingletons(unitigs, erateGraph, output_prefix);
+ mergeUnitigs(unitigs, deviationGraph, false);
+
+ reportOverlaps(unitigs, prefix, "merge");
+ reportUnitigs(unitigs, prefix, "merge", genomeSize);
+#endif
- if (placeContainsUsingBest)
- placeContainsUsingBestOverlaps(unitigs);
- else
- placeContainsUsingAllOverlaps(unitigs, erateBubble);
//
- // Break and place zombies
+ // Pop bubbles
//
- setLogFile(output_prefix, "placeZombies");
+#if 1
+ setLogFile(prefix, "popBubbles");
- placeZombies(unitigs, erateMerge);
+ unitigs.computeErrorProfiles(prefix, "unplaced");
+ //unitigs.reportErrorProfiles(prefix, "unplaced");
- checkUnitigMembership(unitigs);
- reportOverlapsUsed(unitigs, output_prefix, "placeContainsZombies");
- reportUnitigs(unitigs, output_prefix, "placeContainsZombies", genomeSize);
+ popBubbles(unitigs,
+ deviationBubble);
+
+ //checkUnitigMembership(unitigs);
+ reportOverlaps(unitigs, prefix, "popBubbles");
+ reportUnitigs(unitigs, prefix, "popBubbles", genomeSize);
+#endif
//
- // Pop bubbles, detect repeats
+ // Detect and break repeats. Annotate each read with overlaps to reads not overlapping in the tig,
+ // project these regions back to the tig, and break unless there is a read spanning the region.
//
- setLogFile(output_prefix, "mergeSplitJoin");
+ setLogFile(prefix, "markRepeatReads");
+
+ unitigs.computeErrorProfiles(prefix, "repeats");
+
+ markRepeatReads(unitigs, deviationRepeat, confusedAbsolute, confusedPercent);
- mergeSplitJoin(unitigs,
- erateGraph, erateBubble, erateMerge, erateRepeat,
- output_prefix,
- minOverlap,
- enableShatterRepeats,
- genomeSize);
+ //checkUnitigMembership(unitigs);
+ reportOverlaps(unitigs, prefix, "markRepeatReads");
+ reportUnitigs(unitigs, prefix, "markRepeatReads", genomeSize);
+ //
+ // Try to reassemble just the split repeats.
+ //
+
+#if 0
if (enableReconstructRepeats) {
assert(enableShatterRepeats);
- setLogFile(output_prefix, "reconstructRepeats");
+ setLogFile(prefix, "reconstructRepeats");
reconstructRepeats(unitigs, erateGraph);
- reportOverlapsUsed(unitigs, output_prefix, "reconstructRepeats");
- reportUnitigs(unitigs, output_prefix, "reconstructRepeats", genomeSize);
+ //checkUnitigMembership(unitigs);
+ reportOverlaps(unitigs, prefix, "reconstructRepeats");
+ reportUnitigs(unitigs, prefix, "reconstructRepeats", genomeSize);
}
-
- checkUnitigMembership(unitigs);
+#endif
//
// Cleanup unitigs. Break those that have gaps in them. Place contains again. For any read
// still unplaced, make it a singleton unitig.
//
- setLogFile(output_prefix, "cleanup");
+ setLogFile(prefix, "cleanup");
splitDiscontinuousUnitigs(unitigs, minOverlap);
- if (placeContainsUsingBest)
- placeContainsUsingBestOverlaps(unitigs);
- else
- placeContainsUsingAllOverlaps(unitigs, erateBubble);
+ breakSingletonTigs(unitigs);
+
+ //unitigs.computeErrorProfiles(prefix, "final");
+ //unitigs.reportErrorProfiles(prefix, "final");
+
+ //placeUnplacedUsingAllOverlaps(unitigs, prefix);
promoteToSingleton(unitigs);
@@ -558,19 +519,24 @@ main (int argc, char * argv []) {
tooShortLength,
spanFraction,
lowcovFraction, lowcovDepth);
- checkUnitigMembership(unitigs);
- reportUnitigs(unitigs, output_prefix, "final", genomeSize);
+
+ //checkUnitigMembership(unitigs);
+ reportOverlaps(unitigs, prefix, "final");
+ reportUnitigs(unitigs, prefix, "final", genomeSize);
//
- // Generate outputs.
+ // Generate outputs. The graph MUST come after output, because it needs
+ // the tigStore tigID.
//
- setLogFile(output_prefix, "setParentAndHang");
+ setLogFile(prefix, "output");
+
setParentAndHang(unitigs);
+ writeUnitigsToStore(unitigs, prefix, tigStorePath, fragment_count_target, true);
- setLogFile(output_prefix, "output");
- writeUnitigsToStore(unitigs, output_prefix, tigStorePath, fragment_count_target);
- writeOverlapsUsed(unitigs, output_prefix);
+ setLogFile(prefix, "graph");
+
+ writeUnusedEdges(unitigs, prefix);
//
// Tear down bogart.
@@ -581,10 +547,7 @@ main (int argc, char * argv []) {
delete OC;
delete FI;
- for (uint32 ti=0; ti<unitigs.size(); ti++)
- delete unitigs[ti];
-
- setLogFile(output_prefix, NULL);
+ setLogFile(prefix, NULL);
writeLog("Bye.\n");
diff --git a/src/bogart/bogart.mk b/src/bogart/bogart.mk
index 55f4a45..c9dfba8 100644
--- a/src/bogart/bogart.mk
+++ b/src/bogart/bogart.mk
@@ -10,28 +10,25 @@ endif
TARGET := bogart
SOURCES := bogart.C \
AS_BAT_BestOverlapGraph.C \
- AS_BAT_Breaking.C \
AS_BAT_ChunkGraph.C \
AS_BAT_FragmentInfo.C \
AS_BAT_Instrumentation.C \
- AS_BAT_IntersectBubble.C \
- AS_BAT_IntersectSplit.C \
- AS_BAT_Joining.C \
AS_BAT_Logging.C \
- AS_BAT_MergeSplitJoin.C \
+ AS_BAT_MarkRepeatReads.C \
+ AS_BAT_MergeUnitigs.C \
AS_BAT_Outputs.C \
AS_BAT_OverlapCache.C \
AS_BAT_PlaceContains.C \
AS_BAT_PlaceFragUsingOverlaps.C \
- AS_BAT_PlaceZombies.C \
+ AS_BAT_PopBubbles.C \
AS_BAT_PopulateUnitig.C \
AS_BAT_PromoteToSingleton.C \
AS_BAT_ReconstructRepeats.C \
AS_BAT_SetParentAndHang.C \
AS_BAT_SplitDiscontinuous.C \
- AS_BAT_Unitig_AddAndPlaceFrag.C \
- AS_BAT_Unitig_AddFrag.C \
AS_BAT_Unitig.C \
+ AS_BAT_UnitigVector.C \
+ AS_BAT_Unitig_AddFrag.C \
AS_BAT_Unitig_PlaceFragUsingEdges.C
SRC_INCDIRS := .. ../AS_UTL ../stores
diff --git a/src/bogart/buildGraph.C b/src/bogart/buildGraph.C
index e808f19..b58222d 100644
--- a/src/bogart/buildGraph.C
+++ b/src/bogart/buildGraph.C
@@ -46,7 +46,6 @@ public:
uint32 readID;
uint32 readBgn;
uint32 readEnd;
- uint32 end;
};
@@ -61,11 +60,13 @@ public:
int32 ahang;
int32 bhang;
bestRead to;
+
+ bool flipped;
};
void
-loadEdges(char *edgeName, vector<bestEdge> &edges) {
+loadEdges(char *edgeName, vector<bestEdge> &edges, set<uint32> &vertices) {
errno = 0;
FILE *edgeFile = fopen(edgeName, "r");
@@ -79,26 +80,35 @@ loadEdges(char *edgeName, vector<bestEdge> &edges) {
while (!feof(edgeFile)) {
splitToWords W(edgeLine);
bestEdge E;
-
- E.fr.tigID = W(1);
- E.fr.tigType = W[2][0];
- E.fr.readID = W(4);
- E.fr.readBgn = W(6);
- E.fr.readEnd = W(7);
- E.fr.end = W[8][0];
-
- E.ahang = W(10);
- E.ahang = W(11);
-
- E.to.tigID = W(14);
- E.fr.tigType = W[15][0];
- E.to.readID = W(17);
- E.to.readBgn = W(19);
- E.to.readEnd = W(20);
- E.to.end = W[21][0];
+    uint32        w = 0;                       //  Index of the next word to consume from the line.
+
+    assert(W[w][0] == 't');  w++;              //  'tig'; advance outside assert() so NDEBUG builds still skip the keyword
+    E.fr.tigID    = W(w++);                    //  tigID
+    E.fr.tigType  = W[w++][0];                 //  tig type 'R', 'N', ...
+    assert(W[w][0] == 'r');  w++;              //  'read'
+    E.fr.readID   = W(w++);                    //  readID
+    assert(W[w][0] == 'a');  w++;              //  'at'
+    E.fr.readBgn  = W(w++);                    //  bgn-position
+    E.fr.readEnd  = W(w++);                    //  end-position
+
+    E.ahang       = W(w++);                    //  a-hang
+    E.flipped     = (W[w++][0] == '<');        //  '<' if flipped, '>' if normal
+    E.bhang       = W(w++);                    //  b-hang
+
+    assert(W[w][0] == 't');  w++;              //  'tig'
+    E.to.tigID    = W(w++);                    //  tigID
+    E.to.tigType  = W[w++][0];                 //  tig type of the 'to' tig (was wrongly stored into E.fr.tigType)
+    assert(W[w][0] == 'r');  w++;              //  'read'
+    E.to.readID   = W(w++);                    //  readID
+    assert(W[w][0] == 'a');  w++;              //  'at'
+    E.to.readBgn  = W(w++);                    //  bgn-position
+    E.to.readEnd  = W(w++);                    //  end-position
edges.push_back(E);
+ vertices.insert(E.fr.tigID);
+ vertices.insert(E.to.tigID);
+
fgets(edgeLine, 1024, edgeFile);
}
@@ -108,14 +118,33 @@ loadEdges(char *edgeName, vector<bestEdge> &edges) {
+//  Linear search of 'tig' for the child read whose ident() equals 'id'.
+//  Returns that child, or NULL after logging a warning when no child matches.
+//  The loop never calls getChild() on a tig with zero children.
+tgPosition *
+findRead(tgTig *tig, uint32 id) {
+  for (uint32 rr=0; rr < tig->numberOfChildren(); rr++) {
+    tgPosition  *rd = tig->getChild(rr);
+
+    if (rd->ident() == id)
+      return(rd);
+  }
+
+  fprintf(stderr, "WARNING: failed to find read %u in tig %u - ejected?\n", id, tig->tigID());
+
+  return(NULL);
+}
+
+
+
+
int
main(int argc, char **argv) {
char *gkpName = NULL;
char *tigName = NULL;
int32 tigVers = -1;
char *edgesName = NULL;
-
- vector<bestEdge> edges;
+ char *graphName = NULL;
argc = AS_configure(argc, argv);
@@ -132,6 +161,9 @@ main(int argc, char **argv) {
} else if (strcmp(argv[arg], "-E") == 0) {
edgesName = argv[++arg];
+ } else if (strcmp(argv[arg], "-o") == 0) {
+ graphName = argv[++arg];
+
} else {
char *s = new char [1024];
sprintf(s, "Unknown option '%s'.\n", argv[arg]);
@@ -147,12 +179,16 @@ main(int argc, char **argv) {
err.push_back("No tigStore store (-T option) supplied.\n");
if (edgesName == NULL)
err.push_back("No edges file (-E option) supplied.\n");
+ if (graphName == NULL)
+ err.push_back("No output graph file (-o option) supplied.\n");
if (err.size() > 0) {
fprintf(stderr, "usage: %s -G gkpStore -T tigStore tigVersion -E edgesFile ...\n", argv[0]);
fprintf(stderr, " -G gkpStore path to gkpStore\n");
fprintf(stderr, " -T tigStore version path to tigStore\n");
- fprintf(stderr, " -E edgeFile path to bogart used-best-edges file\n");
+ fprintf(stderr, " -E edgeFile path to bogart unused-edges file\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " -o graph.gfa write to 'graph.gfa'\n");
fprintf(stderr, "\n");
for (uint32 ii=0; ii<err.size(); ii++)
@@ -162,180 +198,204 @@ main(int argc, char **argv) {
exit(1);
}
- gkStore *gkpStore = gkStore::gkStore_open(gkpName);
- tgStore *tigStore = new tgStore(tigName, tigVers);
+ // Open output.
+
+ errno = 0;
+ FILE *graph = fopen(graphName, "w");
+ if (errno)
+ fprintf(stderr, "Failed to open output graph '%s': %s\n", graphName, strerror(errno)), exit(1);
+
+ // Open inputs, load the graph.
+
+ gkStore *gkpStore = gkStore::gkStore_open(gkpName);
+ tgStore *tigStore = new tgStore(tigName, tigVers);
+ vector<bestEdge> edges;
+ set<uint32> vertices;
- fprintf(stdout, "H\tVN:Z:bogart\n");
+ loadEdges(edgesName, edges, vertices);
+
+ // Dump vertices.
+
+ fprintf(graph, "H\tVN:Z:canu\n");
for (uint32 tt=0; tt<tigStore->numTigs(); tt++) {
tgTig *tig = tigStore->loadTig(tt);
- fprintf(stdout, "S\ttig%08u\t*\tLN:i:%u\tRC:i:%u\n", tig->tigID(), tig->length(), tig->numberOfChildren());
+ if (vertices.count(tig->tigID()) > 0)
+ fprintf(graph, "S\ttig%08u\t*\tLN:i:%u\tRC:i:%u\n", tig->tigID(), tig->length(), tig->numberOfChildren());
tigStore->unloadTig(tt, true);
}
+ // Dump graph.
-
- loadEdges(edgesName, edges);
-
- uint32 edgeTypes[4][4] = {0};
char *cigar = new char [1024 * 1024];
+ uint32 nEdgesUnassembled = 0;
+
for (uint32 ee=0; ee<edges.size(); ee++) {
// Get the tigs for this edge, ignore if either is unassembled.
- tgTig *frTig = tigStore->loadTig(edges[ee].fr.tigID);
- tgTig *toTig = tigStore->loadTig(edges[ee].to.tigID);
+ tgTig *frTig = tigStore->loadTig(edges[ee].fr.tigID);
+ uint32 frTigID = frTig->tigID();
+
+ tgTig *toTig = tigStore->loadTig(edges[ee].to.tigID);
+ uint32 toTigID = toTig->tigID();
assert(frTig->_class != tgTig_noclass);
assert(toTig->_class != tgTig_noclass);
if ((frTig->_class == tgTig_unassembled) ||
(toTig->_class == tgTig_unassembled)) {
-#if 0
- fprintf(stderr, "SKIPPING edge %d -- fr tig %d read %d -- to tig %d read %d\n",
- ee,
- edges[ee].fr.tigID, edges[ee].fr.readID,
- edges[ee].to.tigID, edges[ee].to.readID);
-#endif
+ nEdgesUnassembled++;
continue;
}
// Find the reads we're using to anchor the tigs together.
-#if 0
- fprintf(stderr, "SEARCHING edge %d -- fr tig %d read %d -- to tig %d read %d\n",
- ee,
- frTig->tigID(), edges[ee].fr.readID,
- toTig->tigID(), edges[ee].to.readID);
-#endif
+ tgPosition *frRead = findRead(frTig, edges[ee].fr.readID);
+ tgPosition *toRead = findRead(toTig, edges[ee].to.readID);
- tgPosition *frRead = NULL;
- tgPosition *toRead = NULL;
- uint32 rr;
+ if ((frRead == NULL) ||
+ (toRead == NULL))
+ continue;
- rr = 0;
- do {
- frRead = frTig->getChild(rr++);
- } while ((rr < frTig->numberOfChildren()) && (frRead->ident() != edges[ee].fr.readID));
+ // Map coordinates from gapped to ungapped.
- rr = 0;
- do {
- toRead = toTig->getChild(rr++);
- } while ((rr < toTig->numberOfChildren()) && (toRead->ident() != edges[ee].to.readID));
+ uint32 frReadMin = frTig->mapGappedToUngapped(frRead->min());
+ uint32 frReadMax = frTig->mapGappedToUngapped(frRead->max());
+ uint32 frLen = frTig->length(false);
+ uint32 toReadMin = toTig->mapGappedToUngapped(toRead->min());
+ uint32 toReadMax = toTig->mapGappedToUngapped(toRead->max());
+ uint32 toLen = toTig->length(false);
- if (frRead->ident() != edges[ee].fr.readID)
- fprintf(stderr, "WARNING: failed to find read %u in tig %u - ejected?\n",
- edges[ee].fr.readID, edges[ee].fr.tigID);
- if (toRead->ident() != edges[ee].to.readID)
- fprintf(stderr, "WARNING: failed to find read %u in tig %u - ejected?\n",
- edges[ee].to.readID, edges[ee].to.tigID);
+ //
+ // Convert from a read-read overlap to a tig-tig overlap.
+ //
- if (frRead->ident() != edges[ee].fr.readID)
- continue;
- if (toRead->ident() != edges[ee].to.readID)
- continue;
+ // Orient tigs based on the read orientation.
+ // For 'fr', we require that the read always be forward.
+ // For 'to', the overlap dictates the orientation.
- fprintf(stderr, "WORKING -- fr tig %d read %d -- to tig %d read %d\n",
- frTig->tigID(), frRead->ident(),
- toTig->tigID(), toRead->ident());
+ bool frTigFwd = frRead->isForward(); // Tig forward if read forward.
+ bool toTigFwd = toRead->isForward(); // Same, unless...
- // Decide orientations
+ if (edges[ee].flipped == true) // ...edge is flipped, so flip
+ toTigFwd = !toTigFwd; // 'to' tig.
- bool frForward = frRead->isForward();
- bool toForward = toRead->isForward();
+ // Cleanup. Makes skipping an edge much easier.
- int32 frBgn = frTig->mapGappedToUngapped(frRead->bgn());
- int32 frEnd = frTig->mapGappedToUngapped(frRead->end());
+ tigStore->unloadTig(frTigID, true); frRead = NULL;
+ tigStore->unloadTig(toTigID, true); toRead = NULL;
- int32 toBgn = toTig->mapGappedToUngapped(toRead->bgn());
- int32 toEnd = toTig->mapGappedToUngapped(toRead->end());
+ // Based on tig orientation, find the bgn and end lengths from each read.
- // Normalize to forward/forward.
+ int32 frBgn = (frTigFwd) ? ( frReadMin) : (frLen - frReadMax);
+ int32 frEnd = (frTigFwd) ? (frLen - frReadMax) : ( frReadMin);
- if (frForward) {
- frBgn = frTig->length(false) - frBgn;
- frEnd = frTig->length(false) - frEnd;
- }
+ int32 toBgn = (toTigFwd) ? ( toReadMin) : (toLen - toReadMax);
+ int32 toEnd = (toTigFwd) ? (toLen - toReadMax) : ( toReadMin);
- if (toForward) {
- toBgn = toTig->length(false) - toBgn;
- toEnd = toTig->length(false) - toEnd;
- }
+ //fprintf(graph, "hangs0- fr %d-%d to %d-%d ahang %d bhang %d\n",
+ // frBgn, frEnd, toBgn, toEnd, edges[ee].ahang, edges[ee].bhang);
- // Compute an offset based on the read-to-read overlap.
+ // Apply the overlap hangs to find the overlapping regions on the tigs.
- int32 alignOffset = 0;
+ if (edges[ee].ahang < 0)
+ toBgn += -edges[ee].ahang;
+ else
+ frBgn += edges[ee].ahang;
- // Figure out the hangs of each tig.
+ if (edges[ee].bhang < 0)
+ frEnd += -edges[ee].bhang;
+ else
+ toEnd += edges[ee].bhang;
- int32 fr5 = frBgn - 0;
- int32 fr3 = frTig->length(false) - frEnd;
+ //fprintf(graph, "hangs1- fr %d-%d to %d-%d\n",
+ // frBgn, frEnd, toBgn, toEnd);
- int32 to5 = toBgn - 0;
- int32 to3 = toTig->length(false) - toEnd;
+ // The overlap is now between regions toBgn-toEnd and frBgn-frEnd. Extend this to cover the ends of each tig.
+ //
+ // ------------------------------------------
+ // +++ ||| olap ||| +++
+ // -------------------------------
- // Find the min of each of the 5' and 3' hangs. These tell what portions of each tig should be overlapping.
+    if (toBgn < frBgn) {
+      frBgn -= toBgn;    //  Subtract the smaller hang from the larger BEFORE zeroing it; the reverse order subtracts 0.
+      toBgn -= toBgn;
+    } else {
+      toBgn -= frBgn;
+      frBgn -= frBgn;
+    }
- int32 min5 = min(fr5, to5);
- int32 min3 = min(fr3, to3);
+    if (toEnd < frEnd) {
+      frEnd -= toEnd;    //  Subtract the smaller hang from the larger BEFORE zeroing it; the reverse order subtracts 0.
+      toEnd -= toEnd;
+    } else {
+      toEnd -= frEnd;
+      frEnd -= frEnd;
+    }
- // Extending the read positions by these minimum extensions will then give up (more or less) the region that aligns.
+ //fprintf(graph, "hangs2- fr %d-%d to %d-%d\n",
+ // frBgn, frEnd, toBgn, toEnd);
- frBgn -= min5;
- frEnd += min3;
+ // Compute the alignment between the two regions, and convert to a cigar string.
- toBgn -= min5;
- toEnd += min3;
+ frLen -= (frBgn + frEnd);
+ toLen -= (toBgn + toEnd);
- // And undo the normalization.
+ sprintf(cigar, "%dM", (frLen + toLen) / 2); // Used to be 'm', Bandage complained about it not being 'M'.
- if (frForward) {
- frBgn = frTig->length(false) - frBgn;
- frEnd = frTig->length(false) - frEnd;
- }
+ // The overlap should now have one of:
+ // frBgn == toEnd == 0 -- to has an overlap to fr
+ // frEnd == toBgn == 0 -- fr has an overlap to to
+ //
+ // If not, the overlap is inconsistent with the tigs; it implies the two tigs overlap in their
+ // entirety.
- if (toForward) {
- toBgn = toTig->length(false) - toBgn;
- toEnd = toTig->length(false) - toEnd;
- }
+ // GFA requires that the overlap be between the end of the first read and the start of the second read.
+ // Flip the order if needed.
- // Find an overlap, convert it to a cigar string.
+ if (frTigID == toTigID) {
+ fprintf(stderr, "L\ttig%08u\t%c\ttig%08d\t%c\t%s circular\n",
+ frTigID, (frTigFwd) ? '+' : '-',
+ toTigID, (toTigFwd) ? '+' : '-',
+ cigar);
+ continue;
+ }
- // For now, we just take the average of the two lengths.
- int32 frLen = (frBgn < frEnd) ? (frEnd - frBgn) : (frBgn - frEnd);
- int32 toLen = (toBgn < toEnd) ? (toEnd - toBgn) : (toBgn - toEnd);
+ if ((toBgn == 0) && (frEnd == 0)) {
+ fprintf(graph, "L\ttig%08u\t%c\ttig%08d\t%c\t%s\n",
+ frTigID, (frTigFwd) ? '+' : '-',
+ toTigID, (toTigFwd) ? '+' : '-',
+ cigar);
+ continue;
+ }
- sprintf(cigar, "%dm", (frLen + toLen) / 2);
+ if ((frBgn == 0) && (toEnd == 0)) {
+ fprintf(graph, "L\ttig%08u\t%c\ttig%08d\t%c\t%s\n",
+ toTigID, (toTigFwd) ? '+' : '-',
+ frTigID, (frTigFwd) ? '+' : '-',
+ cigar);
+ continue;
+ }
- // Report the edge.
+ // Inconsistent edge.
- fprintf(stdout, "L\ttig%08u\t%c\ttig%08d\t%c\t%s\n",
- frTig->tigID(), frForward ? '+' : '-',
- toTig->tigID(), toForward ? '+' : '-',
+ fprintf(stderr, "L\ttig%08u\t%c\ttig%08d\t%c\t%s inconsistent\n",
+ frTigID, (frTigFwd) ? '+' : '-',
+ toTigID, (toTigFwd) ? '+' : '-',
cigar);
-
- tigStore->unloadTig(frTig->tigID(), true);
- tigStore->unloadTig(toTig->tigID(), true);
}
- delete [] cigar;
-
-#if 0
- for (uint32 ii=0; ii<4; ii++) {
- fprintf(stderr, "%8u%8u%8u%8u\n",
- edgeTypes[ii][0],
- edgeTypes[ii][1],
- edgeTypes[ii][2],
- edgeTypes[ii][3]);
- }
-#endif
+ edges.clear(); // Make valgrind slightly happier.
+ vertices.clear();
- delete tigStore;
+ delete [] cigar;
+ delete tigStore;
gkpStore->gkStore_close();
diff --git a/src/correction/errorEstimate.C b/src/correction/errorEstimate.C
new file mode 100644
index 0000000..be1095a
--- /dev/null
+++ b/src/correction/errorEstimate.C
@@ -0,0 +1,206 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * Modifications by:
+ *
+ * Sergey Koren beginning on 2016-MAY-16
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#include "AS_global.H"
+#include "ovStore.H"
+#include "splitToWords.H"
+
+#include "AS_UTL_decodeRange.H"
+#include "stddev.H"
+
+#include <vector>
+#include <algorithm>
+#include <map>
+
+using namespace std;
+
+int
+main(int argc, char **argv) {
+  // Estimates an error-rate cutoff from overlaps listed in a text file.
+  //   -S file   overlaps to read; a name starting with '-' reads stdin
+  //   -d n      deviations for the mean/median cutoffs logged to stderr
+  //   -m frac   fraction of reads that the mass cutoff must cover
+  //   -o        parse lines using the 'isOvl' column layout below
+  // Statistics go to stderr; the mass cutoff alone is printed on stdout.
+  char *scoreFileName = NULL;
+  uint32 deviations = 6;
+  float mass=0.98;
+  bool isOvl=false;
+
+  argc = AS_configure(argc, argv);
+
+  int32 arg = 1;
+  int32 err = 0;
+  while (arg < argc) {
+    // Value-taking options need a following argument; never read past argv.
+    bool hasValue = (arg + 1 < argc);
+    if ((strcmp(argv[arg], "-S") == 0) && hasValue) {
+      scoreFileName = argv[++arg];
+
+    } else if ((strcmp(argv[arg], "-d") == 0) && hasValue) {
+      deviations = atoi(argv[++arg]);
+
+    } else if ((strcmp(argv[arg], "-m") == 0) && hasValue) {
+      mass = atof(argv[++arg]);
+
+    } else if (strcmp(argv[arg], "-o") == 0) {
+      isOvl=true;
+
+    } else {
+      fprintf(stderr, "ERROR: invalid arg '%s'\n", argv[arg]);
+      err++;
+    }
+
+    arg++;
+  }
+
+  if (scoreFileName == NULL)
+    err++;
+
+  if (err) {
+    fprintf(stderr, "usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "\n");
+
+    exit(1);
+  }
+
+  // Test the stream pointer, not errno: errno may be changed by calls that succeed.
+  FILE *scoreFile = (scoreFileName[0] == '-') ? stdin : fopen(scoreFileName, "r");
+  if (scoreFile == NULL)
+    fprintf(stderr, "ERROR: failed to open '%s' for reading: %s\n", scoreFileName, strerror(errno)), exit(1);
+
+  // Read the file, keeping for each b_iid the overlap with the largest span.
+  char ovStr[1024];
+  ovOverlap ov(NULL);
+  map<uint32, uint32> readToLength;
+  map<uint32, double> readToIdy;
+  double mean, median, stddev, mad;
+  mean = median = stddev = mad = 0.0;
+
+  while (fgets(ovStr, 1024, scoreFile) != NULL) {
+    splitToWords W(ovStr);
+
+    if (isOvl) {
+      ov.a_iid = W(0);
+      ov.b_iid = W(1);
+      if (ov.a_iid == ov.b_iid)
+        continue;
+      // NOTE(review): ahg3 and bhg5 both read column 6, and the orientation test
+      // reads column 3, the same column used for the span - confirm the layout.
+      ov.dat.ovl.ahg5 = W(4);
+      ov.dat.ovl.ahg3 = W(6);
+      ov.dat.ovl.bhg5 = W(6);
+      ov.dat.ovl.bhg3 = W(7);
+      ov.span(W(3));
+      ov.erate(atof(W[8]));
+      ov.flipped(W[3][0] == 'I' ? true : false);
+
+    } else {
+      ov.a_iid = W(0);
+      ov.b_iid = W(1);
+
+      if (ov.a_iid == ov.b_iid)
+        continue;
+
+      assert(W[4][0] == '0');
+
+      ov.dat.ovl.ahg5 = W(5);
+      ov.dat.ovl.ahg3 = W(7) - W(6);
+
+      if (W[8][0] == '0') {
+        ov.dat.ovl.bhg5 = W(9);
+        ov.dat.ovl.bhg3 = W(11) - W(10);
+        ov.flipped(false);
+      } else {
+        ov.dat.ovl.bhg3 = W(9);
+        ov.dat.ovl.bhg5 = W(11) - W(10);
+        ov.flipped(true);
+      }
+      ov.erate(atof(W[2]));
+      ov.span(W(10)-W(9));
+    }
+
+    if (ov.erate() == 0.0)
+      ov.erate(0.01); // round up when we can't estimate accurately
+
+    if (readToLength.find(ov.b_iid) == readToLength.end() || readToLength[ov.b_iid] < ov.span()) {
+      readToLength[ov.b_iid] = ov.span();
+      readToIdy[ov.b_iid] = ov.erate();
+    }
+  }
+  fclose(scoreFile);
+
+  // With no overlaps the statistics below would read unset array entries.
+  if (readToIdy.empty())
+    fprintf(stderr, "ERROR: no overlaps found in '%s'\n", scoreFileName), exit(1);
+
+  stdDev<double> edgeStats;
+
+  // Gather the error rate kept for every read.
+  double *absdev = new double [readToLength.size() + 1];
+  double *erates = new double [readToLength.size() + 1];
+  uint32 eratesLen = 0;
+
+  for (map<uint32, double>::iterator it=readToIdy.begin(); it != readToIdy.end(); ++it)
+    edgeStats.insert(erates[eratesLen++] = it->second);
+
+  mean = edgeStats.mean();
+  stddev = edgeStats.stddev();
+
+  fprintf(stderr, "with %u points - mean %f stddev %f - would use overlaps below %f fraction error\n", edgeStats.size(), mean, stddev, mean + deviations * stddev);
+
+  // Find the median and absolute deviations.
+  sort(erates, erates+eratesLen);
+  median = erates[ eratesLen / 2 ];
+
+  // One pass over the sorted rates.  Entries before the middle are no larger
+  // than the median, so the subtraction order keeps each deviation non-negative.
+  // massCutoff follows the walk until the requested fraction has been counted.
+  double massCutoff = 0;
+  uint32 totalBelow = 0;
+  for (uint32 ii=0; ii<eratesLen; ii++) {
+    absdev[ii] = (ii < eratesLen/2) ? (median - erates[ii]) : (erates[ii] - median);
+    if ((double)totalBelow / eratesLen < mass) {
+      massCutoff = erates[ii];
+      totalBelow++;
+    }
+  }
+
+  sort(absdev, absdev+eratesLen);
+  assert(absdev[0] >= 0.0);
+
+  mad = absdev[eratesLen/2];
+
+  delete [] absdev;
+  delete [] erates;
+
+  fprintf(stderr, "with %u points - median %f mad %f - would use overlaps below %f fraction error\n",
+    edgeStats.size(), median, mad, median + deviations * 1.4826 * mad);
+
+  fprintf(stderr, "with %u points - mass of %u is below %f\n", edgeStats.size(), totalBelow, massCutoff);
+
+  fprintf(stdout, "%.3f\n", massCutoff /* median + deviations * 1.4826 * mad*/);
+  exit(0);
+}
diff --git a/src/mhap/mhap.mk b/src/correction/errorEstimate.mk
similarity index 53%
copy from src/mhap/mhap.mk
copy to src/correction/errorEstimate.mk
index f00bd5b..87dfe42 100644
--- a/src/mhap/mhap.mk
+++ b/src/correction/errorEstimate.mk
@@ -7,6 +7,13 @@ ifeq "$(strip ${TARGET_DIR})" ""
TARGET_DIR := ../$(OSTYPE)-$(MACHINETYPE)/bin
endif
-TARGET := mhap-2.0.jar
-SOURCES := mhap-2.0.tar
+TARGET := errorEstimate
+SOURCES := errorEstimate.C
+SRC_INCDIRS := .. ../AS_UTL ../stores ../overlapInCore ../utgcns/libNDalign ../overlapErrorAdjustment
+
+TGT_LDFLAGS := -L${TARGET_DIR}
+TGT_LDLIBS := -lcanu
+TGT_PREREQS := libcanu.a
+
+SUBMAKEFILES :=
diff --git a/src/falcon_sense/falcon_sense.C b/src/falcon_sense/falcon_sense.C
index 882b042..ad477dd 100644
--- a/src/falcon_sense/falcon_sense.C
+++ b/src/falcon_sense/falcon_sense.C
@@ -30,6 +30,7 @@
#include "falcon.H"
+#include <omp.h>
#include <vector>
#include <string>
@@ -43,6 +44,7 @@ main (int argc, char **argv) {
uint32 min_ovl_len = 500;
double min_idy = 0.5;
uint32 K = 8;
+ uint32 max_read_len = AS_MAX_READLEN;
argc = AS_configure(argc, argv);
@@ -64,6 +66,12 @@ main (int argc, char **argv) {
} else if (strcmp(argv[arg], "--min_ovl_len") == 0) {
min_ovl_len = atoi(argv[++arg]);
+ } else if (strcmp(argv[arg], "--max_read_len") == 0) {
+ max_read_len = atoi(argv[++arg]);
+ if (max_read_len <= 0 || max_read_len > AS_MAX_READLEN) {
+ max_read_len = AS_MAX_READLEN;
+ }
+
} else {
fprintf(stderr, "%s: Unknown option '%s'\n", argv[0], argv[arg]);
err++;
@@ -95,7 +103,7 @@ main (int argc, char **argv) {
if (W[0][0] == '+') {
uint32 splitSeqID = 0;
- FConsensus::consensus_data *consensus_data_ptr = FConsensus::generate_consensus( seqs, min_cov, K, min_idy, min_ovl_len );
+ FConsensus::consensus_data *consensus_data_ptr = FConsensus::generate_consensus( seqs, min_cov, K, min_idy, min_ovl_len, max_read_len );
char * split = strtok(consensus_data_ptr->sequence, "acgt");
while (split != NULL) {
if (strlen(split) > min_len) {
diff --git a/src/falcon_sense/libfalcon/falcon.C b/src/falcon_sense/libfalcon/falcon.C
old mode 100755
new mode 100644
index af35f06..05d4826
--- a/src/falcon_sense/libfalcon/falcon.C
+++ b/src/falcon_sense/libfalcon/falcon.C
@@ -78,13 +78,14 @@
#################################################################################$$
*/
+#include "falcon.H"
+
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <string.h>
#include <assert.h>
#include <stdint.h>
-#include "falcon.H"
namespace FConsensus {
@@ -327,13 +328,13 @@ void clean_msa_working_space( msa_pos_t * msa_array, uint32 max_t_len) {
}
}
-//#define STATIC_ALLOCATE
-#undef STATIC_ALLOCATE
+#define STATIC_ALLOCATE
+//#undef STATIC_ALLOCATE
consensus_data * get_cns_from_align_tags( align_tags_t ** tag_seqs,
uint32 n_tag_seqs,
uint32 t_len,
- uint32 min_cov ) {
+ uint32 min_cov, uint32 max_len ) {
seq_coor_t i,j;
seq_coor_t t_pos = 0;
@@ -380,10 +381,10 @@ consensus_data * get_cns_from_align_tags( align_tags_t ** tag_seqs,
#ifdef STATIC_ALLOCATE
if ( msa_array == NULL) {
- msa_array = get_msa_working_sapce( 100000 );
+ msa_array = get_msa_working_sapce( max_len );
}
- assert(t_len < 100000);
+ assert(t_len < max_len);
#endif
@@ -482,6 +483,8 @@ consensus_data * get_cns_from_align_tags( align_tags_t ** tag_seqs,
if (aln_col->p_t_pos[ck] == -1) {
score = (double) aln_col->link_count[ck] - (double) coverage[i] * 0.5;
+ } else if (pj > msa_array[pi]->max_delta) {
+ score = (double) aln_col->link_count[ck] - (double) coverage[i] * 0.5;
} else {
score = msa_array[pi]->delta[pj].base[pkk].score +
(double) aln_col->link_count[ck] - (double) coverage[i] * 0.5;
@@ -604,7 +607,7 @@ consensus_data * get_cns_from_align_tags( align_tags_t ** tag_seqs,
consensus_data * generate_consensus( vector<string> input_seq,
uint32 min_cov,
uint32 K,
- double min_idt, uint32 min_len) {
+ double min_idt, uint32 min_len, uint32 max_len) {
uint32 seq_count;
kmer_lookup * lk_ptr;
seq_array sa_ptr;
@@ -668,7 +671,7 @@ consensus_data * generate_consensus( vector<string> input_seq,
free_kmer_match( kmer_match_ptr);
}
- consensus = get_cns_from_align_tags( tags_list, seq_count, input_seq[0].length(), min_cov );
+ consensus = get_cns_from_align_tags( tags_list, seq_count, input_seq[0].length(), min_cov, max_len);
free_seq_addr_array(sda_ptr);
free_seq_array(sa_ptr);
free_kmer_lookup(lk_ptr);
diff --git a/src/falcon_sense/libfalcon/falcon.H b/src/falcon_sense/libfalcon/falcon.H
old mode 100755
new mode 100644
index 60c352b..9aec526
--- a/src/falcon_sense/libfalcon/falcon.H
+++ b/src/falcon_sense/libfalcon/falcon.H
@@ -165,6 +165,6 @@ void mask_k_mer(seq_coor_t, kmer_lookup *, seq_coor_t);
consensus_data * generate_consensus( vector<string> input_seq,
uint32 min_cov,
uint32 K,
- double min_idt, uint32 min_len);
+ double min_idt, uint32 min_len, uint32 max_len);
void free_consensus_data(consensus_data *);
}
diff --git a/src/falcon_sense/libfalcon/kmer_lookup.C b/src/falcon_sense/libfalcon/kmer_lookup.C
old mode 100755
new mode 100644
diff --git a/src/main.mk b/src/main.mk
index 385824f..b272b26 100644
--- a/src/main.mk
+++ b/src/main.mk
@@ -42,7 +42,6 @@ SOURCES := AS_global.C \
AS_UTL/mt19937ar.C \
AS_UTL/readBuffer.C \
AS_UTL/speedCounter.C \
- AS_UTL/stddev.C \
AS_UTL/sweatShop.C \
AS_UTL/timeAndSize.C \
AS_UTL/kMer.C \
@@ -125,6 +124,7 @@ SUBMAKEFILES := stores/gatekeeperCreate.mk \
stores/ovStoreIndexer.mk \
stores/ovStoreDump.mk \
stores/ovStoreStats.mk \
+ stores/tgStoreCompress.mk \
stores/tgStoreDump.mk \
stores/tgStoreLoad.mk \
stores/tgStoreFilter.mk \
@@ -153,6 +153,7 @@ SUBMAKEFILES := stores/gatekeeperCreate.mk \
correction/filterCorrectionOverlaps.mk \
correction/generateCorrectionLayouts.mk \
correction/readConsensus.mk \
+ correction/errorEstimate.mk \
\
falcon_sense/createFalconSenseInputs.mk \
falcon_sense/falcon_sense.mk \
@@ -161,6 +162,7 @@ SUBMAKEFILES := stores/gatekeeperCreate.mk \
overlapBasedTrimming/splitReads.mk \
\
overlapErrorAdjustment/findErrors.mk \
+ overlapErrorAdjustment/findErrors-Dump.mk \
overlapErrorAdjustment/correctOverlaps.mk \
\
bogart/bogart.mk \
diff --git a/src/merTrim/merTrimAdapter.C b/src/merTrim/merTrimAdapter.C
old mode 100755
new mode 100644
diff --git a/src/meryl/libleaff/fastaFile.C b/src/meryl/libleaff/fastaFile.C
index afeb673..94f250c 100644
--- a/src/meryl/libleaff/fastaFile.C
+++ b/src/meryl/libleaff/fastaFile.C
@@ -26,9 +26,6 @@
#include "fastaFile.H"
#include "dnaAlphabets.H"
-#undef DEBUG
-#undef DEBUGINDEX
-
// Says 'kmerFastaFileIdx'
#define FASTA_MAGICNUMBER1 0x7473614672656d6bULL
#define FASTA_MAGICNUMBER2 0x786449656c694661ULL
diff --git a/src/meryl/libleaff/fastaStdin.C b/src/meryl/libleaff/fastaStdin.C
index eead25b..94055b3 100644
--- a/src/meryl/libleaff/fastaStdin.C
+++ b/src/meryl/libleaff/fastaStdin.C
@@ -72,6 +72,8 @@ fastaStdin::openFile(const char *filename) {
if (filename == 0L)
return(0L);
+ // The stdin variants also handle compressed inputs (because we can't seek in these).
+
uint32 fl = strlen(filename);
char cmd[32 + fl];
@@ -103,7 +105,7 @@ fastaStdin::getNumberOfSequences(void) {
uint32
fastaStdin::find(const char *sequencename) {
- fprintf(stderr, "fastaStdin::find()-- ERROR! Used for random access.\n");
+ fprintf(stderr, "fastaStdin::find()-- ERROR! Used for random access on sequence '%s'.\n", sequencename);
assert(0);
return(~uint32ZERO);
}
@@ -135,7 +137,7 @@ fastaStdin::getSequence(uint32 iid,
bool ret = true;
#ifdef DEBUG
- fprintf(stderr, "fastaStdin::getSequence(full)-- "uint32FMT"\n", iid);
+ fprintf(stderr, "fastaStdin::getSequence(full)-- "F_U32"\n", iid);
#endif
if (iid == _nextIID)
@@ -173,12 +175,10 @@ fastaStdin::getSequence(uint32 iid,
bool
fastaStdin::getSequence(uint32 iid,
- uint32 bgn, uint32 end, char *s) {
+ uint32 bgn, uint32 end, char *UNUSED(s)) {
-#ifdef DEBUG
- fprintf(stderr, "fastaStdin::getSequence(part)-- "uint32FMT"\n", iid);
-#endif
- fprintf(stderr, "fastaStdin::getSequence(part)-- ERROR! Used for random access.\n");
+ fprintf(stderr, "fastaStdin::getSequence(part)-- ERROR! Used for random access on sequence %u bgn %u end %u.\n",
+ iid, bgn, end);
assert(0);
return(false);
}
diff --git a/src/meryl/libleaff/fastqFile.C b/src/meryl/libleaff/fastqFile.C
index 9d3a7a6..5ee1a9b 100644
--- a/src/meryl/libleaff/fastqFile.C
+++ b/src/meryl/libleaff/fastqFile.C
@@ -26,10 +26,6 @@
#include "fastqFile.H"
#include "dnaAlphabets.H"
-
-#undef DEBUG
-#undef DEBUGINDEX
-
// Says 'kmerFastaFileIdx'
#define FASTQ_MAGICNUMBER1 0x7473614672656d6bULL
#define FASTQ_MAGICNUMBER2 0x786449656c694661ULL
diff --git a/src/meryl/libleaff/fastqStdin.C b/src/meryl/libleaff/fastqStdin.C
index a0a10c9..d0e5f49 100644
--- a/src/meryl/libleaff/fastqStdin.C
+++ b/src/meryl/libleaff/fastqStdin.C
@@ -73,6 +73,8 @@ fastqStdin::openFile(const char *filename) {
((filename != 0L) && (filename[0] == '-') && (filename[1] == 0)))
return(new fastqStdin(0L));
+ // The stdin variants also handle compressed inputs (because we can't seek in these).
+
if (filename == 0L)
return(0L);
@@ -139,7 +141,7 @@ fastqStdin::getSequence(uint32 iid,
bool ret = true;
#ifdef DEBUG
- fprintf(stderr, "fastqStdin::getSequence(full)-- "uint32FMT"\n", iid);
+ fprintf(stderr, "fastqStdin::getSequence(full)-- "F_U32"\n", iid);
#endif
if (iid == _nextIID)
@@ -179,9 +181,6 @@ bool
fastqStdin::getSequence(uint32 iid,
uint32 bgn, uint32 end, char *s) {
-#ifdef DEBUG
- fprintf(stderr, "fastqStdin::getSequence(part)-- "uint32FMT"\n", iid);
-#endif
fprintf(stderr, "fastqStdin::getSequence(part)-- ERROR! Used for random access on iid "F_U32" from position "F_U32"-"F_U32".\n", iid, bgn, end);
assert(0);
return(false);
diff --git a/src/meryl/libleaff/gkStoreFile.C b/src/meryl/libleaff/gkStoreFile.C
index f9482ad..5458cd9 100644
--- a/src/meryl/libleaff/gkStoreFile.C
+++ b/src/meryl/libleaff/gkStoreFile.C
@@ -46,7 +46,7 @@ gkStoreFile::gkStoreFile(const char *name) {
gkp = gkStore::gkStore_open(_filename);
_numberOfSequences = gkp->gkStore_getNumReads();
- fprintf(stderr, "Opened '%s' with %u reads\n", _filename, _numberOfSequences);
+ //fprintf(stderr, "Opened '%s' with %u reads\n", _filename, _numberOfSequences);
}
gkStoreFile::~gkStoreFile() {
diff --git a/src/meryl/libleaff/seqCache.C b/src/meryl/libleaff/seqCache.C
index fd056bb..184e09b 100644
--- a/src/meryl/libleaff/seqCache.C
+++ b/src/meryl/libleaff/seqCache.C
@@ -25,9 +25,6 @@
#include "seqCache.H"
#include "seqFactory.H"
-//#include "alphabet.h"
-
-#undef DEBUG
seqCache::seqCache(const char *filename, uint32 cachesize, bool verbose) {
diff --git a/src/meryl/libleaff/seqFile.H b/src/meryl/libleaff/seqFile.H
index bb9ab71..5392635 100644
--- a/src/meryl/libleaff/seqFile.H
+++ b/src/meryl/libleaff/seqFile.H
@@ -30,6 +30,8 @@
#ifndef SEQFILE_H
#define SEQFILE_H
+#undef DEBUG
+
#include "AS_global.H"
#include "readBuffer.H"
diff --git a/src/meryl/libmeryl.C b/src/meryl/libmeryl.C
index 1d50760..72ce68c 100644
--- a/src/meryl/libmeryl.C
+++ b/src/meryl/libmeryl.C
@@ -46,15 +46,18 @@
#include "libmeryl.H"
#include "AS_UTL_fileIO.H"
+#include "AS_UTL_alloc.H"
-#define LIBMERYL_HISTOGRAM_MAX 1048576
+
+// Version 3 ??
+// Version 4 removed _histogramHuge, dynamically sizing it on write.
// 0123456789012345
-static char *ImagicV = "merylStreamIv03\n";
+static char *ImagicV = "merylStreamIv04\n";
static char *ImagicX = "merylStreamIvXX\n";
-static char *DmagicV = "merylStreamDv03\n";
+static char *DmagicV = "merylStreamDv04\n";
static char *DmagicX = "merylStreamDvXX\n";
-static char *PmagicV = "merylStreamPv03\n";
+static char *PmagicV = "merylStreamPv04\n";
static char *PmagicX = "merylStreamPvXX\n";
merylStreamReader::merylStreamReader(const char *fn_, uint32 ms_) {
@@ -150,15 +153,19 @@ merylStreamReader::merylStreamReader(const char *fn_, uint32 ms_) {
_numDistinct = _IDX->getBits(64);
_numTotal = _IDX->getBits(64);
- _histogramHuge = 0;
+ _histogramPos = 0;
_histogramLen = 0;
_histogramMaxValue = 0;
_histogram = 0L;
uint32 version = atoi(Imagic + 13);
- if (version > 1) {
- _histogramHuge = _IDX->getBits(64);
+ // Versions earlier than four used a fixed-size histogram, stored at the start
+ // of the index.
+
+ if (version < 4) {
+ _histogramPos = _IDX->tell();
+ _histogramLen = _IDX->getBits(64); // Previous _histogramHuge, now unused
_histogramLen = _IDX->getBits(64);
_histogramMaxValue = _IDX->getBits(64);
_histogram = new uint64 [_histogramLen];
@@ -167,6 +174,26 @@ merylStreamReader::merylStreamReader(const char *fn_, uint32 ms_) {
_histogram[i] = _IDX->getBits(64);
}
+ // Version 4 switched to a dynamically sized histogram, stored at the end
+ // of the index.
+
+ else {
+ _histogramPos = _IDX->getBits(64);
+ _histogramLen = _IDX->getBits(64);
+ _histogramMaxValue = _IDX->getBits(64);
+ _histogram = new uint64 [_histogramLen];
+
+ uint64 position = _IDX->tell();
+
+ _IDX->seek(_histogramPos);
+
+ for (uint32 i=0; i<_histogramLen; i++)
+ _histogram[i] = _IDX->getBits(64);
+
+ _IDX->seek(position);
+ }
+
+
_thisBucket = uint64ZERO;
_thisBucketSize = getIDXnumber();
_numBuckets = uint64ONE << _prefixSize;
@@ -281,13 +308,6 @@ merylStreamWriter::merylStreamWriter(const char *fn_,
delete [] outpath;
- // Save really important stuff
-
- // unpacked --> write 0.42M mers/sec on 8 threads, merge 3.3M mers/sec
- // packed --> write 0.77M mers/sec on 8 threads, merge 3.9M mers/sec
- //
- // This sucks.
- //
_idxIsPacked = 1;
_datIsPacked = 1;
_posIsPacked = 0;
@@ -305,6 +325,14 @@ merylStreamWriter::merylStreamWriter(const char *fn_,
_numDistinct = uint64ZERO;
_numTotal = uint64ZERO;
+ _histogramPos = 0;
+ _histogramLen = 1024;
+ _histogramMaxValue = 0;
+ _histogram = new uint64 [_histogramLen];
+
+ for (uint32 i=0; i<_histogramLen; i++)
+ _histogram[i] = 0;
+
_thisMerIsBits = false;
_thisMerIskMer = false;
@@ -319,6 +347,8 @@ merylStreamWriter::merylStreamWriter(const char *fn_,
_thisMerCount = uint64ZERO;
+ // Initialize the index file.
+
for (uint32 i=0; i<16; i++)
_IDX->putBits(ImagicX[i], 8);
@@ -333,23 +363,17 @@ merylStreamWriter::merylStreamWriter(const char *fn_,
_IDX->putBits(_numDistinct, 64);
_IDX->putBits(_numTotal, 64);
- _histogramHuge = 0;
- _histogramLen = LIBMERYL_HISTOGRAM_MAX;
- _histogramMaxValue = 0;
- _histogram = new uint64 [_histogramLen];
-
- for (uint32 i=0; i<_histogramLen; i++)
- _histogram[i] = 0;
+ _IDX->putBits(0, 64); // Offset to the histogram
+ _IDX->putBits(0, 64); // Length of the histogram data
+ _IDX->putBits(0, 64); // Max value seen in the histogram
- _IDX->putBits(_histogramHuge, 64);
- _IDX->putBits(_histogramLen, 64);
- _IDX->putBits(_histogramMaxValue, 64);
- for (uint32 i=0; i<_histogramLen; i++)
- _IDX->putBits(_histogram[i], 64);
+ // Initialize the data file.
for (uint32 i=0; i<16; i++)
_DAT->putBits(DmagicX[i], 8);
+ // Initialize the positions file.
+
if (_POS)
for (uint32 i=0; i<16; i++)
_POS->putBits(PmagicX[i], 8);
@@ -361,17 +385,25 @@ merylStreamWriter::~merylStreamWriter() {
writeMer();
// Finish writing the buckets.
- //
+
while (_thisBucket < _numBuckets + 2) {
setIDXnumber(_thisBucketSize);
_thisBucketSize = 0;
_thisBucket++;
}
- // Seek back to the start and rewrite the magic numbers
- //
+ // Save the position of the histogram
+
+ _histogramPos = _IDX->tell();
+
+ // And write the histogram
+
+ for (uint32 i=0; i<=_histogramMaxValue; i++)
+ _IDX->putBits(_histogram[i], 64);
+
+ // Seek back to the start and rewrite the magic numbers.
+
_IDX->seek(0);
- _DAT->seek(0);
for (uint32 i=0; i<16; i++)
_IDX->putBits(ImagicV[i], 8);
@@ -387,25 +419,35 @@ merylStreamWriter::~merylStreamWriter() {
_IDX->putBits(_numDistinct, 64);
_IDX->putBits(_numTotal, 64);
- _IDX->putBits(_histogramHuge, 64);
- _IDX->putBits(_histogramLen, 64);
- _IDX->putBits(_histogramMaxValue, 64);
- for (uint32 i=0; i<_histogramLen; i++)
- _IDX->putBits(_histogram[i], 64);
- delete _IDX;
+ _IDX->putBits(_histogramPos, 64);
+ _IDX->putBits(_histogramMaxValue+1, 64); // The length of the data (includes 0)
+ _IDX->putBits(_histogramMaxValue, 64); // The maximum value of the data
+ delete _IDX;
delete [] _histogram;
+ // Seek back to the start of the data and rewrite the magic numbers.
+
+ _DAT->seek(0);
+
for (uint32 i=0; i<16; i++)
_DAT->putBits(DmagicV[i], 8);
+
delete _DAT;
+ // Seek back to the start of the positions and rewrite the magic numbers.
+
if (_POS) {
+ _POS->seek(0);
+
for (uint32 i=0; i<16; i++)
_POS->putBits(PmagicV[i], 8);
- delete _POS;
}
+ delete _POS;
+
+ // All done! Rename our temporary outputs to final outputs.
+
char *outpath = new char [FILENAME_MAX];
char *finpath = new char [FILENAME_MAX];
@@ -437,10 +479,11 @@ merylStreamWriter::writeMer(void) {
_numTotal += _thisMerCount;
_numDistinct++;
- if (_thisMerCount < LIBMERYL_HISTOGRAM_MAX)
- _histogram[_thisMerCount]++;
- else
- _histogramHuge++;
+ if (_thisMerCount >= _histogramLen)
+ resizeArray(_histogram, _histogramMaxValue, _histogramLen, _thisMerCount + 16384);
+
+ _histogram[_thisMerCount]++;
+
if (_histogramMaxValue < _thisMerCount)
_histogramMaxValue = _thisMerCount;
diff --git a/src/meryl/libmeryl.H b/src/meryl/libmeryl.H
index d965bf1..a69693c 100644
--- a/src/meryl/libmeryl.H
+++ b/src/meryl/libmeryl.H
@@ -80,7 +80,6 @@ public:
uint64 histogram(uint32 i) { return((i < _histogramLen) ? _histogram[i] : ~uint64ZERO); };
uint64 histogramLength(void) { return(_histogramLen); };
- uint64 histogramHuge(void) { return(_histogramHuge); };
uint64 histogramMaximumCount(void) { return(_histogramMaxValue); };
bool nextMer(void);
@@ -139,7 +138,7 @@ private:
uint64 _numDistinct;
uint64 _numTotal;
- uint64 _histogramHuge; // number that are bigger than Len
+ uint64 _histogramPos; // position of the histogram data in IDX
uint64 _histogramLen; // number of entries in the histo
uint64 _histogramMaxValue; // highest count ever seen
uint64 *_histogram;
@@ -207,8 +206,8 @@ private:
uint64 _numDistinct;
uint64 _numTotal;
- uint64 _histogramHuge; // number that are bigger than Len
- uint64 _histogramLen; // number of entries in the histo
+ uint64 _histogramPos; // position of the histogram data in IDX
+ uint64 _histogramLen; // number of entries in the histogram
uint64 _histogramMaxValue; // highest count ever seen
uint64 *_histogram;
diff --git a/src/meryl/meryl-build.C b/src/meryl/meryl-build.C
index 1e7d493..e69a13a 100644
--- a/src/meryl/meryl-build.C
+++ b/src/meryl/meryl-build.C
@@ -394,44 +394,13 @@ runSegment(merylArgs *args, uint64 segment) {
delete [] filename;
-
- //
- // We can do all allocations up front:
- // mer data storage (the buckets themselves, plus 64 for slop)
- // bucket pointers (plus an extra bucket at the end and a little for slop)
- // bucket size counting space, last because we toss it out quickly
- //
- if (args->beVerbose)
- fprintf(stderr, " Allocating "F_U64"MB for mer storage ("F_U32" bits wide).\n",
- (args->basesPerBatch * args->merDataWidth + 64) >> 23, args->merDataWidth);
-
- // Mer storage - if mers are bigger than 32, we allocate full
- // words. The last allocation is always a bitPacked array.
-
- for (uint64 mword=0, width=args->merDataWidth; width > 0; ) {
- if (width >= 64) {
- merDataArray[mword] = new uint64 [ args->basesPerBatch + 1 ];
- width -= 64;
- mword++;
- } else {
- merDataArray[mword] = new uint64 [ (args->basesPerBatch * width + 64) >> 6 ];
- width = 0;
- }
- }
-
- if (args->positionsEnabled) {
- if (args->beVerbose)
- fprintf(stderr, " Allocating "F_U64"MB for mer position storage.\n",
- (args->basesPerBatch * 32 + 32) >> 23);
- merPosnArray = new uint32 [ args->basesPerBatch + 1 ];
- }
+ // Allocate space for bucket pointers and (temporary) bucket sizes.
if (args->beVerbose)
fprintf(stderr, " Allocating "F_U64"MB for bucket pointer table ("F_U32" bits wide).\n",
(args->numBuckets * args->bucketPointerWidth + 128) >> 23, args->bucketPointerWidth);
bucketPointers = new uint64 [(args->numBuckets * args->bucketPointerWidth + 128) >> 6];
-
if (args->beVerbose)
fprintf(stderr, " Allocating "F_U64"MB for counting the size of each bucket.\n", args->numBuckets >> 18);
bucketSizes = new uint32 [ args->numBuckets ];
@@ -512,12 +481,42 @@ runSegment(merylArgs *args, uint64 segment) {
// All done with the counting table, get rid of it.
- //
+
if (args->beVerbose)
fprintf(stderr, " Releasing "F_U64"MB from counting the size of each bucket.\n", args->numBuckets >> 18);
delete [] bucketSizes;
+
+ // Allocate space for mer storage and (optional) position data. If mers are bigger than 32, we
+ // allocate full words.
+
+ if (args->beVerbose)
+ fprintf(stderr, " Allocating "F_U64"MB for mer storage ("F_U32" bits wide).\n",
+ (args->basesPerBatch * args->merDataWidth + 64) >> 23, args->merDataWidth);
+
+ for (uint64 mword=0, width=args->merDataWidth; width > 0; ) {
+ if (width >= 64) {
+ merDataArray[mword] = new uint64 [ args->basesPerBatch + 1 ];
+ width -= 64;
+ mword++;
+ } else {
+ merDataArray[mword] = new uint64 [ (args->basesPerBatch * width + 64) >> 6 ];
+ width = 0;
+ }
+ }
+
+ // Position data.
+
+ if (args->positionsEnabled) {
+ if (args->beVerbose)
+ fprintf(stderr, " Allocating "F_U64"MB for mer position storage.\n",
+ (args->basesPerBatch * 32 + 32) >> 23);
+ merPosnArray = new uint32 [ args->basesPerBatch + 1 ];
+ }
+
+
+
C = new speedCounter(" Filling mers into list: %7.2f Mmers -- %5.2f Mmers/second\r", 1000000.0, 0x1fffff, args->beVerbose);
M = new merStream(new kMerBuilder(args->merSize, args->merComp),
new seqStream(args->inputFile),
diff --git a/src/meryl/meryl-dump.C b/src/meryl/meryl-dump.C
index 41912b3..b4156e4 100644
--- a/src/meryl/meryl-dump.C
+++ b/src/meryl/meryl-dump.C
@@ -133,8 +133,8 @@ plotHistogram(merylArgs *args) {
fprintf(stderr, "Found "F_U64" distinct mers.\n", M->numberOfDistinctMers());
fprintf(stderr, "Found "F_U64" unique mers.\n", M->numberOfUniqueMers());
- fprintf(stderr, "Largest mercount is "F_U64"; "F_U64" mers are too big for histogram.\n",
- M->histogramMaximumCount(), M->histogramHuge());
+ fprintf(stderr, "Largest mercount is "F_U64".\n",
+ M->histogramMaximumCount());
for (uint32 i=1; i<M->histogramLength(); i++) {
uint64 hist = M->histogram(i);
diff --git a/src/mhap/mhap.mk b/src/mhap/mhap.mk
index f00bd5b..0655256 100644
--- a/src/mhap/mhap.mk
+++ b/src/mhap/mhap.mk
@@ -7,6 +7,6 @@ ifeq "$(strip ${TARGET_DIR})" ""
TARGET_DIR := ../$(OSTYPE)-$(MACHINETYPE)/bin
endif
-TARGET := mhap-2.0.jar
-SOURCES := mhap-2.0.tar
+TARGET := mhap-2.1.jar
+SOURCES := mhap-2.1.tar
diff --git a/src/overlapErrorAdjustment/correctOverlaps-Correct_Frags.C b/src/overlapErrorAdjustment/correctOverlaps-Correct_Frags.C
index 7d55808..d7166fb 100644
--- a/src/overlapErrorAdjustment/correctOverlaps-Correct_Frags.C
+++ b/src/overlapErrorAdjustment/correctOverlaps-Correct_Frags.C
@@ -19,6 +19,10 @@
* are Copyright 2015 Battelle National Biodefense Institute, and
* are subject to the BSD 3-Clause License
*
+ * Brian P. Walenz beginning on 2016-MAY-02
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -87,7 +91,7 @@ correctRead(uint32 curID,
if ((i != C[Cpos].pos) &&
(i != C[Cpos].pos + 1))
- fprintf(stderr, "i=%d Cpos=%d C[Cpos].pos=%d\n", i, Cpos, C[Cpos].pos);
+ fprintf(stderr, "i="F_U32" Cpos="F_U64" C[Cpos].pos="F_U32"\n", i, Cpos, C[Cpos].pos);
assert((i == C[Cpos].pos) ||
(i == C[Cpos].pos + 1));
@@ -239,6 +243,11 @@ Correct_Frags(coParameters *G,
fprintf(stderr, "Correcting "F_U64" bases with "F_U64" indel adjustments.\n", G->basesLen, G->adjustsLen);
+ fprintf(stderr, "--Allocate "F_U64" + "F_U64" + "F_U64" MB for bases, adjusts and reads.\n",
+ (sizeof(char) * G->basesLen) >> 20,
+ (sizeof(Adjust_t) * G->adjustsLen) >> 20,
+ (sizeof(Frag_Info_t) * (G->endID - G->bgnID + 1)) >> 20);
+
G->bases = new char [G->basesLen];
G->adjusts = new Adjust_t [G->adjustsLen];
G->reads = new Frag_Info_t [G->endID - G->bgnID + 1];
@@ -276,7 +285,7 @@ Correct_Frags(coParameters *G,
// We should be at the IDENT message.
if (C[Cpos].type != IDENT) {
- fprintf(stderr, "ERROR: didn't find IDENT at Cpos=%u for read %u\n", Cpos, curID);
+ fprintf(stderr, "ERROR: didn't find IDENT at Cpos="F_U64" for read "F_U32"\n", Cpos, curID);
fprintf(stderr, " C[Cpos] = keep_left=%u keep_right=%u type=%u pos=%u readID=%u\n",
C[Cpos].keep_left,
C[Cpos].keep_right,
diff --git a/src/overlapErrorAdjustment/correctOverlaps-Prefix_Edit_Distance.C b/src/overlapErrorAdjustment/correctOverlaps-Prefix_Edit_Distance.C
index 9b321e2..2db1a19 100644
--- a/src/overlapErrorAdjustment/correctOverlaps-Prefix_Edit_Distance.C
+++ b/src/overlapErrorAdjustment/correctOverlaps-Prefix_Edit_Distance.C
@@ -15,6 +15,10 @@
*
* Modifications by:
*
+ * Brian P. Walenz beginning on 2016-MAY-02
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -138,7 +142,7 @@ Allocate_More_Edit_Space(pedWorkArea_t *WA) {
fprintf(stderr, "Allocate_More_Edit_Space()-- ERROR: couldn't allocate enough space for even one more entry! e=%d\n", e);
assert(e != b);
- //fprintf(stderr, "WorkArea %d allocates space %d of size %d for array %d through %d\n", WA->thread_id, a, Size, b, e-1);
+ fprintf(stderr, "--Allocate "F_U64" MB for edit array work space %u (positions %u-%u)\n", Size >> 20, a, b, e-1);
}
diff --git a/src/overlapErrorAdjustment/correctOverlaps-Read_Olaps.C b/src/overlapErrorAdjustment/correctOverlaps-Read_Olaps.C
index 0549866..e845f7f 100644
--- a/src/overlapErrorAdjustment/correctOverlaps-Read_Olaps.C
+++ b/src/overlapErrorAdjustment/correctOverlaps-Read_Olaps.C
@@ -19,6 +19,10 @@
* are Copyright 2015 Battelle National Biodefense Institute, and
* are subject to the BSD 3-Clause License
*
+ * Brian P. Walenz beginning on 2016-MAY-02
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -43,6 +47,9 @@ Read_Olaps(coParameters *G, gkStore *gkpStore) {
fprintf(stderr, "Read_Olaps()-- Loading "F_U64" overlaps from '%s' for reads "F_U32" to "F_U32"\n",
numolaps, G->ovlStorePath, G->bgnID, G->endID);
+ fprintf(stderr, "--Allocate "F_U64" MB for overlaps.\n",
+ (sizeof(Olap_Info_t) * numolaps) >> 20);
+
G->olaps = new Olap_Info_t [numolaps];
G->olapsLen = 0;
diff --git a/src/overlapErrorAdjustment/correctOverlaps-Redo_Olaps.C b/src/overlapErrorAdjustment/correctOverlaps-Redo_Olaps.C
index c9ca109..2cede22 100644
--- a/src/overlapErrorAdjustment/correctOverlaps-Redo_Olaps.C
+++ b/src/overlapErrorAdjustment/correctOverlaps-Redo_Olaps.C
@@ -290,16 +290,19 @@ Redo_Olaps(coParameters *G, gkStore *gkpStore) {
// Allocate some temporary work space for the forward and reverse corrected B reads.
- char *fseq = new char [AS_MAX_READLEN + AS_MAX_READLEN];
+ fprintf(stderr, "--Allocate "F_U64" MB for fseq and rseq.\n", (2 * sizeof(char) * 2 * (AS_MAX_READLEN + 1)) >> 20);
+ char *fseq = new char [AS_MAX_READLEN + 1 + AS_MAX_READLEN + 1];
uint32 fseqLen = 0;
- char *rseq = new char [AS_MAX_READLEN + AS_MAX_READLEN];
+ char *rseq = new char [AS_MAX_READLEN + 1 + AS_MAX_READLEN + 1];
uint32 rseqLen = 0;
- Adjust_t *fadj = new Adjust_t [AS_MAX_READLEN];
- Adjust_t *radj = new Adjust_t [AS_MAX_READLEN];
+ fprintf(stderr, "--Allocate "F_U64" MB for fadj and radj.\n", (2 * sizeof(Adjust_t) * (AS_MAX_READLEN + 1)) >> 20);
+ Adjust_t *fadj = new Adjust_t [AS_MAX_READLEN + 1];
+ Adjust_t *radj = new Adjust_t [AS_MAX_READLEN + 1];
uint32 fadjLen = 0; // radj is the same length
+ fprintf(stderr, "--Allocate "F_U64" MB for pedWorkArea_t.\n", sizeof(pedWorkArea_t) >> 20);
gkReadData *readData = new gkReadData;
pedWorkArea_t *ped = new pedWorkArea_t;
@@ -323,6 +326,8 @@ Redo_Olaps(coParameters *G, gkStore *gkpStore) {
// Process overlaps. Loop over the B reads, and recompute each overlap.
for (uint32 curID=loBid; curID<=hiBid; curID++) {
+ if (((curID - loBid) % 1024) == 0)
+ fprintf(stderr, "Recomputing overlaps - %9u - %9u - %9u\r", loBid, curID, hiBid);
if (curID < G->olaps[thisOvl].b_iid)
continue;
@@ -519,6 +524,8 @@ Redo_Olaps(coParameters *G, gkStore *gkpStore) {
}
}
+ fprintf(stderr, "\n");
+
delete ped;
delete readData;
delete [] radj;
@@ -527,6 +534,12 @@ Redo_Olaps(coParameters *G, gkStore *gkpStore) {
delete [] fseq;
delete Cfile;
+ fprintf(stderr, "-- Release bases, adjusts and reads.\n");
+
+ delete [] G->bases; G->bases = NULL;
+ delete [] G->adjusts; G->adjusts = NULL;
+ delete [] G->reads; G->reads = NULL;
+
fprintf(stderr, "Olaps Fwd "F_U64"\n", olapsFwd);
fprintf(stderr, "Olaps Rev "F_U64"\n", olapsRev);
diff --git a/src/overlapErrorAdjustment/correctOverlaps.C b/src/overlapErrorAdjustment/correctOverlaps.C
index 3bc80d9..2037b00 100644
--- a/src/overlapErrorAdjustment/correctOverlaps.C
+++ b/src/overlapErrorAdjustment/correctOverlaps.C
@@ -23,6 +23,10 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-30
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -153,19 +157,38 @@ main(int argc, char **argv) {
// Load overlaps we're going to correct
+ fprintf(stderr, "Loading overlaps.\n");
+
Read_Olaps(G, gkpStore);
// Now sort them on the B iid.
+ fprintf(stderr, "Sorting overlaps.\n");
+
+#ifdef _GLIBCXX_PARALLEL
+ __gnu_sequential::sort(G->olaps, G->olaps + G->olapsLen, Olap_Info_t_by_bID());
+#else
sort(G->olaps, G->olaps + G->olapsLen, Olap_Info_t_by_bID());
+#endif
// Recompute overlaps
+ fprintf(stderr, "Recomputing overlaps.\n");
+
Redo_Olaps(G, gkpStore);
+ gkpStore->gkStore_close();
+ gkpStore = NULL;
+
// Sort the overlaps back into the original order
+ fprintf(stderr, "Sorting overlaps.\n");
+
+#ifdef _GLIBCXX_PARALLEL
+ __gnu_sequential::sort(G->olaps, G->olaps + G->olapsLen, Olap_Info_t_by_Order());
+#else
sort(G->olaps, G->olaps + G->olapsLen, Olap_Info_t_by_Order());
+#endif
// Dump the new erates
@@ -181,6 +204,9 @@ main(int argc, char **argv) {
AS_UTL_safeWrite(fp, &G->endID, "hiid", sizeof(int32), 1);
AS_UTL_safeWrite(fp, &G->olapsLen, "num", sizeof(uint64), 1);
+ fprintf(stderr, "--Allocate "F_U64" MB for output error rates.\n",
+ (sizeof(uint16) * G->olapsLen) >> 20);
+
uint16 *evalue = new uint16 [G->olapsLen];
for (int32 i=0; i<G->olapsLen; i++)
@@ -199,6 +225,10 @@ main(int argc, char **argv) {
// Failed_Alignments_Ct, Total_Alignments_Ct,
// Total_Alignments_Ct == 0 ? 0.0 : (100.0 * Failed_Alignments_Ct) / Total_Alignments_Ct);
+ delete G;
+
+ fprintf(stderr, "DONE.\n");
+
exit(0);
}
diff --git a/src/overlapErrorAdjustment/correctOverlaps.H b/src/overlapErrorAdjustment/correctOverlaps.H
index cc2c30f..ab5e950 100644
--- a/src/overlapErrorAdjustment/correctOverlaps.H
+++ b/src/overlapErrorAdjustment/correctOverlaps.H
@@ -15,6 +15,10 @@
*
* Modifications by:
*
+ * Brian P. Walenz beginning on 2016-MAY-02
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -204,6 +208,8 @@ public:
void initialize(coParameters *G_, double errorRate) {
G = G_;
+ fprintf(stderr, "-- Allocate "F_U64" MB for Edit_Array pointers.\n", (sizeof(int32 *) * Edit_Array_Max) >> 20);
+
Edit_Array_Max = 1 + (uint32)(errorRate * AS_MAX_READLEN);
Edit_Array_Lazy = new int32 * [Edit_Array_Max];
diff --git a/src/overlapErrorAdjustment/findErrors-Analyze_Alignment.C b/src/overlapErrorAdjustment/findErrors-Analyze_Alignment.C
index d778914..4a7bd62 100644
--- a/src/overlapErrorAdjustment/findErrors-Analyze_Alignment.C
+++ b/src/overlapErrorAdjustment/findErrors-Analyze_Alignment.C
@@ -19,6 +19,14 @@
* are Copyright 2015 Battelle National Biodefense Institute, and
* are subject to the BSD 3-Clause License
*
+ * Sergey Koren beginning on 2016-MAR-22
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * Brian P. Walenz beginning on 2016-MAY-18
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -89,7 +97,6 @@ Matching_Vote(char ch) {
// a_len and b_len are the lengths of the prefixes of a_part and
// b_part , resp., that align.
-
void
Analyze_Alignment(Thread_Work_Area_t *wa,
char *a_part, int32 a_len, int32 a_offset,
@@ -102,7 +109,7 @@ Analyze_Alignment(Thread_Work_Area_t *wa,
int32 ct = 0;
// Necessary??
- memset(wa->globalvote, 0, sizeof(Vote_t) * AS_MAX_READLEN);
+ //memset(wa->globalvote, 0, sizeof(Vote_t) * AS_MAX_READLEN);
wa->globalvote[ct].frag_sub = -1;
wa->globalvote[ct].align_sub = -1;
@@ -215,7 +222,19 @@ Analyze_Alignment(Thread_Work_Area_t *wa,
// For each identified change, add votes for some region around the change.
-
+ //
+ // This is adding extra votes if the distance between two errors is larger than a kmer.
+ // Not sure why there are no 'matching bases' in this region.
+ //
+ // X == changes, mismatch or indel
+ //
+ // ------- <- confirmed count added
+ // ----- <- no_insert count added
+ // matching-bases} X 1 2 3 1 2 3 4 3 2 1 X {matching-bases
+ // ----- -----
+ // match match
+ // votes votes
+ //
for (int32 i=1; i<=ct; i++) {
int32 prev_match = wa->globalvote[i].align_sub - wa->globalvote[i - 1].align_sub - 1;
@@ -259,6 +278,11 @@ Analyze_Alignment(Thread_Work_Area_t *wa,
(wa->globalvote[i ].vote_val <= T_SUBST))) {
int32 next_match = wa->globalvote[i + 1].align_sub - wa->globalvote[i].align_sub - 1;
+ // if our vote is outside of the bounds (meaning we have gaps at the start or end of the alignment), skip the vote
+ if (a_offset + wa->globalvote[i].frag_sub < 0 || a_offset + wa->globalvote[i].frag_sub >= a_len) {
+ continue;
+ }
+
if (prev_match + next_match >= wa->G->Vote_Qualify_Len)
Cast_Vote(wa->G,
wa->globalvote[i].vote_val,
diff --git a/src/overlapErrorAdjustment/findErrors-Dump.C b/src/overlapErrorAdjustment/findErrors-Dump.C
new file mode 100644
index 0000000..a812c3b
--- /dev/null
+++ b/src/overlapErrorAdjustment/findErrors-Dump.C
@@ -0,0 +1,97 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-MAY-20
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#include "findErrors.H"
+
+
+
+
+
+//  Dump, as ASCII on stdout, every Correction_Output_t record stored in a
+//  findErrors *.red file.  One line is written per record: the read ID, the
+//  name of the correction type, the position, and the keep_left / keep_right
+//  flags as 't' or 'f'.
+//
+//  Command line:
+//    -r file.red   the file to dump (required)
+//
+//  Exits with status 1, after printing usage, if the command line is bad;
+//  exits with status 0 once the whole file has been dumped.
+//
+int
+main(int argc, char **argv) {
+  char *redName = NULL;
+
+  argc = AS_configure(argc, argv);
+
+  int arg = 1;
+  int err = 0;
+  while (arg < argc) {
+    if (strcmp(argv[arg], "-r") == 0) {
+      //  Only consume a file name if one is actually present.  Otherwise leave
+      //  redName unset so the 'no *.red file' error below is reported instead
+      //  of reading past the end of the argument list.
+      redName = (arg + 1 < argc) ? argv[++arg] : NULL;
+
+    } else {
+      fprintf(stderr, "Unknown option '%s'\n", argv[arg]);
+      err++;
+    }
+
+    arg++;
+  }
+
+  if (redName == NULL)
+    err++;
+
+  if (err > 0) {
+    fprintf(stderr, "usage: %s -r file.red\n", argv[0]);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "Dumps, as ASCII, the results from findErrors *.red files.\n");
+
+    if (redName == NULL)
+      fprintf(stderr, "ERROR: no *.red file (-r) supplied.\n");
+
+    exit(1);
+  }
+
+  //  Printable names, indexed by the 'type' field of a record.  Only the first
+  //  typeNameLen entries are names; the trailing NULL is a sentinel and must
+  //  never be handed to printf.
+  const uint32  typeNameLen  = 12;
+  const char   *typeName[13] = { "IDENT",
+                                 "DELETE",
+                                 "A_SUBST",
+                                 "C_SUBST",
+                                 "G_SUBST",
+                                 "T_SUBST",
+                                 "A_INSERT",
+                                 "C_INSERT",
+                                 "G_INSERT",
+                                 "T_INSERT",
+                                 "NO_VOTE",
+                                 "EXTENSION",
+                                 NULL };
+
+  memoryMappedFile     *Cfile = new memoryMappedFile(redName);
+  Correction_Output_t  *C     = (Correction_Output_t *)Cfile->get();
+  uint64                Clen  = Cfile->length() / sizeof(Correction_Output_t);
+
+  //  The index is 64-bit, matching Clen; a 32-bit index would wrap and never
+  //  reach Clen for files holding more than 2^32 records.
+  for (uint64 ii=0; ii<Clen; ii++) {
+
+    //  The type value comes straight from the file, so guard the table lookup
+    //  against values that have no name.
+    uint32       type = C[ii].type;
+    const char  *name = (type < typeNameLen) ? typeName[type] : "UNKNOWN";
+
+    fprintf(stdout, "%8u %12s %8u %c %c\n",
+            C[ii].readID,
+            name,
+            C[ii].pos,
+            C[ii].keep_left  ? 't' : 'f',
+            C[ii].keep_right ? 't' : 'f');
+  }
+
+  delete Cfile;
+
+  exit(0);
+}
+
+
diff --git a/src/mhap/mhap.mk b/src/overlapErrorAdjustment/findErrors-Dump.mk
similarity index 57%
copy from src/mhap/mhap.mk
copy to src/overlapErrorAdjustment/findErrors-Dump.mk
index f00bd5b..682cc46 100644
--- a/src/mhap/mhap.mk
+++ b/src/overlapErrorAdjustment/findErrors-Dump.mk
@@ -7,6 +7,13 @@ ifeq "$(strip ${TARGET_DIR})" ""
TARGET_DIR := ../$(OSTYPE)-$(MACHINETYPE)/bin
endif
-TARGET := mhap-2.0.jar
-SOURCES := mhap-2.0.tar
+TARGET := findErrors-Dump
+SOURCES := findErrors-Dump.C
+SRC_INCDIRS := .. ../AS_UTL ../stores ../overlapInCore/liboverlap
+
+TGT_LDFLAGS := -L${TARGET_DIR}
+TGT_LDLIBS := -lcanu
+TGT_PREREQS := libcanu.a
+
+SUBMAKEFILES :=
diff --git a/src/overlapErrorAdjustment/findErrors.C b/src/overlapErrorAdjustment/findErrors.C
index 8bf3713..f39a5f5 100644
--- a/src/overlapErrorAdjustment/findErrors.C
+++ b/src/overlapErrorAdjustment/findErrors.C
@@ -413,6 +413,10 @@ main(int argc, char **argv) {
arg++;
}
+ if (G->gkpStorePath == NULL)
+ err++;
+ if (G->ovlStorePath == NULL)
+ err++;
if (G->numThreads == 0)
err++;
@@ -442,6 +446,10 @@ main(int argc, char **argv) {
fprintf(stderr, "-V specify number of exact match bases around an error to vote to change\n");
fprintf(stderr, "-x length of end of exact match to exclude in preventing change\n");
+ if (G->gkpStorePath == NULL)
+ fprintf(stderr, "ERROR: no gatekeeper store (-G) supplied.\n");
+ if (G->ovlStorePath == NULL)
+ fprintf(stderr, "ERROR: no overlap store (-O) supplied.\n");
if (G->numThreads == 0)
fprintf(stderr, "ERROR: number of compute threads (-t) must be larger than zero.\n");
diff --git a/src/overlapInCore/overlapInCore-Process_String_Overlaps.C b/src/overlapInCore/overlapInCore-Process_String_Overlaps.C
index 648fb1b..c6f2a18 100644
--- a/src/overlapInCore/overlapInCore-Process_String_Overlaps.C
+++ b/src/overlapInCore/overlapInCore-Process_String_Overlaps.C
@@ -60,6 +60,10 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-23
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -445,7 +449,7 @@ Process_Matches (int * Start,
}
}
- distinct_olap = new Olap_Info_t [MAX_DISTINCT_OLAPS];
+ distinct_olap = WA->distinct_olap;
distinct_olap_ct = 0;
while ((* Start) != 0) {
@@ -547,7 +551,7 @@ Process_Matches (int * Start,
int32 j = p->t_lo;
int32 q_len = 0;
- char *q_diff = new char [AS_MAX_READLEN];
+ char *q_diff = WA->q_diff;
for (int32 k=0; k<p->delta_ct; k++) {
int32 len = abs(p->delta[k]);
@@ -600,7 +604,6 @@ Process_Matches (int * Start,
Bad_Long_Window_Ct++;
}
- delete [] q_diff;
}
if (! rejected) {
@@ -632,8 +635,6 @@ Process_Matches (int * Start,
WA->Multi_Overlap_Ct++;
}
- delete [] distinct_olap;
-
return;
}
diff --git a/src/overlapInCore/overlapInCore.C b/src/overlapInCore/overlapInCore.C
index 8f352f5..74c2336 100644
--- a/src/overlapInCore/overlapInCore.C
+++ b/src/overlapInCore/overlapInCore.C
@@ -171,6 +171,9 @@ Initialize_Work_Area(Work_Area_t *WA, int id, gkStore *gkpStore) {
WA->editDist = new prefixEditDistance(G.Doing_Partial_Overlaps, G.maxErate);
+ WA->q_diff = new char [AS_MAX_READLEN];
+ WA->distinct_olap = new Olap_Info_t [MAX_DISTINCT_OLAPS];
+
fprintf(stderr, "Initialize_Work_Area()-- done\n");
}
@@ -181,6 +184,9 @@ Delete_Work_Area(Work_Area_t *WA) {
delete [] WA->String_Olap_Space;
delete [] WA->Match_Node_Space;
delete [] WA->overlaps;
+
+ delete [] WA->distinct_olap;
+ delete [] WA->q_diff;
}
@@ -419,7 +425,7 @@ main(int argc, char **argv) {
} else if (strcmp(argv[arg], "--minlength") == 0) {
G.Min_Olap_Len = strtol (argv[++arg], NULL, 10);
} else if (strcmp(argv[arg], "--maxerate") == 0) {
- G.maxErate = ceil(strtof(argv[++arg], NULL) * 100) / 100;
+ G.maxErate = strtof(argv[++arg], NULL);
} else if (strcmp(argv[arg], "-w") == 0) {
G.Use_Window_Filter = TRUE;
diff --git a/src/overlapInCore/overlapInCore.H b/src/overlapInCore/overlapInCore.H
index cc1e2a8..9fdd2e9 100644
--- a/src/overlapInCore/overlapInCore.H
+++ b/src/overlapInCore/overlapInCore.H
@@ -254,6 +254,18 @@ typedef struct String_Olap_Node {
unsigned consistent : 1;
} String_Olap_t;
+
+//  Description of one overlap, as used through the Olap_Info_t typedef.
+//  Work_Area_t keeps a pointer to an array of these (distinct_olap), which is
+//  why this declaration has to come before Work_Area_t.
+//
+//  NOTE(review): the per-field notes below are inferred from the field names
+//  only -- confirm against the code that fills the structure in.
+//
+typedef struct Olap_Info {
+ int s_lo, s_hi;  // presumably the low/high extent in the 's' sequence
+ int t_lo, t_hi;  // presumably the low/high extent in the 't' sequence
+ double quality;
+ int delta [AS_MAX_READLEN+1]; // needs only MAX_ERRORS
+ int delta_ct;  // presumably the number of entries of delta[] in use
+ int s_left_boundary, s_right_boundary;
+ int t_left_boundary, t_right_boundary;
+ int min_diag, max_diag;
+} Olap_Info_t;
+
// The following structure holds what used to be global information, but
// is now encapsulated so that multiple copies can be made for multiple
// parallel threads.
@@ -304,6 +316,10 @@ typedef struct Work_Area {
uint64 Multi_Overlap_Ct;
prefixEditDistance *editDist;
+
+
+ char * q_diff;
+ Olap_Info_t *distinct_olap;
} Work_Area_t;
@@ -313,20 +329,6 @@ typedef uint32 Check_Vector_t;
// Bit vector to see if hash bucket could possibly contain a match
-typedef struct Olap_Info {
- int s_lo, s_hi;
- int t_lo, t_hi;
- double quality;
- int delta [AS_MAX_READLEN+1]; // needs only MAX_ERRORS
- int delta_ct;
- int s_left_boundary, s_right_boundary;
- int t_left_boundary, t_right_boundary;
- int min_diag, max_diag;
-} Olap_Info_t;
-
-
-
-
typedef uint64 String_Ref_t;
#define BIT_EMPT 62
diff --git a/src/overlapInCore/overlapPair.C b/src/overlapInCore/overlapPair.C
index fad1186..0fa091e 100644
--- a/src/overlapInCore/overlapPair.C
+++ b/src/overlapInCore/overlapPair.C
@@ -100,6 +100,7 @@ public:
//analyze = NULL;
overlapsLen = 0;
overlaps = NULL;
+ readSeq = NULL;
};
~workSpace() {
#ifdef BUSTED
@@ -114,6 +115,7 @@ public:
delete align;
align=NULL;
#endif
+ delete[] readSeq;
};
public:
@@ -121,6 +123,7 @@ public:
double maxErate;
bool partialOverlaps;
bool invertOverlaps;
+ char* readSeq;
gkStore *gkpStore;
@@ -175,8 +178,6 @@ void *
recomputeOverlaps(void *ptr) {
workSpace *WA = (workSpace *)ptr;
- char *bRev = new char [AS_MAX_READLEN];
-
uint32 bgnID = 0;
uint32 endID = 0;
@@ -190,12 +191,6 @@ recomputeOverlaps(void *ptr) {
if (WA->NDaln == NULL)
WA->NDaln = new NDalign(WA->partialOverlaps ? pedLocal : pedOverlap, WA->maxErate, 15);
#endif
-#ifndef FALCON
- WA->align = new StripedSmithWaterman::Aligner(1, 3, 3, 1);
- WA->filter = new StripedSmithWaterman::Filter();
-#else
- WA->align = new NDalignment::NDalignResult();
-#endif
//if (WA->analyze == NULL)
// WA->analyze = new analyzeAlignment();
@@ -273,7 +268,7 @@ if (nTested % 1000 == 0) {
WA->align->display("MHAP align():", true);
// fprintf(stderr, "Reads %d to %d, expected overlap %d - %d to %d - %d and found error rate %f from %d - %d and %d - %d\n", aID, bID, ovl->a_bgn(), ovl->a_end(), ovl->b_bgn(), ovl->b_end(), WA->NDaln->erate(), WA->NDaln->abgn(), WA->NDaln->aend(), WA->NDaln->bbgn(), WA->NDaln->bend());
#else
- char *bRead = new char[rcache->getLength(bID)+1];
+ char *bRead = WA->readSeq;
int32 astart = std::max((int32)0, (int32)ovl->a_bgn() - MHAP_SLOP);
int32 aend = std::min((int32)rcache->getLength(aID), (int32)ovl->a_end() + MHAP_SLOP);
int32 bstart = std::max((int32)0, (int32)ovl->b_bgn() - MHAP_SLOP);
@@ -305,7 +300,6 @@ if (nTested % 1000 == 0) {
uint32 alignmentLength = alignment.ref_end-alignment.ref_begin+1;
#endif
- delete[] bRead;
//fprintf(stderr, "Reads %d (%d) to %d (%d), expected overlap %d - %d to %d - %d and found error rate %f from %d - %d and %d - %d\n", aID, rcache->getLength(aID), bID, rcache->getLength(bID), ovl->a_bgn(), ovl->a_end(), ovl->b_bgn(), ovl->b_end(), (double)alignResult._dist/(alignmentLength),alignResult._tgt_bgn+astart, alignResult._tgt_end+astart-1, alignResult._qry_bgn+bstart, alignResult._qry_end+bstart-1);
@@ -377,10 +371,6 @@ if (nTested % 1000 == 0) {
#endif
}
- // All done.
-
- delete [] bRev;
-
// Report. The last batch has no work to do.
if (nFailed + nPassed > 0)
@@ -537,6 +527,15 @@ main(int argc, char **argv) {
WA[tt].align = NULL;
#endif
WA[tt].overlaps = NULL;
+
+ // preallocate some work thread memory for common tasks to avoid allocation
+#ifndef FALCON
+ WA[tt].align = new StripedSmithWaterman::Aligner(1, 3, 3, 1);
+ WA[tt].filter = new StripedSmithWaterman::Filter();
+#else
+ WA[tt].align = new NDalignment::NDalignResult();
+#endif
+ WA[tt].readSeq = new char[AS_MAX_READLEN+1];
}
diff --git a/src/pipelines/bogart-sweep.pl b/src/pipelines/bogart-sweep.pl
new file mode 100644
index 0000000..0c3bddd
--- /dev/null
+++ b/src/pipelines/bogart-sweep.pl
@@ -0,0 +1,185 @@
+#!/usr/bin/env perl
+
+###############################################################################
+ #
+ # This file is part of canu, a software program that assembles whole-genome
+ # sequencing reads into contigs.
+ #
+ # This software is based on:
+ # 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ # the 'kmer package' (http://kmer.sourceforge.net)
+ # both originally distributed by Applera Corporation under the GNU General
+ # Public License, version 2.
+ #
+ # Canu branched from Celera Assembler at its revision 4587.
+ # Canu branched from the kmer project at its revision 1994.
+ #
+ # Modifications by:
+ #
+ # Brian P. Walenz beginning on 2016-MAR-10
+ # are a 'United States Government Work', and
+ # are released in the public domain
+ #
+ # File 'README.licenses' in the root directory of this distribution contains
+ # full conditions and disclaimers for each license.
+ ##
+
+use strict;
+
+#  Work directory (holds the input stores, receives one sub-directory per
+#  parameter combination) and the prefix of the gkpStore / ovlStore in it.
+my $wrk = "/work/canuassemblies/sent";
+my $asm = "test";
+
+system("mkdir -p $wrk") if (! -d $wrk);
+
+#  Values passed unchanged to every bogart run, as -gs and -B ...
+my $gs = "5000000";
+my $b  = 6000;
+
+#  ... as -M and -threads ...
+my $m  = 4;
+my $t  = 1;
+
+#  ... and as -D (the option is omitted entirely if this is empty).
+my $d  = "all";
+
+#  The swept dimensions.  Every combination of one value from each list is run.
+#
+#    EG          - used as-is for -eg
+#    EB, EM, ER  - "+x.xxxx" / "-x.xxxx" are offsets added to / subtracted from
+#                  the EG value to form -eb, -em and -er
+#    OL          - used for -el
+#    RS, NS, CS  - the flag is passed only when the value is "-RS" / "-NS" /
+#                  "-CS"; "-no" leaves it off
+#
+#  The lists are assigned several times below.  Only the LAST assignment to
+#  each list takes effect; the earlier ones are kept as a record of previous
+#  sweeps and can be re-enabled by reordering.
+
+my (@EG, @EB, @EM, @ER, @OL, @RS, @NS, @CS);
+
+@EG = ( "0.0100", "0.0200", "0.0300", "0.0400", "0.0500", "0.0600", "0.0700", "0.0800", "0.0900", "0.1000" );
+@EB = ( "-0.0050", "+0.0050" );
+@EM = ( "-0.0050", "+0.0050" );
+@ER = ( "-0.0050", "+0.0050" );
+@OL = ( "100", "500", "1000", "2500", "5000", "7500", "10000" );
+@RS = ( "-no", "-RS" );
+@NS = ( "-no", "-NS" );
+@CS = ( "-no", "-CS" );
+
+@EG = ( "0.0400", "0.0500", "0.0600" );
+@EB = ( "+0.0125" );
+@EM = ( "-0.0125" );
+@ER = ( "+0.0000" );
+@OL = ( "50", "500", "1000", "2000", "3000", "4000", "5000", "6000", "7000", "8000", "9000", "10000", "11000" );
+@RS = ( "-RS" );
+@NS = ( "-NS" );
+@CS = ( "-CS" );
+
+@EG = ( "0.0500" );
+@EB = ( "+0.0125" );
+@EM = ( "-0.0125" );
+@ER = ( "+0.0000" );
+@OL = ( "4100", "4200", "4300", "4400", "4500", "4600", "4700", "4800", "4900", "5100", "5200", "5300", "5400", "5500", "5600", "5700", "5800", "5900" );
+@RS = ( "-RS" );
+@NS = ( "-NS" );
+@CS = ( "-CS" );
+
+undef @OL;
+for (my $ii=1100; $ii<1200; $ii += 1) {
+    push @OL, $ii;
+}
+
+@OL = ( "1135", "1136", "1137", "1138" );
+
+
+#-unassembled 2 1000 0.75 0.75 2 -repeatdetect 6 11 15 -threads 1 -D most
+
+#  Run bogart, then submit consensus + dotplot generation, for every
+#  combination of the swept parameters.
+#
+#  For each combination this:
+#    1. derives the absolute -eb / -em / -er values from the -eg value,
+#    2. creates the run directory $wrk/$path,
+#    3. writes $wrk/$path/bogart.sh and $wrk/$path/utgcns.sh,
+#    4. runs bogart.sh directly and submits utgcns.sh with qsub.
+#
+foreach my $eg (@EG) {
+foreach my $eb (@EB) {
+foreach my $em (@EM) {
+foreach my $er (@ER) {
+foreach my $ol (@OL) {
+foreach my $rs (@RS) {
+foreach my $ns (@NS) {
+foreach my $cs (@CS) {
+    my ($egl, $ebl, $eml, $erl, $oll) = ($eg, $eb, $em, $er, $ol);
+
+    #  A leading '+' or '-' marks the value as an offset from the -eg value.
+    #  The '.' is escaped so only a literal decimal point is accepted.
+
+    $ebl = $egl + $1 if ($eb =~ m/^\+(\d+\.\d+)/);
+    $ebl = $egl - $1 if ($eb =~ m/^-(\d+\.\d+)/);
+
+    $eml = $egl + $1 if ($em =~ m/^\+(\d+\.\d+)/);
+    $eml = $egl - $1 if ($em =~ m/^-(\d+\.\d+)/);
+
+    $erl = $egl + $1 if ($er =~ m/^\+(\d+\.\d+)/);
+    $erl = $egl - $1 if ($er =~ m/^-(\d+\.\d+)/);
+
+    #  Fixed-width forms, so the run directory names sort sensibly.
+
+    $egl = sprintf("%6.4f", $egl);
+    $ebl = sprintf("%6.4f", $ebl);
+    $eml = sprintf("%6.4f", $eml);
+    $erl = sprintf("%6.4f", $erl);
+
+    $oll = sprintf("%05d", $ol);
+
+    my $path = "test-eg$egl-eb$ebl-em$eml-er$erl-ol$oll$rs$ns$cs";
+
+    print "$path\n";
+
+    #  The run directory lives under $wrk, the same place both scripts are
+    #  written to below.  Creating it relative to the current directory would
+    #  make the open() calls fail unless the script is started from $wrk.
+
+    system("mkdir -p $wrk/$path") if (! -d "$wrk/$path");
+
+    #  bogart.sh: build the unitigs, unless a tigStore already exists.
+
+    open(F, "> $wrk/$path/bogart.sh") or die "can't open '$wrk/$path/bogart.sh' for writing: $!\n";
+    print F "#!/bin/sh\n";
+    print F "\n";
+    print F "cd $wrk/$path\n";
+    print F "\n";
+    print F "if [ ! -e test.tigStore ] ; then\n";
+    print F " /work/canu/FreeBSD-amd64/bin/bogart \\\n";
+    print F " -G $wrk/$asm.gkpStore \\\n";
+    print F " -O $wrk/$asm.ovlStore \\\n";
+    print F " -T test.tigStore -o test\\\n";
+    print F " -B $b -M $m -threads $t \\\n";
+    print F " -gs $gs \\\n";
+    print F " -eg $egl -eb $ebl -em $eml -er $erl -el $ol \\\n";
+    print F " -RS \\\n" if ($rs eq "-RS");
+    print F " -NS \\\n" if ($ns eq "-NS");
+    print F " -CS \\\n" if ($cs eq "-CS");
+    print F " -unassembled 2 1000 0.75 0.75 2 \\\n";
+    print F " -repeatdetect 6 32 5 \\\n";
+    print F " -D $d \\\n" if (length($d) > 0);
+    print F " > bogart.err 2>& 1\n";
+    print F "fi\n";
+    print F "\n";
+    close(F);
+
+    #  utgcns.sh: compute consensus, reload it into the tigStore, dump the
+    #  contigs and plot them against the two references.
+
+    open(F, "> $wrk/$path/utgcns.sh") or die "can't open '$wrk/$path/utgcns.sh' for writing: $!\n";
+    print F "#!/bin/sh\n";
+    print F "\n";
+    print F "cd $wrk/$path\n";
+    print F "\n";
+    print F "if [ ! -e test.fasta ] ; then\n";
+    print F " /work/canu/FreeBSD-amd64/bin/utgcns \\\n";
+    print F " -G $wrk/$asm.gkpStore \\\n";
+    print F " -T test.tigStore 1 . \\\n";
+    print F " -O test.cns -L test.lay -A test.fasta\n";
+    print F "fi\n";
+    print F "\n";
+    print F "rm -f test.tigStore/seqDB.v002.dat\n";
+    print F "rm -f test.tigStore/seqDB.v002.tig\n";
+    print F "\n";
+    print F "/work/canu/FreeBSD-amd64/bin/tgStoreLoad \\\n";
+    print F " -G $wrk/$asm.gkpStore \\\n";
+    print F " -T test.tigStore 2 \\\n";
+    print F " test.cns\n";
+    print F "\n";
+    print F "/work/canu/FreeBSD-amd64/bin/tgStoreDump \\\n";
+    print F " -G $wrk/$asm.gkpStore \\\n";
+    print F " -T test.tigStore 2 \\\n";
+    print F " -consensus -fasta -contigs -bubbles \\\n";
+    print F "> contigs.fasta\n";
+    print F "\n";
+    print F "rm -f *.delta\n";
+    print F "rm -f *.coords\n";
+    print F "rm -f *.png\n";
+    print F "\n";
+    print F "sh /work/scripts/dotplot.sh usmarc /data/references/salmonella_enterica_usmarc_3124.1-cp006631.1.fasta contigs.fasta\n";
+    print F "sh /work/scripts/dotplot.sh serge /data/references/salmonella_enterica_serge.fasta contigs.fasta\n";
+    print F "\n";
+    print F "cp -fp usmarc.png $wrk/$path.usmarc.png\n";
+    print F "cp -fp serge.png $wrk/$path.serge.png\n";
+    print F "\n";
+    close(F);
+
+    #  bogart runs here and now; consensus is handed to the grid.
+
+    system("sh $wrk/$path/bogart.sh");
+    system("qsub -q vomit.q -cwd -j y -o /dev/null $wrk/$path/utgcns.sh > /dev/null 2>&1");
+}
+}
+}
+}
+}
+}
+}
+}
+
diff --git a/src/pipelines/canu.pl b/src/pipelines/canu.pl
index 8728713..17ce844 100644
--- a/src/pipelines/canu.pl
+++ b/src/pipelines/canu.pl
@@ -68,6 +68,7 @@ use canu::OverlapMMap;
use canu::OverlapStore;
use canu::CorrectReads;
+use canu::ErrorEstimate;
use canu::OverlapBasedTrimming;
@@ -166,13 +167,13 @@ while (scalar(@ARGV)) {
if ($arg =~ m/pacbio/) {
setErrorRate(0.025);
setGlobal("corErrorRate", "0.30");
- setGlobal("batOptions", "-RS -NS -CS");
- setGlobal("cnsMaxCoverage", 20);
+ setGlobal("cnsMaxCoverage", 40);
+ setGlobal("utgGraphDeviation", 6);
} elsif ($arg =~ m/nanopore/) {
- setErrorRate(0.045);
+ setErrorRate(0.048);
setGlobal("corErrorRate", "0.50");
- setGlobal("batOptions", "-RS -NS -CS");
- setGlobal("cnsMaxCoverage", 20);
+ setGlobal("cnsMaxCoverage", 40);
+ setGlobal("utgGraphDeviation", 6);
}
$mode = "trim-assemble" if (!defined($mode) && ($arg =~ m/corrected/));
@@ -240,7 +241,7 @@ setParametersFromCommandLine(@specOpts);
if (scalar(@inputFiles) == 0 && ! defined(getGlobal("errorRate"))) {
my $gkpStore = undef;
$gkpStore = "$wrk/correction/$asm.gkpStore" if -e "$wrk/correction/$asm.gkpStore/libraries.txt";
- $gkpStore = "$wrk/trimming/$asm.gkpStore " if -e "$wrk/trimming/$asm.gkpStore/libraries.txt";
+ $gkpStore = "$wrk/trimming/$asm.gkpStore" if -e "$wrk/trimming/$asm.gkpStore/libraries.txt";
$gkpStore = "$wrk/unitigging/$asm.gkpStore" if -e "$wrk/unitigging/$asm.gkpStore/libraries.txt";
# set to the default if we can't find anything
@@ -262,14 +263,12 @@ if (scalar(@inputFiles) == 0 && ! defined(getGlobal("errorRate"))) {
if ($numPacBioRaw > 0 || $numPacBioCorrected > 0) {
setErrorRate(0.025);
setGlobal("corErrorRate", "0.30");
- setGlobal("batOptions", "-RS -NS -CS");
- setGlobal("cnsMaxCoverage", 20);
+ setGlobal("cnsMaxCoverage", 40);
}
if ($numNanoporeRaw > 0 || $numNanoporeCorrected > 0) {
- setErrorRate(0.045);
+ setErrorRate(0.048);
setGlobal("corErrorRate", "0.50");
- setGlobal("batOptions", "-RS -NS -CS");
- setGlobal("cnsMaxCoverage", 20);
+ setGlobal("cnsMaxCoverage", 40);
}
}
}
@@ -324,8 +323,8 @@ configureAssembler();
# Fail immediately if we run the script on the grid, and the gkpStore directory doesn't exist and
# we have no input files. Without this check we'd fail only after being scheduled on the grid.
-my $cor = (-e "$wrk/correction/$asm.gkpStore") || (-e "$wrk/$asm.correctedReads.fastq") || (-e "$wrk/$asm.correctedReads.gkp");
-my $obt = (-e "$wrk/trimming/$asm.gkpStore") || (-e "$wrk/$asm.trimmedReads.fastq") || (-e "$wrk/$asm.trimmedReads.gkp");
+my $cor = (-e "$wrk/correction/$asm.gkpStore") || sequenceFileExists("$wrk/$asm.correctedReads") || (-e "$wrk/$asm.correctedReads.gkp");
+my $obt = (-e "$wrk/trimming/$asm.gkpStore") || sequenceFileExists("$wrk/$asm.trimmedReads") || (-e "$wrk/$asm.trimmedReads.gkp");
my $utg = (-e "$wrk/unitigging/$asm.gkpStore");
if (($cor + $obt + $utg == 0) &&
@@ -459,6 +458,8 @@ if (setOptions($mode, "correct") eq "correct") {
generateCorrectedReads($wrk, $asm) foreach (1..getGlobal("canuIterationMax") + 1);
dumpCorrectedReads($wrk, $asm);
+ estimateCorrectedError($wrk, $asm, "cor");
+
buildHTML($wrk, $asm, "cor");
my $correctedReads = sequenceFileExists("$wrk/$asm.correctedReads");
diff --git a/src/pipelines/canu/Configure.pm b/src/pipelines/canu/Configure.pm
index 2ccaca0..36b7d2a 100644
--- a/src/pipelines/canu/Configure.pm
+++ b/src/pipelines/canu/Configure.pm
@@ -257,7 +257,7 @@ sub getAllowedResources ($$$$) {
# taskThreads = 4,8,32,64
# taskMemory = 16g,32g,64g
- my ($bestCores, $bestCoresM, $bestCoresT) = (0, undef, undef);
+ my ($bestCores, $bestCoresM, $bestCoresT, $availMemoryMin, $availMemoryMax) = (0, undef, undef, undef, undef);
foreach my $m (@taskMemory) {
foreach my $t (@taskThreads) {
@@ -265,14 +265,20 @@ sub getAllowedResources ($$$$) {
next if ($m > $maxMemory); # Bail if either of the suggest settings are
next if ($t > $maxThreads); # larger than the maximum allowed.
- my $processes = 0;
- my $cores = 0;
- my $memory = 0;
+ # Save this memory size. ovsMemory uses a list of possible memory sizes to
+ # pick the smallest one that results in an acceptable number of files.
+
+ $availMemoryMin = $m if (!defined($availMemoryMin) || ($m < $availMemoryMin));
+ $availMemoryMax = $m if (!defined($availMemoryMax) || ($availMemoryMax < $m));
# For a job using $m GB memory and $t threads, we can compute how many processes will
# fit on each node in our set of available machines. The smaller of the two is then
# the number of processes we can run on this node.
+ my $processes = 0;
+ my $cores = 0;
+ my $memory = 0;
+
for (my $ii=0; $ii<scalar(@gridCor); $ii++) {
my $np_cpu = $gridNum[$ii] * int($gridCor[$ii] / $t); # Each process uses $t cores, node has $gridCor[$ii] cores available.
my $np_mem = $gridNum[$ii] * int($gridMem[$ii] / $m); # Same idea.
@@ -304,18 +310,28 @@ sub getAllowedResources ($$$$) {
caExit("task $tag$alg failed to find a configuration to run on", undef);
}
- $taskMemory = $bestCoresM;
- $taskThreads = $bestCoresT;
+ # Reset the global values for later use. SPECIAL CASE! For ovsMemory, we just want the list
+ # of valid memory sizes.
- # Check for stupidity.
+ if ("$alg" ne "ovs") {
+ $taskMemory = $bestCoresM;
+ $taskThreads = $bestCoresT;
- caExit("invalid taskThread=$taskMemory; maxMemory=$maxMemory", undef) if ($taskMemory > $maxMemory);
- caExit("invalid taskThread=$taskThreads; maxThreads=$maxThreads", undef) if ($taskThreads > $maxThreads);
+ setGlobal("${tag}${alg}Memory", $taskMemory);
+ setGlobal("${tag}${alg}Threads", $taskThreads);
+
+ } else {
+ $taskMemory = $availMemoryMax;
+ $taskThreads = $bestCoresT;
+
+ setGlobal("${tag}${alg}Memory", "$availMemoryMin-$availMemoryMax");
+ setGlobal("${tag}${alg}Threads", $taskThreads);
+ }
- # Reset the global values for later use.
+ # Check for stupidity.
- setGlobal("${tag}${alg}Memory", $taskMemory);
- setGlobal("${tag}${alg}Threads", $taskThreads);
+ caExit("invalid taskMemory=$taskMemory; maxMemory=$maxMemory", undef) if ($taskMemory > $maxMemory);
+ caExit("invalid taskThread=$taskThreads; maxThreads=$maxThreads", undef) if ($taskThreads > $maxThreads);
# Finally, reset the concurrency (if we're running locally) so we don't swamp our poor workstation.
@@ -470,9 +486,9 @@ sub configureAssembler () {
setGlobalIfUndef("obtMhapMemory", "8-13"); setGlobalIfUndef("obtMhapThreads", "1-16");
setGlobalIfUndef("utgMhapMemory", "8-13"); setGlobalIfUndef("utgMhapThreads", "1-16");
- setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
- setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
- setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");
+ setGlobalIfUndef("corMMapMemory", "8-13"); setGlobalIfUndef("corMMapThreads", "1-16");
+ setGlobalIfUndef("obtMMapMemory", "8-13"); setGlobalIfUndef("obtMMapThreads", "1-16");
+ setGlobalIfUndef("utgMMapMemory", "8-13"); setGlobalIfUndef("utgMMapThreads", "1-16");
} elsif (getGlobal("genomeSize") < adjustGenomeSize("2g")) {
setGlobalIfUndef("corOvlMemory", "2-8"); setGlobalIfUndef("corOvlThreads", "1");
@@ -483,9 +499,9 @@ sub configureAssembler () {
setGlobalIfUndef("obtMhapMemory", "16-32"); setGlobalIfUndef("obtMhapThreads", "4-16");
setGlobalIfUndef("utgMhapMemory", "16-32"); setGlobalIfUndef("utgMhapThreads", "4-16");
- setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
- setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
- setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");
+ setGlobalIfUndef("corMMapMemory", "16-32"); setGlobalIfUndef("corMMapThreads", "1-16");
+ setGlobalIfUndef("obtMMapMemory", "16-32"); setGlobalIfUndef("obtMMapThreads", "1-16");
+ setGlobalIfUndef("utgMMapMemory", "16-32"); setGlobalIfUndef("utgMMapThreads", "1-16");
} elsif (getGlobal("genomeSize") < adjustGenomeSize("5g")) {
setGlobalIfUndef("corOvlMemory", "2-8"); setGlobalIfUndef("corOvlThreads", "1");
@@ -496,9 +512,9 @@ sub configureAssembler () {
setGlobalIfUndef("obtMhapMemory", "16-48"); setGlobalIfUndef("obtMhapThreads", "4-16");
setGlobalIfUndef("utgMhapMemory", "16-48"); setGlobalIfUndef("utgMhapThreads", "4-16");
- setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
- setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
- setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");
+ setGlobalIfUndef("corMMapMemory", "16-48"); setGlobalIfUndef("corMMapThreads", "1-16");
+ setGlobalIfUndef("obtMMapMemory", "16-48"); setGlobalIfUndef("obtMMapThreads", "1-16");
+ setGlobalIfUndef("utgMMapMemory", "16-48"); setGlobalIfUndef("utgMMapThreads", "1-16");
} else {
setGlobalIfUndef("corOvlMemory", "2-8"); setGlobalIfUndef("corOvlThreads", "1");
@@ -509,9 +525,9 @@ sub configureAssembler () {
setGlobalIfUndef("obtMhapMemory", "32-64"); setGlobalIfUndef("obtMhapThreads", "4-16");
setGlobalIfUndef("utgMhapMemory", "32-64"); setGlobalIfUndef("utgMhapThreads", "4-16");
- setGlobalIfUndef("corMMapMemory", "4-6"); setGlobalIfUndef("corMMapThreads", "1-16");
- setGlobalIfUndef("obtMMapMemory", "4-6"); setGlobalIfUndef("obtMMapThreads", "1-16");
- setGlobalIfUndef("utgMMapMemory", "4-6"); setGlobalIfUndef("utgMMapThreads", "1-16");
+ setGlobalIfUndef("corMMapMemory", "32-64"); setGlobalIfUndef("corMMapThreads", "1-16");
+ setGlobalIfUndef("obtMMapMemory", "32-64"); setGlobalIfUndef("obtMMapThreads", "1-16");
+ setGlobalIfUndef("utgMMapMemory", "32-64"); setGlobalIfUndef("utgMMapThreads", "1-16");
}
# Overlapper block sizes probably don't need to be modified based on genome size.
@@ -544,21 +560,21 @@ sub configureAssembler () {
if (getGlobal("genomeSize") < adjustGenomeSize("40m")) {
setGlobalIfUndef("cnsMemory", "8-32"); setGlobalIfUndef("cnsThreads", "1-4");
- setGlobalIfUndef("corMemory", "6-16"); setGlobalIfUndef("corThreads", "1-4");
+ setGlobalIfUndef("corMemory", "6-16"); setGlobalIfUndef("corThreads", "1-2");
setGlobalIfUndef("cnsPartitions", "8"); setGlobalIfUndef("cnsPartitionMin", "15000");
- setGlobalIfUndef("corPartitions", "128"); setGlobalIfUndef("corPartitionMin", "5000");
+ setGlobalIfUndef("corPartitions", "256"); setGlobalIfUndef("corPartitionMin", "5000");
} elsif (getGlobal("genomeSize") < adjustGenomeSize("1g")) {
setGlobalIfUndef("cnsMemory", "16-48"); setGlobalIfUndef("cnsThreads", "2-8");
- setGlobalIfUndef("corMemory", "10-20"); setGlobalIfUndef("corThreads", "2-4");
+ setGlobalIfUndef("corMemory", "6-20"); setGlobalIfUndef("corThreads", "2-4");
setGlobalIfUndef("cnsPartitions", "64"); setGlobalIfUndef("cnsPartitionMin", "20000");
- setGlobalIfUndef("corPartitions", "256"); setGlobalIfUndef("corPartitionMin", "15000");
+ setGlobalIfUndef("corPartitions", "512"); setGlobalIfUndef("corPartitionMin", "10000");
} else {
- setGlobalIfUndef("cnsMemory", "16-64"); setGlobalIfUndef("cnsThreads", "2-8");
+ setGlobalIfUndef("cnsMemory", "64-128"); setGlobalIfUndef("cnsThreads", "2-8");
setGlobalIfUndef("corMemory", "10-32"); setGlobalIfUndef("corThreads", "2-4");
setGlobalIfUndef("cnsPartitions", "256"); setGlobalIfUndef("cnsPartitionMin", "25000");
- setGlobalIfUndef("corPartitions", "512"); setGlobalIfUndef("corPartitionMin", "25000");
+ setGlobalIfUndef("corPartitions", "1024"); setGlobalIfUndef("corPartitionMin", "15000");
}
# Meryl too, basically just small or big. This should really be using the number of bases
@@ -576,32 +592,41 @@ sub configureAssembler () {
# Overlap error adjustment
#
- # Configuration is primarily done though memory size. If that blows up for some reason,
- # the actual number of reads (batchSize) or bases (batchLength) can be restricted. I expect
- # those to be used only from the command line, so they're left unset here.
+ # Configuration is primarily done through memory size. This blows up when there are many
+ # short(er) reads and large memory machines are available.
+ #
+ # The limit is arbitrary.
+ # On medicago, with 740,000 reads (median len ~1,500bp), this will result in about 150 jobs.
+ # The memory-only limit generated only 7 jobs.
+ #
+ # On drosophila, with 270,000 reads (median len ~17,000bp), this will result in about 50 jobs.
+ # The memory-only limit generated 36 jobs.
#
- #setGlobalIfUndef("redBatchSize", ""); setGlobalIfUndef("redBatchLength", "");
- #setGlobalIfUndef("oeaBatchSize", ""); setGlobalIfUndef("oeaBatchLength", "");
+ setGlobalIfUndef("redBatchSize", "5000");
+ setGlobalIfUndef("redBatchLength", "");
+
+ setGlobalIfUndef("oeaBatchSize", "25000");
+ setGlobalIfUndef("oeaBatchLength", "");
if (getGlobal("genomeSize") < adjustGenomeSize("40m")) {
- setGlobalIfUndef("redMemory", "2-8"); setGlobalIfUndef("redThreads", "1-4");
- setGlobalIfUndef("oeaMemory", "2"); setGlobalIfUndef("oeaThreads", "1");
+ setGlobalIfUndef("redMemory", "1-2"); setGlobalIfUndef("redThreads", "1-4");
+ setGlobalIfUndef("oeaMemory", "1"); setGlobalIfUndef("oeaThreads", "1");
} elsif (getGlobal("genomeSize") < adjustGenomeSize("500m")) {
- setGlobalIfUndef("redMemory", "4-12"); setGlobalIfUndef("redThreads", "1-6");
+ setGlobalIfUndef("redMemory", "2-6"); setGlobalIfUndef("redThreads", "1-6");
setGlobalIfUndef("oeaMemory", "2"); setGlobalIfUndef("oeaThreads", "1");
} elsif (getGlobal("genomeSize") < adjustGenomeSize("2g")) {
- setGlobalIfUndef("redMemory", "4-16"); setGlobalIfUndef("redThreads", "1-8");
+ setGlobalIfUndef("redMemory", "2-8"); setGlobalIfUndef("redThreads", "1-8");
setGlobalIfUndef("oeaMemory", "2"); setGlobalIfUndef("oeaThreads", "1");
} elsif (getGlobal("genomeSize") < adjustGenomeSize("5g")) {
- setGlobalIfUndef("redMemory", "4-32"); setGlobalIfUndef("redThreads", "1-8");
- setGlobalIfUndef("oeaMemory", "2"); setGlobalIfUndef("oeaThreads", "1");
+ setGlobalIfUndef("redMemory", "2-16"); setGlobalIfUndef("redThreads", "1-8");
+ setGlobalIfUndef("oeaMemory", "4"); setGlobalIfUndef("oeaThreads", "1");
} else {
- setGlobalIfUndef("redMemory", "4-32"); setGlobalIfUndef("redThreads", "1-8");
- setGlobalIfUndef("oeaMemory", "2"); setGlobalIfUndef("oeaThreads", "1");
+ setGlobalIfUndef("redMemory", "2-16"); setGlobalIfUndef("redThreads", "1-8");
+ setGlobalIfUndef("oeaMemory", "4"); setGlobalIfUndef("oeaThreads", "1");
}
# And bogart.
@@ -610,7 +635,7 @@ sub configureAssembler () {
setGlobalIfUndef("batMemory", "2-16"); setGlobalIfUndef("batThreads", "1-4");
} elsif (getGlobal("genomeSize") < adjustGenomeSize("500m")) {
- setGlobalIfUndef("batMemory", "8-64"); setGlobalIfUndef("batThreads", "2-8");
+ setGlobalIfUndef("batMemory", "16-64"); setGlobalIfUndef("batThreads", "2-8");
} elsif (getGlobal("genomeSize") < adjustGenomeSize("2g")) {
setGlobalIfUndef("batMemory", "32-256"); setGlobalIfUndef("batThreads", "4-16");
@@ -623,6 +648,10 @@ sub configureAssembler () {
}
# Finally, use all that setup to pick actual values for each component.
+ #
+ # ovsMemory needs to be configured here iff the sequential build method is used. This runs in
+ # the canu process, and needs to have a single memory size. The parallel method will pick a
+ # memory size based on the number of overlaps and submit jobs using that size.
my $err;
my $all;
@@ -635,6 +664,7 @@ sub configureAssembler () {
($err, $all) = getAllowedResources("", "oea", $err, $all);
($err, $all) = getAllowedResources("", "cns", $err, $all);
($err, $all) = getAllowedResources("", "ovb", $err, $all);
+ ($err, $all) = getAllowedResources("", "ovs", $err, $all);
($err, $all) = getAllowedResources("cor", "ovl", $err, $all);
($err, $all) = getAllowedResources("obt", "ovl", $err, $all);
($err, $all) = getAllowedResources("utg", "ovl", $err, $all);
diff --git a/src/pipelines/canu/CorrectReads.pm b/src/pipelines/canu/CorrectReads.pm
index fa91378..9a545cf 100644
--- a/src/pipelines/canu/CorrectReads.pm
+++ b/src/pipelines/canu/CorrectReads.pm
@@ -175,7 +175,7 @@ sub buildCorrectionLayouts_direct ($$) {
$cmd .= " -L " . getGlobal("corMinEvidenceLength") . " \\\n" if (defined(getGlobal("corMinEvidenceLength")));
$cmd .= " -E " . getGlobal("corMaxEvidenceErate") . " \\\n" if (defined(getGlobal("corMaxEvidenceErate")));
$cmd .= " -C $maxCov \\\n" if (defined($maxCov));
- $cmd .= " -legacy \\\n" if (!defined(getGlobal("corNoLegacyFilter")));
+ $cmd .= " -legacy \\\n" if (defined(getGlobal("corLegacyFilter")));
$cmd .= "> $wrk/$asm.corStore.err 2>&1";
if (runCommand($wrk, $cmd)) {
@@ -279,6 +279,7 @@ sub buildCorrectionLayouts_direct ($$) {
print F "\$bin/falcon_sense \\\n" if (!defined(getGlobal("falconSense")));
print F " --min_idt $minidt \\\n";
print F " --min_len " . getGlobal("minReadLength") . "\\\n";
+ print F " --max_read_len " . 2 * getMaxReadInStore($wrk, $asm) . "\\\n";
print F " --min_ovl_len " . getGlobal("minOverlapLength") . "\\\n";
print F " --min_cov " . getGlobal("corMinCoverage") . " \\\n";
print F " --n_core " . getGlobal("corThreads") . " \\\n";
@@ -390,7 +391,7 @@ sub buildCorrectionLayouts_piped ($$) {
print F " -L " . getGlobal("corMinEvidenceLength") . " \\\n" if (defined(getGlobal("corMinEvidenceLength")));
print F " -E " . getGlobal("corMaxEvidenceErate") . " \\\n" if (defined(getGlobal("corMaxEvidenceErate")));
print F " -C $maxCov \\\n" if (defined($maxCov));
- print F " -legacy \\\n" if (!defined(getGlobal("corNoLegacyFilter")));
+ print F " -legacy \\\n" if (defined(getGlobal("corLegacyFilter")));
print F " -F \\\n";
print F "&& \\\n";
print F " touch $path/correction_outputs/\$jobid.dump.success \\\n";
@@ -400,6 +401,7 @@ sub buildCorrectionLayouts_piped ($$) {
print F "\$bin/falcon_sense \\\n" if (!defined(getGlobal("falconSense")));
print F " --min_idt $minidt \\\n";
print F " --min_len " . getGlobal("minReadLength") . "\\\n";
+ print F " --max_read_len " . 2 * getMaxReadInStore($wrk, $asm) . "\\\n";
print F " --min_ovl_len " . getGlobal("minOverlapLength") . "\\\n";
print F " --min_cov " . getGlobal("corMinCoverage") . " \\\n";
print F " --n_core " . getGlobal("corThreads") . " \\\n";
@@ -454,7 +456,7 @@ sub quickFilter ($$$) {
my $minCorLength = 0;
open(O, "> $path/$asm.readsToCorrect.WORKING") or caExit("can't open '$path/$asm.readsToCorrect.WORKING' for writing: $!\n", undef);
- open(F, "$bin/gatekeeperDumpMetaData -G $wrk/$asm.gkpStore -reads | sort -k3nr | ") or caExit("can't dump gatekeeper for read lengths: $!\n", undef);
+ open(F, "$bin/gatekeeperDumpMetaData -G $wrk/$asm.gkpStore -reads | sort -T . -k3nr | ") or caExit("can't dump gatekeeper for read lengths: $!\n", undef);
print O "read\toriginalLength\tcorrectedLength\n";
@@ -497,7 +499,7 @@ sub expensiveFilter ($$) {
$cmd .= " -L " . getGlobal("corMinEvidenceLength") . " \\\n" if (defined(getGlobal("corMinEvidenceLength")));
$cmd .= " -E " . getGlobal("corMaxEvidenceErate") . " \\\n" if (defined(getGlobal("corMaxEvidenceErate")));
$cmd .= " -C $maxCov \\\n" if (defined($maxCov));
- $cmd .= " -legacy \\\n" if (!defined(getGlobal("corNoLegacyFilter")));
+ $cmd .= " -legacy \\\n" if (defined(getGlobal("corLegacyFilter")));
$cmd .= " -p $path/$asm.estimate";
if (runCommand($wrk, $cmd)) {
@@ -506,11 +508,11 @@ sub expensiveFilter ($$) {
}
}
- if (runCommandSilently($path, "sort -k4nr -k2nr < $path/$asm.estimate.log > $path/$asm.estimate.correctedLength.log", 1)) {
+ if (runCommandSilently($path, "sort -T . -k4nr -k2nr < $path/$asm.estimate.log > $path/$asm.estimate.correctedLength.log", 1)) {
caExit("failed to sort by corrected read length", undef);
}
- if (runCommandSilently($path, "sort -k2nr -k4nr < $path/$asm.estimate.log > $path/$asm.estimate.originalLength.log", 1)) {
+ if (runCommandSilently($path, "sort -T . -k2nr -k4nr < $path/$asm.estimate.log > $path/$asm.estimate.originalLength.log", 1)) {
caExit("failed to sort by original read length", undef);
}
@@ -567,7 +569,7 @@ sub expensiveFilter ($$) {
}
open(F, "< $path/$asm.estimate.correctedLength.log");
- open(O, "| sort -k1n > $path/$asm.readsToCorrect.WORKING") or caExit("can't open sort -k1n > '$path/$asm.readsToCorrect.WORKING' for writing: $!\n", undef);
+ open(O, "| sort -T . -k1n > $path/$asm.readsToCorrect.WORKING") or caExit("can't open sort -k1n > '$path/$asm.readsToCorrect.WORKING' for writing: $!\n", undef);
print O "read\toriginalLength\tcorrectedLength\n";
@@ -743,7 +745,7 @@ sub buildCorrectionLayouts ($$) {
$cmd .= " -c $maxCov \\\n";
$cmd .= " -l $minLen \\\n";
$cmd .= " -e " . getGlobal("corMaxEvidenceErate") . " \\\n" if (defined(getGlobal("corMaxEvidenceErate")));
- $cmd .= " -legacy \\\n" if (!defined(getGlobal("corNoLegacyFilter")));
+ $cmd .= " -legacy \\\n" if (defined(getGlobal("corLegacyFilter")));
$cmd .= "> $path/$asm.globalScores.err 2>&1";
if (runCommand($path, $cmd)) {
@@ -774,6 +776,18 @@ sub buildCorrectionLayouts ($$) {
caFailure("unknown corFilter '" . getGlobal("corFilter") . "'", undef);
}
+ # Set the minimum coverage for a corrected read based on coverage in input reads.
+
+ if (!defined(getGlobal("corMinCoverage"))) {
+ my $cov = getExpectedCoverage($wrk, $asm);
+
+ setGlobal("corMinCoverage", 4);
+ setGlobal("corMinCoverage", 4) if ($cov < 60);
+ setGlobal("corMinCoverage", 0) if ($cov <= 20);
+
+ print STDERR "-- Set corMinCoverage=", getGlobal("corMinCoverage"), " based on read coverage of $cov.\n";
+ }
+
caExit("failed to create list of reads to correct", undef) if (! -e "$path/$asm.readsToCorrect");
buildCorrectionLayouts_direct($wrk, $asm) if (getGlobal("corConsensus") eq "utgcns");
diff --git a/src/pipelines/canu/CorrectReads.txt b/src/pipelines/canu/CorrectReads.txt
new file mode 100644
index 0000000..d586a0b
--- /dev/null
+++ b/src/pipelines/canu/CorrectReads.txt
@@ -0,0 +1,41 @@
+
+buildCorrectionLayouts()
+
+----------------------------------------
+filterCorrectionOverlaps (binary)
+ - writes asm.globalScores
+ - log to asm.globalScores.log -- PER READ, #olaps, #scored, #filtered, #saved, reason
+ - log to asm.globalScores.err -- STATS
+ - knows a -S <prefix>, so write log and stats using that.
+ - params corMaxEvidenceCoverageGlobal
+ - params corMinEvidenceLength
+
+----------------------------------------
+quickFilter() or expensiveFilter()
+
+Creates asm.readsToCorrect with 'readID', 'originalLength', 'expectedCorrectedLength'
+
+quick filter just picks the longest originalLength reads that sum to corOutCoverage * genomeSize
+no logging, no stats, no plot
+
+expensive filter calls generateCorrectionLayouts (binary) to write asm.estimate.log and
+asm.estimate.stats with expected corrected length based on the overlaps we'd use
+canu.pl makes asm.estimate.* files with tp/tn rates and a figure
+canu.pl makes asm.readsToCorrect.summary and asm.estimate.original-x-corrected.png
+
+----------------------------------------
+buildCorrectionLayouts (_direct or _piped)
+
+generateCorrectionLayouts
+ - reads asm.readsToCorrect (who makes this?)
+ - reads asm.globalScores
+ - writes asm.corStore (direct)
+ - writes falcon-formatted reads for a pipe to compute consensus
+ - params -L corMinEvidenceLength
+ - params -E corMaxEvidenceErate
+ - params -C maxCov (corMaxEvidenceCoverageLocal)
+ - only errors to stderr
+ - writes no log or summary (disabled in canu here, output in the expensiveFilter above)
+
+When outputs of the parallel processes are merged, a length file is created. This, with
+the expensive filter length file, can generate stats on corrections.
diff --git a/src/pipelines/canu/Defaults.pm b/src/pipelines/canu/Defaults.pm
index 499f092..467dbeb 100644
--- a/src/pipelines/canu/Defaults.pm
+++ b/src/pipelines/canu/Defaults.pm
@@ -266,7 +266,7 @@ sub getNumberOfCPUs () {
$ncpu = int(`/usr/bin/getconf _NPROCESSORS_ONLN`);
}
- if ($os eq "linux") {
+ if ($os eq "linux" || $os eq "cygwin") {
$ncpu = int(`getconf _NPROCESSORS_ONLN`);
}
@@ -286,7 +286,7 @@ sub getPhysicalMemorySize () {
$memory = `/usr/sbin/sysctl -n hw.memsize` / 1024 / 1024 / 1024;
}
- if ($os eq "linux") {
+ if ($os eq "linux" || $os eq "cygwin") {
open(F, "< /proc/meminfo"); # Way to go, Linux! Make it easy on us!
while (<F>) {
if (m/MemTotal:\s+(\d+)/) {
@@ -543,11 +543,6 @@ sub showErrorRates ($) {
print STDERR "${prefix}\n";
print STDERR "${prefix}obtErrorRate -- ", getGlobal("obtErrorRate"), "\n";
print STDERR "${prefix}\n";
- print STDERR "${prefix}utgGraphErrorRate -- ", getGlobal("utgGraphErrorRate"), "\n";
- print STDERR "${prefix}utgBubbleErrorRate -- ", getGlobal("utgBubbleErrorRate"), "\n";
- print STDERR "${prefix}utgMergeErrorRate -- ", getGlobal("utgMergeErrorRate"), "\n";
- print STDERR "${prefix}utgRepeatErrorRate -- ", getGlobal("utgRepeatErrorRate"), "\n";
- print STDERR "${prefix}\n";
#print STDERR "${prefix}corErrorRate -- ", getGlobal("corErrorRate"), "\n";
print STDERR "${prefix}cnsErrorRate -- ", getGlobal("cnsErrorRate"), "\n";
}
@@ -571,14 +566,9 @@ sub setErrorRate ($@) {
setGlobal("obtErrorRate", $er * 3);
- setGlobal("utgGraphErrorRate", $er * 2);
- setGlobal("utgBubbleErrorRate", $er * 2 + 0.5 * $er); # Not tested!
- setGlobal("utgMergeErrorRate", $er * 2 - 0.5 * $er);
- setGlobal("utgRepeatErrorRate", $er * 2);
-
# Removed, is usually set in CorrectReads, can be set from command line directly.
#setGlobal("corErrorRate", $er * 10); # Erorr rate used for raw sequence alignment/consensus
- setGlobal("cnsErrorRate", $er * 2.5);
+ setGlobal("cnsErrorRate", $er * 3);
showErrorRates("-- ") if (defined($verbose));
}
@@ -606,7 +596,7 @@ sub setOverlapDefaults ($$$) {
$global{"${tag}OvlRefBlockLength"} = 0;
$synops{"${tag}OvlRefBlockLength"} = "Amount of sequence (bp) to search against the hash table per batch";
- $global{"${tag}OvlHashBits"} = ($tag eq "cor") ? 18 : 22;
+ $global{"${tag}OvlHashBits"} = ($tag eq "cor") ? 18 : 23;
$synops{"${tag}OvlHashBits"} = "Width of the kmer hash. Width 22=1gb, 23=2gb, 24=4gb, 25=8gb. Plus 10b per ${tag}OvlHashBlockLength";
$global{"${tag}OvlHashLoad"} = 0.75;
@@ -629,20 +619,29 @@ sub setOverlapDefaults ($$$) {
# Mhap parameters.
- $global{"${tag}MhapVersion"} = "2.0";
+ $global{"${tag}MhapVersion"} = "2.1";
$synops{"${tag}MhapVersion"} = "Version of the MHAP jar file to use";
$global{"${tag}MhapFilterThreshold"} = "0.000005";
$synops{"${tag}MhapFilterThreshold"} = "Value between 0 and 1. kmers which comprise more than this percentage of the input are downweighted";
+ $global{"${tag}MhapFilterUnique"} = undef;
+ $synops{"${tag}MhapFilterUnique"} = "Expert option: True or false, supress the low-frequency k-mer distribution based on them being likely noise and not true overlaps. Threshold auto-computed based on error rate and coverage.";
+
+ $global{"${tag}MhapNoTf"} = undef;
+ $synops{"${tag}MhapNoTf"} = "Expert option: True or false, do not use tf weighting, only idf of tf-idf.";
+
$global{"${tag}MhapBlockSize"} = 3000;
$synops{"${tag}MhapBlockSize"} = "Number of reads per 1GB; memory * blockSize = the size of block loaded into memory per job";
$global{"${tag}MhapMerSize"} = ($tag eq "cor") ? 16 : 22;
$synops{"${tag}MhapMerSize"} = "K-mer size for seeds in mhap";
- $global{"${tag}MhapSensitivity"} = "normal";
- $synops{"${tag}MhapSensitivity"} = "Coarse sensitivity level: 'normal' or 'high'; default 'normal'";
+ $global{"${tag}MhapOrderedMerSize"} = ($tag eq "cor") ? 12 : 22;
+ $synops{"${tag}MhapOrderedMerSize"} = "K-mer size for second-stage filter in mhap";
+
+ $global{"${tag}MhapSensitivity"} = undef;
+ $synops{"${tag}MhapSensitivity"} = "Coarse sensitivity level: 'low', 'normal' or 'high'. Usually set automatically based on coverage; 'high' <= 30x < 'normal' < 60x <= 'low'";
$global{"${tag}MMapBlockSize"} = 6000;
$synops{"${tag}MMapBlockSize"} = "Number of reads per 1GB; memory * blockSize = the size of block loaded into memory per job";
@@ -674,7 +673,7 @@ sub setDefaults () {
$global{"shell"} = "/bin/sh";
$synops{"shell"} = "Command interpreter to use; sh-compatible (e.g., bash), NOT C-shell (csh or tcsh); default '/bin/sh'";
- $global{"java"} = "java";
+ $global{"java"} = (exists $ENV{"JAVA_HOME"} && -e "$ENV{'JAVA_HOME'}/bin/java") ? "$ENV{'JAVA_HOME'}/bin/java" : "java";
$synops{"java"} = "Java interpreter to use; at least version 1.8; default 'java'";
##### Cleanup options
@@ -691,7 +690,7 @@ sub setDefaults () {
##### Error Rates
$global{"errorRate"} = undef;
- $synops{"errorRate"} = "The expected error rate in the input reads";
+ $synops{"errorRate"} = "The expected error rate in the corrected reads, typically set based on sequencing type. Set to 0 to try to estimate dynamically. (EXPERIMENTAL)";
$global{"corOvlErrorRate"} = undef;
$synops{"corOvlErrorRate"} = "Overlaps above this error rate are not computed";
@@ -705,17 +704,14 @@ sub setDefaults () {
#$global{"utgErrorRate"} = undef;
#$synops{"utgErrorRate"} = "Overlaps at or below this error rate are used to construct unitigs (BOG and UTG)";
- $global{"utgGraphErrorRate"} = undef;
- $synops{"utgGraphErrorRate"} = "Overlaps at or below this error rate are used to construct unitigs (BOGART)";
-
- $global{"utgBubbleErrorRate"} = undef;
- $synops{"utgBubbleErrorRate"} = "Overlaps at or below this error rate are used to construct unitigs (BOGART)";
+ $global{"utgGraphDeviation"} = 5;
+ $synops{"utgGraphDeviation"} = "Overlaps this much above median will not be used for initial graph construction (BOGART)";
- $global{"utgMergeErrorRate"} = undef;
- $synops{"utgMergeErrorRate"} = "Overlaps at or below this error rate are used to construct unitigs (BOGART)";
+ $global{"utgRepeatDeviation"} = 3;
+ $synops{"utgRepeatDeviation"} = "Overlaps this much above mean unitig error rate will not be used for repeat splitting (BOGART)";
- $global{"utgRepeatErrorRate"} = undef;
- $synops{"utgRepeatErrorRate"} = "Overlaps at or below this error rate are used to construct unitigs (BOGART)";
+ $global{"utgRepeatConfusedBP"} = 5000;
+ $synops{"utgRepeatConfusedBP"} = "Repeats where the next best edge is at least this many bp shorter will not be split (BOGART)";
$global{"corErrorRate"} = undef;
$synops{"corErrorRate"} = "Only use raw alignments below this error rate to construct corrected reads";
@@ -764,6 +760,7 @@ sub setDefaults () {
$global{"gridEngineNameOption"} = undef;
$global{"gridEngineArrayOption"} = undef;
$global{"gridEngineArrayName"} = undef;
+ $global{"gridEngineArrayMaxJobs"} = undef;
$global{"gridEngineOutputOption"} = undef;
$global{"gridEnginePropagateCommand"} = undef;
$global{"gridEngineThreadsOption"} = undef;
@@ -780,6 +777,11 @@ sub setDefaults () {
$global{"useGrid"} = 1;
$synops{"useGrid"} = "If 'true', enable grid-based execution; if 'false', run all jobs on the local machine; if 'remote', create jobs for grid execution but do not submit; default 'true'";
+ foreach my $c (qw(BAT CNS COR MERYL CORMHAP CORMMAP COROVL OBTMHAP OBTOVL OEA OVB OVS RED UTGMHAP UTGMMAP UTGOVL)) {
+ $global{"useGrid$c"} = 1;
+ $synops{"useGrid$c"} = "If 'true', run module $c under grid control; if 'false' run locally.";
+ }
+
##### Grid Engine configuration, for each step of the pipeline
$global{"gridOptions"} = undef;
@@ -933,8 +935,8 @@ sub setDefaults () {
$global{"corOutCoverage"} = 40;
$synops{"corOutCoverage"} = "Only correct the longest reads up to this coverage; default 40";
- $global{"corMinCoverage"} = 4;
- $synops{"corMinCoverage"} = "Minimum number of bases supporting each corrected base, if less than this sequences are split; default 4";
+ $global{"corMinCoverage"} = undef;
+ $synops{"corMinCoverage"} = "Minimum number of bases supporting each corrected base, if less than this sequences are split; default based on input read coverage: 0 <= 30x < 4 < 60x <= 4";
$global{"corFilter"} = "expensive";
$synops{"corFilter"} = "Method to filter short reads from correction; 'quick' or 'expensive'; default 'expensive'";
@@ -945,8 +947,8 @@ sub setDefaults () {
$global{"falconSense"} = undef;
$synops{"falconSense"} = "Path to fc_consensus.py or falcon_sense.bin";
- $global{"corNoLegacyFilter"} = undef;
- $synops{"corNoLegacyFilter"} = "Expert option: global filter, length with ties broken by identity (default) or length * identity (if on)";
+ # Unset by default.  The synopsis restores the word "ties", which was lost when the
+ # option was renamed from corNoLegacyFilter.
+ $global{"corLegacyFilter"} = undef;
+ $synops{"corLegacyFilter"} = "Expert option: global filter, length * identity (default) or length with ties broken by identity (if on)";
# Convert all the keys to lowercase, and remember the case-sensitive version
@@ -1032,6 +1034,19 @@ sub checkParameters () {
}
}
+ # Validate the per-stage mhap sensitivity.  An unset (or empty) value is accepted
+ # here; the option's synopsis says it is usually chosen automatically from coverage.
+ foreach my $tag ("cor", "obt", "utg") {
+     my $sens = getGlobal("${tag}MhapSensitivity");
+
+     # Test definedness explicitly; 'ne undef' only compares against the empty string.
+     next if (!defined($sens) || ($sens eq ""));
+
+     # 'fast' is the deprecated name for 'low'.  Actually perform the replacement the
+     # warning announces, so the value is not rejected by the check below.
+     if ($sens eq "fast") {
+         print STDERR "WARNING: deprecated ${tag}MhapSensitivity=fast replaced with ${tag}MhapSensitivity=low\n";
+         setGlobal("${tag}MhapSensitivity", "low");
+         $sens = "low";
+     }
+
+     if (($sens ne "low") &&
+         ($sens ne "normal") &&
+         ($sens ne "high")) {
+         addCommandLineError("ERROR: Invalid '${tag}MhapSensitivity' specified ($sens); must be 'low' or 'normal' or 'high'\n");
+     }
+ }
+
if ((getGlobal("unitigger") ne "unitigger") &&
(getGlobal("unitigger") ne "bogart")) {
addCommandLineError("ERROR: Invalid 'unitigger' specified (" . getGlobal("unitigger") . "); must be 'unitigger' or 'bogart'\n");
@@ -1086,8 +1101,10 @@ sub checkParameters () {
"meryl",
"trimReads",
"splitReads",
+ "red", "oea",
"unitig",
- "consensusConfigure");
+ "consensusConfigure",
+ "cns");
foreach my $sb (@stopBefore) {
$failureString .= "ERROR: '$sb'\n";
@@ -1145,19 +1162,44 @@ sub checkParameters () {
my $versionStr = "unknown";
my $version = 0;
+ # Argh, we can't use runCommand() here, because we're included in Execution.pm. Try to check it with -x.
+ # Nope. Fails if $java == "java".
+
+ #if (! -x $java) {
+ # addCommandLineError("ERROR: java executable '$java' not found or not executable\n");
+ #}
+
open(F, "$java -showversion 2>&1 |");
while (<F>) {
# First word is either "java" or "openjdk" or ...
- if (m/^.*\s+version\s+\"(\d+.\d+)(.*)\"$/) {
+ if (m/^.*\s+version\s+\"(\d+.\d+)(.*)\".*$/) {
$versionStr = "$1$2";
$version = $1;
}
}
close(F);
- print STDERR "-- Detected Java(TM) Runtime Environment '$versionStr' (from '$java').\n";
+ if ($version < 1.8) {
+ addCommandLineError("ERROR: mhap overlapper requires java version at least 1.8.0; you have $versionStr (from '$java').\n");
+ addCommandLineError("ERROR: '$java -showversion' reports:\n");
+
+ open(F, "$java -showversion 2>&1 |");
+ while (<F>) {
+ chomp;
+ addCommandLineError("ERROR: '$_'\n");
+ }
+ close(F);
+
+ } else {
+ print STDERR "-- Detected Java(TM) Runtime Environment '$versionStr' (from '$java').\n";
+ }
+ }
- addCommandLineError("ERROR: mhap overlapper requires java version at least 1.8.0; you have $versionStr\n") if ($version < 1.8);
+ #
+ # Minimap, no valid identities, set legacy
+ #
+ if (getGlobal("corOverlapper") eq "minimap") {
+ setGlobalIfUndef("corLegacyFilter", 1);
}
#
@@ -1180,10 +1222,6 @@ sub checkParameters () {
setGlobalIfUndef("utgOvlErrorRate", 3.0 * getGlobal("errorRate"));
setGlobalIfUndef("ovlErrorRate", 2.5 * getGlobal("errorRate"));
- setGlobalIfUndef("utgGraphErrorRate", 2.0 * getGlobal("errorRate"));
- setGlobalIfUndef("utgBubbleErrorRate", 2.0 * getGlobal("errorRate") + 0.5 * getGlobal("errorRate"));
- setGlobalIfUndef("utgMergeErrorRate", 2.0 * getGlobal("errorRate") - 0.5 * getGlobal("errorRate"));
- setGlobalIfUndef("utgRepeatErrorRate", 2.0 * getGlobal("errorRate"));
setGlobalIfUndef("corsErrorRate", 10.0 * getGlobal("errorRate"));
setGlobalIfUndef("cnsErrorRate", 2.5 * getGlobal("errorRate"));
diff --git a/src/pipelines/canu/ErrorEstimate.pm b/src/pipelines/canu/ErrorEstimate.pm
new file mode 100644
index 0000000..3adcd2c
--- /dev/null
+++ b/src/pipelines/canu/ErrorEstimate.pm
@@ -0,0 +1,258 @@
+
+###############################################################################
+ #
+ # This file is part of canu, a software program that assembles whole-genome
+ # sequencing reads into contigs.
+ #
+ # This software is based on:
+ # 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ # the 'kmer package' (http://kmer.sourceforge.net)
+ # both originally distributed by Applera Corporation under the GNU General
+ # Public License, version 2.
+ #
+ # Canu branched from Celera Assembler at its revision 4587.
+ # Canu branched from the kmer project at its revision 1994.
+ #
+ # Modifications by:
+ #
+ # Sergey Koren beginning on 2016-MAY-16
+ # are a 'United States Government Work', and
+ # are released in the public domain
+ #
+ # File 'README.licenses' in the root directory of this distribution contains
+ # full conditions and disclaimers for each license.
+ ##
+
+package canu::ErrorEstimate;
+
+require Exporter;
+
+@ISA = qw(Exporter);
+@EXPORT = qw(estimateKmerError estimateRawError estimateCorrectedError uniqueKmerThreshold);
+
+use strict;
+use POSIX qw(floor);
+
+use File::Path qw(make_path remove_tree);
+
+use canu::Defaults;
+use canu::Execution;
+use canu::Gatekeeper;
+use canu::HTML;
+
+# Return x! (x factorial) for a non-negative integer x; fac(0) and fac(1) are 1.
+#
+# Computed with a loop rather than recursion, so a large x does not nest one call
+# frame per unit, and a negative or fractional x terminates (the recursive form
+# never reached its 0/1 base case for such input).  Values of x below 2 yield 1.
+sub fac($) {
+    my $x       = shift @_;
+    my $product = 1;
+
+    # Same multiplication order as the recursive form: 1, then *2, *3, ... *x.
+    for (my $i = 2; $i <= $x; $i++) {
+        $product *= $i;
+    }
+
+    return $product;
+}
+
+# Poisson probability of observing exactly k events when the expected number of
+# events is lambda:  lambda^k * e^(-lambda) / k!
+sub poisson_pdf ($$) {
+    my ($lambda, $k) = @_;
+
+    my $numerator = ($lambda ** $k) * exp(-$lambda);
+
+    return $numerator / fac($k);
+}
+
+# Choose a k-mer count threshold from a Poisson model of k-mer coverage.
+#
+#   wrk, asm  - passed through to the store/coverage/error-estimate helpers
+#   merSize   - k-mer size
+#   loss      - largest cumulative Poisson probability that may fall at or below the threshold
+#
+# Returns the largest t for which the sum of poisson_pdf(coverage, i), i = 0..t, stays
+# at or below 'loss'; never returns less than 1.
+sub uniqueKmerThreshold($$$$) {
+    my ($wrk, $asm, $merSize, $loss) = @_;
+
+    my $bin       = getBinDirectory();
+    my $errorRate = estimateRawError($wrk, $asm, "cor", $merSize);
+
+    # Mean read length, the fraction of read positions that start a full k-mer, and
+    # the chance that all merSize bases of a k-mer are error free.
+    my $meanReadLength = getNumberOfBasesInStore($wrk, $asm) / getNumberOfReadsInStore ($wrk, $asm);
+    my $kmerFraction   = ($meanReadLength - $merSize + 1) / $meanReadLength;
+    my $kmerSurvival   = (1 - $errorRate) ** $merSize;
+
+    my $kmerCoverage   = getExpectedCoverage($wrk, $asm) * $kmerFraction * $kmerSurvival;
+
+    my $cutoff = 0;
+    my $lost   = poisson_pdf($kmerCoverage, 0);
+
+    # Even a threshold of zero exceeds the allowed loss; use the minimum.
+    return 1 if ($lost > $loss);
+
+    # Raise the cutoff one count at a time while the accumulated probability still fits.
+    while (1) {
+        my $next = poisson_pdf($kmerCoverage, $cutoff + 1);
+
+        last unless (($lost + $next) <= $loss);
+
+        $cutoff++;
+        $lost += $next;
+    }
+
+    return ($cutoff == 0) ? 1 : $cutoff;
+}
+
+# Decide how many reads to sample.
+#
+# Exactly one of 'percent' and 'coverage' is expected to be defined (percent wins if both are):
+#   percent  - fraction of the reads in the store; the result is rounded up to an even
+#              count and capped at 4 * ${tag}MhapBlockSize
+#   coverage - enough reads of mean length to cover genomeSize this many times (not capped)
+#
+# The result is never smaller than 100.
+sub computeSampleSize($$$$$) {
+    my ($wrk, $asm, $tag, $percent, $coverage) = @_;
+
+    my $floor   = 100;
+    my $ceiling = getGlobal("${tag}MhapBlockSize") * 4;
+    my $size    = 0;
+
+    if (defined($percent)) {
+        $size  = int($percent * getNumberOfReadsInStore ($wrk, $asm)) + 1;
+        $size += 1         if ($size % 2 != 0);
+        $size  = $ceiling  if ($size > $ceiling);
+    } elsif (defined($coverage)) {
+        my $wantedBases    = $coverage * getGlobal("genomeSize");
+        my $meanReadLength = getNumberOfBasesInStore($wrk, $asm) / getNumberOfReadsInStore ($wrk, $asm);
+
+        $size = int($wantedBases / $meanReadLength) + 1;
+    }
+
+    return ($size < $floor) ? $floor : $size;
+}
+
+# Run the mhap jar on a hash/query pair of FASTA files and pipe its output into errorEstimate.
+#
+#   wrk            - directory handed to runCommand()
+#   tag            - option prefix used to look up ${tag}MhapFilterThreshold, ${tag}MhapMerSize,
+#                    ${tag}MhapVersion and ${tag}mhapThreads
+#   numHashes, minNumMatches, threshold, ordSketch, ordSketchMer
+#                  - passed to mhap as --num-hashes, --num-min-matches, --threshold,
+#                    --ordered-sketch-size and --ordered-kmer-size
+#   sampleSize     - offset used by the awk filter below
+#   hash, query    - mhap -s and -q inputs
+#   out, err       - files receiving errorEstimate's stdout and stderr
+#
+# The last statement is the runCommand() call, so its value is what the sub returns.
+sub runMHAP($$$$$$$$$$$$) {
+ my ($wrk, $tag, $numHashes, $minNumMatches, $threshold, $ordSketch, $ordSketchMer, $sampleSize, $hash, $query, $out, $err) = @_;
+
+ my $filterThreshold = getGlobal("${tag}MhapFilterThreshold");
+ my $merSize = getGlobal("${tag}MhapMerSize");
+ my $javaPath = getGlobal("java");
+ my $bin = getBinDirectory();
+
+ print STDERR "--\n";
+ print STDERR "-- PARAMETERS: hashes=$numHashes, minMatches=$minNumMatches, threshold=$threshold\n";
+ print STDERR "--\n";
+
+ # The JVM is started with a fixed -Xmx4g, regardless of any configured mhap memory.
+ my $cmd = "$javaPath -d64 -server -Xmx4g -jar $bin/mhap-" . getGlobal("${tag}MhapVersion") . ".jar ";
+ $cmd .= " --no-self --repeat-weight 0.9 -k $merSize --num-hashes $numHashes --num-min-matches $minNumMatches --ordered-sketch-size $ordSketch --ordered-kmer-size $ordSketchMer --threshold $threshold --filter-threshold $filterThreshold --num-threads " . getGlobal("${tag}mhapThreads");
+ # mhap's own stderr is discarded.  The awk stage drops lines whose first field equals the
+ # second field plus sampleSize -- presumably a read hitting its own copy in the hash
+ # sample; TODO confirm against mhap's ID numbering.
+ $cmd .= " -s $hash -q $query 2> /dev/null | awk '{if (\$1 != \$2+$sampleSize) { print \$0}}' | $bin/errorEstimate -d 2 -m 0.95 -S - > $out 2> $err";
+ runCommand($wrk, $cmd);
+}
+
+
+# Estimate the error rate of the raw (uncorrected) reads with mhap.
+#
+# The hash sample is built from the first and the last reads of the store, and
+# the query sample from the first reads of the store; both are dumped as FASTA
+# into the gkpStore directory, overlapped with runMHAP(), and removed again.
+# The result is cached in $wrk/$asm.gkpStore/raw.estimate.out.
+#
+# Nothing is computed when the errorEstimate stage is skipped, when the cached
+# estimate already exists, or when the errorrate global is already positive.
+#
+# Parameters: $WRK (root work directory), $asm (assembly prefix), $tag (prefix
+# for the ${tag}Mhap* globals), $merSize (accepted but not used here).
+#
+# Returns half of the value on the last line of the estimate file, formatted
+# to three decimals; values <= 0.005 are replaced by 0.15. Returns 0.15 when
+# no estimate file exists.
+sub estimateRawError($$$$) {
+    my $WRK      = shift @_;   # Root work directory (the -d option to canu)
+    my $wrk      = $WRK;       # Local work directory
+    my $asm      = shift @_;
+    my $tag      = shift @_;
+    my $merSize  = shift @_;   # Not used in this function.
+    my $bin      = getBinDirectory();
+    my $numReads = getNumberOfReadsInStore ($wrk, $asm);
+
+    my $store    = "$wrk/$asm.gkpStore";
+    my $estimate = "$store/raw.estimate.out";
+
+    #  The cached-result test must use the same $asm-based path that the
+    #  estimate is written to and read from below; a literal 'asm' prefix only
+    #  matches assemblies that happen to be named "asm".
+
+    goto allDone if (skipStage($WRK, $asm, "errorEstimate") == 1);
+    goto allDone if (-e $estimate);
+    goto allDone if (getGlobal("errorrate") > 0);
+
+    my ($numHashes, $minNumMatches, $threshold, $ordSketch, $ordSketchMer);
+
+    $numHashes     = 10000;
+    $minNumMatches = 3;
+    $threshold     = 0.65;
+    $ordSketch     = 10000;
+    $ordSketchMer  = getGlobal("${tag}MhapOrderedMerSize");
+
+    # subsample raw reads: half of the hash sample comes from the start of the
+    # store and half from the end.
+    my $sampleSize = computeSampleSize($wrk, $asm, $tag, 0.01, undef);
+    $sampleSize /= 2;
+
+    my $cmd;
+
+    $cmd = "$bin/gatekeeperDumpFASTQ -G $store -nolibname -fasta -r 1-$sampleSize -o - > $store/subset.fasta 2> /dev/null";
+    runCommandSilently($wrk, $cmd, 1);
+
+    my $min = $numReads - $sampleSize + 1;
+
+    $cmd = "$bin/gatekeeperDumpFASTQ -G $store -nolibname -fasta -r $min-$numReads -o - >> $store/subset.fasta 2> /dev/null";
+    runCommandSilently($wrk, $cmd, 1);
+
+    my $querySize = computeSampleSize($wrk, $asm, $tag, undef, 2);
+
+    $cmd = "$bin/gatekeeperDumpFASTQ -G $store -nolibname -fasta -r 1-$querySize -o - > $store/reads.fasta 2> /dev/null";
+    runCommandSilently($wrk, $cmd, 1);
+
+    print STDERR "--\n";
+    print STDERR "-- ESTIMATOR (mhap) (raw) (hash sample size=". ($sampleSize*2) . ") (query sample size=$querySize)\n";
+    runMHAP($wrk, $tag, $numHashes, $minNumMatches, $threshold, $ordSketch, $ordSketchMer, $sampleSize*2, "$store/subset.fasta", "$store/reads.fasta", $estimate, "$store/raw.estimate.err");
+    unlink("$store/subset.fasta");
+    unlink("$store/reads.fasta");
+
+  allDone:
+    return 0.15 if (! -e $estimate);
+
+    my $errorRate = 0;
+    open(L, "< $estimate") or caExit("can't open '$estimate' for reading: $!", undef);
+    while (<L>) {
+        # Each line overwrites the previous one, so the last line wins.
+        $errorRate = sprintf "%.3f", ($_ / 2);
+        $errorRate = 0.15 if ($errorRate <= 0.005);
+    }
+    close(L);
+
+    return $errorRate;
+}
+
+# Map a subset of the corrected reads against each other with mhap, compute
+# the resulting distribution and estimate the error rate from it.
+#
+# The hash and query samples are taken with head/tail from
+# $WRK/$asm.correctedReads.fasta.gz, overlapped with runMHAP(), and removed
+# again. The result is cached in
+# $WRK/correction/3-estimator/$asm.estimate.out.
+#
+# Nothing is computed when the errorEstimate stage is skipped, when the cached
+# estimate already exists, or when the errorrate global is already positive.
+#
+# Parameters: $WRK (root work directory), $asm (assembly prefix), $tag (prefix
+# for the ${tag}Mhap* globals).
+#
+# When an estimate file exists, half of the value on its last line is clamped
+# to the range [0.005, 0.13], passed to setErrorRate(), and reported on STDERR.
+# When no estimate file exists the function returns without setting anything.
+
+sub estimateCorrectedError ($$$) {
+    my $WRK = shift @_;   # Root work directory (the -d option to canu)
+    my $wrk = $WRK;       # Local work directory
+    my $asm = shift @_;
+    my $tag = shift @_;
+    my $bin = getBinDirectory();
+
+    $wrk = "$wrk/correction";
+
+    my $path = "$wrk/3-estimator";
+
+    # only run if we aren't done and were asked to
+    goto allDone if (skipStage($WRK, $asm, "errorEstimate") == 1);
+    goto allDone if (-e "$path/$asm.estimate.out");
+    goto allDone if (getGlobal("errorrate") > 0);
+
+    # Mhap parameters - filterThreshold needs to be a string, else it is printed as 5e-06.
+    #
+
+    my ($numHashes, $minNumMatches, $threshold, $ordSketch, $ordSketchMer);
+
+    $numHashes     = 256;
+    $minNumMatches = 4;
+    $threshold     = 0.85;
+    $ordSketch     = 1000;
+    $ordSketchMer  = getGlobal("${tag}MhapOrderedMerSize") + 2;
+
+    make_path("$path");
+
+    # The corrected reads are named after the assembly prefix, like every other
+    # per-assembly file in this function; a literal 'asm' prefix only works
+    # for assemblies that happen to be named "asm".
+    my $corrected = "$WRK/$asm.correctedReads.fasta.gz";
+
+    # subsample corrected reads, this assumes the fasta records are on a single line. We take some reads from the top and bottom of file to avoid sampling one library
+    my $cmd;
+
+    my $sampleSize = computeSampleSize($wrk, $asm, $tag, 0.01, undef);
+
+    $cmd = "gunzip -c $corrected |head -n $sampleSize > $path/subset.fasta";
+    runCommandSilently($path, $cmd, 1);
+    $cmd = "gunzip -c $corrected |tail -n $sampleSize >> $path/subset.fasta";
+    runCommandSilently($path, $cmd, 1);
+
+    my $querySize = computeSampleSize($wrk, $asm, $tag, undef, 2);
+
+    $cmd = "gunzip -c $corrected |head -n $querySize > $path/reads.fasta";
+    runCommandSilently($path, $cmd, 1);
+    $cmd = "gunzip -c $corrected |tail -n $querySize >> $path/reads.fasta";
+    runCommandSilently($path, $cmd, 1);
+
+    # now compute the overlaps
+    print STDERR "--\n";
+    print STDERR "-- ESTIMATOR (mhap) (corrected) (hash sample size=$sampleSize) (query sample size=$querySize)\n";
+
+    runMHAP($wrk, $tag, $numHashes, $minNumMatches, $threshold, $ordSketch, $ordSketchMer, $sampleSize, "$path/subset.fasta", "$path/reads.fasta", "$path/$asm.estimate.out", "$path/$asm.estimate.err");
+    unlink("$path/subset.fasta");
+    unlink("$path/reads.fasta");
+
+  allDone:
+    return if (! -e "$path/$asm.estimate.out");
+
+    my $errorRate = 0;
+    open(L, "< $path/$asm.estimate.out") or caExit("can't open '$path/$asm.estimate.out' for reading: $!", undef);
+    while (<L>) {
+        # Each line overwrites the previous one, so the last line wins.
+        $errorRate = sprintf "%.3f", ($_ / 2);
+    }
+    close(L);
+
+    # Clamp the estimate to [0.005, 0.13] before applying it.
+    print STDERR "-- \n";
+    if ($errorRate > 0.13) {
+        print STDERR "-- Estimated error rate: " . ($errorRate*100) . "% > " . (0.13 * 100) . "% limit, capping it.\n";
+        $errorRate = 0.13;
+    } elsif ($errorRate < 0.005) {
+        print STDERR "-- Estimated error rate: " . ($errorRate*100) . "%, increasing to " . (0.005 * 100). "%.\n";
+        $errorRate = 0.005;
+    } else {
+        print STDERR "-- Estimated error rate: " . ($errorRate * 100) . "%.\n";
+    }
+    setErrorRate($errorRate);
+    showErrorRates("-- ");
+    print STDERR "-- \n";
+}
diff --git a/src/pipelines/canu/Execution.pm b/src/pipelines/canu/Execution.pm
index 6534115..acbb55d 100644
--- a/src/pipelines/canu/Execution.pm
+++ b/src/pipelines/canu/Execution.pm
@@ -228,8 +228,10 @@ sub stopBefore ($$) {
if ((defined($stopBefore)) &&
(defined(getGlobal("stopBefore"))) &&
(getGlobal("stopBefore") eq $stopBefore)) {
+ print STDERR "\n";
print STDERR "Stop requested before '$stopBefore'.\n";
- print STDERR "Command:\n$cmd\n" if (defined($cmd));
+ print STDERR "\n";
+ print STDERR "Command:\n $cmd\n" if (defined($cmd));
exit(0);
}
}
@@ -906,29 +908,23 @@ sub buildMemoryOption ($$) {
$m /= $t;
}
+ if ((uc(getGlobal("gridEngine")) eq "SLURM") && (getGlobal("gridEngineMemoryOption") =~ m/mem-per-cpu/i)) {
+ $m /= $t;
+ }
+
+ if (int($m) != $m) {
+ $m = int($m * 1024);
+ $u = "m";
+ }
+
if (uc(getGlobal("gridEngine")) eq "LSF") {
- my $updated = 0;
- if (defined(getGlobal("gridEngineMemoryUnits"))) {
- if (getGlobal("gridEngineMemoryUnits") eq "t") {
- $m = $m / 1024;
- $updated++;
- } elsif (getGlobal("gridEngineMemoryUnits") eq "g") {
- $m = $m * 1;
- $updated++;
- } elsif (getGlobal("gridEngineMemoryUnits") eq "m") {
- $m = $m * 1024;
- $updated++;
- } elsif (getGlobal("gridEngineMemoryUnits") eq "k") {
- $m = $m * 1024 * 1024;
- $updated++;
- }
- }
- if ($updated == 0) {
- print STDERR "-- Warning: unknown memory units for grid engine " . getGlobal("gridEngine") . " assuming KB\n";
- $m = $m * 1024 * 1024;
- }
- $u = "";
+ $m = $m / 1024 if (getGlobal("gridEngineMemoryUnits") eq "t");
+ $m = $m * 1 if (getGlobal("gridEngineMemoryUnits") eq "g");
+ $m = $m * 1024 if (getGlobal("gridEngineMemoryUnits") eq "m");
+ $m = $m * 1024 * 1024 if (getGlobal("gridEngineMemoryUnits") eq "k");
+ $u = "";
}
+
$r = getGlobal("gridEngineMemoryOption");
$r =~ s/MEMORY/${m}${u}/g;
@@ -988,7 +984,7 @@ sub buildGridJob ($$$$$$$$) {
$gridOpts = getGlobal("gridOptions") if (defined(getGlobal("gridOptions")));
$gridOpts .= " " if (defined($gridOpts));
- $gridOpts = getGlobal("gridOptions$jobType") if (defined(getGlobal("gridOptions$jobType")));
+ $gridOpts .= getGlobal("gridOptions$jobType") if (defined(getGlobal("gridOptions$jobType")));
$gridOpts .= " " if (defined($gridOpts));
$gridOpts .= $memOption if (defined($memOption));
$gridOpts .= " " if (defined($gridOpts));
@@ -1027,6 +1023,8 @@ sub buildGridJob ($$$$$$$$) {
sub convertToJobRange (@) {
my @jobs;
+ # Expand the ranges into a simple list of job ids.
+
foreach my $j (@_) {
if ($j =~ m/^(\d+)-(\d+)$/) {
for (my $a=$1; $a<=$2; $a++) {
@@ -1041,10 +1039,14 @@ sub convertToJobRange (@) {
}
}
+ # Sort.
+
my @jobsA = sort { $a <=> $b } @jobs;
undef @jobs;
+ # Merge adjacent ids into a range.
+
my $st = $jobsA[0];
my $ed = $jobsA[0];
@@ -1062,6 +1064,33 @@ sub convertToJobRange (@) {
push @jobs, ($st == $ed) ? "$st" : "$st-$ed";
+
+ # If any of the ranges are larger than allowed, split into multiple pieces.
+
+ my $l = getGlobal("gridEngineArrayMaxJobs") - 1;
+
+ if ($l > 0) {
+ @jobsA = @jobs;
+ undef @jobs;
+
+ foreach my $j (@jobsA) {
+ if ($j =~ m/^(\d+)-(\d+)$/) {
+ my $b = $1;
+ my $e = $2;
+
+ while ($b <= $e) {
+ my $B = ($b + $l < $e) ? ($b + $l) : $e;
+ push @jobs, "$b-$B";
+ $b += $l + 1;
+ }
+ } else {
+ push @jobs, $j
+ }
+ }
+
+ undef @jobsA;
+ }
+
return(@jobs);
}
@@ -1100,6 +1129,10 @@ sub submitOrRunParallelJob ($$$$$@) {
#print STDERR "----------------------------------------GRIDSTART $t\n";
#print STDERR "$path/$script.sh with $mem gigabytes memory and $thr threads.\n";
+ # Check stopping rules.
+
+ stopBefore($jobType, "$path/$script.sh");
+
# Break infinite loops. If the grid jobs keep failing, give up after a few attempts.
#
# submitScript() passes canuIteration on to the next call.
@@ -1129,6 +1162,7 @@ sub submitOrRunParallelJob ($$$$$@) {
if (defined(getGlobal("gridEngine")) &&
(getGlobal("useGrid") eq "1") &&
+ (getGlobal("useGrid$jobType") eq "1") &&
(exists($ENV{getGlobal("gridEngineJobID")}))) {
my $cmd;
my $jobName;
@@ -1150,6 +1184,7 @@ sub submitOrRunParallelJob ($$$$$@) {
if (defined(getGlobal("gridEngine")) &&
(getGlobal("useGrid") ne "0") &&
+ (getGlobal("useGrid$jobType") eq "1") &&
(! exists($ENV{getGlobal("gridEngineJobID")}))) {
print STDERR "\n";
print STDERR "Please submit the following jobs to the grid for execution using $mem gigabytes memory and $thr threads:\n";
@@ -1190,9 +1225,19 @@ sub submitOrRunParallelJob ($$$$$@) {
}
}
- my $nParallel = getGlobal("${jobType}Concurrency");
- $nParallel = int(getNumberOfCPUs() / $thr) if ((!defined($nParallel)) || ($nParallel == 0));
- $nParallel = 1 if ((!defined($nParallel)) || ($nParallel == 0));
+
+ # compute limit based on # of cpus
+ my $nCParallel = getGlobal("${jobType}Concurrency");
+ $nCParallel = int(getGlobal("maxThreads") / $thr) if ((!defined($nCParallel)) || ($nCParallel == 0));
+ $nCParallel = 1 if ((!defined($nCParallel)) || ($nCParallel == 0));
+
+ # compute limit based on physical memory
+ my $nMParallel = getGlobal("${jobType}Concurrency");
+ $nMParallel = int(getGlobal("maxMemory") / getGlobal("${jobType}Memory")) if ((!defined($nMParallel)) || ($nMParallel == 0));
+ $nMParallel = 1 if ((!defined($nMParallel)) || ($nMParallel == 0));
+
+ # run min of our limits
+ my $nParallel = $nCParallel < $nMParallel ? $nCParallel : $nMParallel;
schedulerSetNumberOfProcesses($nParallel);
schedulerFinish($path);
@@ -1245,11 +1290,11 @@ sub reportRunError ($) {
print STDERR "ERROR:\n";
if ($rc == -1) {
- print STDERR "ERROR: Failed to run the command.\n";
+ print STDERR "ERROR: Failed to run the command. (rc=$rc)\n";
} elsif ($rc & 127) {
- print STDERR "ERROR: Failed with signal $signame[$rc&127].\n";
+ print STDERR "ERROR: Failed with signal $signame[$rc & 127]. (rc=$rc)\n";
} else {
- print STDERR "ERROR: Failed with exit code ", $rc >> 8 , ".\n";
+ print STDERR "ERROR: Failed with exit code ", $rc >> 8 , ". (rc=$rc)\n";
}
print STDERR "ERROR:\n";
diff --git a/src/pipelines/canu/Execution.txt b/src/pipelines/canu/Execution.txt
new file mode 100644
index 0000000..575df18
--- /dev/null
+++ b/src/pipelines/canu/Execution.txt
@@ -0,0 +1,85 @@
+
+
+Running Commands
+----------------
+
+Simple single commands can be run, and logged to the chatter output, with something like
+
+if (output-doesn't-exist) {
+ if (runCommand(directory, command)) {
+ caExit(command-failed-message, command.err)
+ }
+ do-steps-to-make-output-exist
+}
+
+If no chatter output is desired, runCommandSilently() can be used instead. Ideally it is wrapped
+in the same recipe as above, but usually it isn't guarded at all. This function will terminate
+ungracefully if the command fails.
+
+For jobs to be run on the grid, either in parallel or a single job, the function
+submitOrRunParallelJob() is used. This takes a job type (as in gridOptions{jobType}), a path and a
+script, and a list of numeric job IDs to run. The list can be formed of simple integers or ranges,
+or both (e.g., 1,2,3-9,10,12-99). Its use is straightforward, but the wrapper to make it
+work both for grid-based execution and local execution is non-trivial.
+
+
+
+
+useGrid=remote fails
+
+maxMemory / maxThreads
+minMemory / minThreads - means what?
+maxGridCores - sge "-tc N", slurm/pbs "-a 1-1000%50"
+
+
+
+
+A *Configure() function needs to prepare the job. Its product is a shell script
+to run the job.
+
+A *Check() function parses the shell script (usually) to find out which jobs to run,
+then calls submitOrRunParallelJob() to execute them. Each *Check() function is called
+up to three times (for MaxIteration=2) - the first two calls actually try to compute,
+and the last one reports the failure.
+
+For executions not using the grid, the check function will run the jobs and fall through
+to the finishStage: clause, reporting the job finished and maybe generating some stats.
+If the job fails, it is retried, using the same flow.
+
+For executions using the grid, the check function breaks execution and checking into two grid jobs.
+A parallel job runs the compute, and a sequential job holds on the parallel job. The sequential job
+remembers canuIteration (by having it passed on the command line as a parameter). If the parallel
+jobs succeeded, the sequential job falls through to finishStage: as above. If they failed, they
+are tried again.
+
+The flow of each *Check() function is:
+
+goto allDone if (outputs exist)
+
+decide if job outputs exist or not
+
+if (job outputs do not exist) {
+ if (attempt > 1) report job failed
+ if (attempt > max) report failed, caExit()
+ report starting an attempt
+ emitStage(check, $attempt)
+ buildHTML()
+ submitOrRun()
+ return # if not on grid, we need to call again to decide if job outputs exist
+}
+
+finishStage:
+
+report job finished successfully
+
+do any processing to make 'outputs exist' true above
+
+setGlobal(iteration, 0)
+emitStage()
+buildHTML()
+stopAfter()
+
+allDone:
+
+anything that runs after EVERY call
+
diff --git a/src/pipelines/canu/Gatekeeper.pm b/src/pipelines/canu/Gatekeeper.pm
index 2ad257c..80f627c 100644
--- a/src/pipelines/canu/Gatekeeper.pm
+++ b/src/pipelines/canu/Gatekeeper.pm
@@ -40,7 +40,7 @@ package canu::Gatekeeper;
require Exporter;
@ISA = qw(Exporter);
- at EXPORT = qw(getNumberOfReadsInStore getNumberOfBasesInStore getExpectedCoverage sequenceFileExists gatekeeper);
+ at EXPORT = qw(getMaxReadInStore getNumberOfReadsInStore getNumberOfBasesInStore getExpectedCoverage sequenceFileExists gatekeeper);
use strict;
@@ -57,6 +57,26 @@ sub storeExists ($$) {
}
+# Return the second whitespace-separated field of the last line of
+# $wrk/$asm.gkpStore/readlengthhistogram.txt, or 0 when that file does not
+# exist. (Presumably that field is the longest read length recorded in the
+# histogram; the file format is not visible here -- TODO confirm.)
+sub getMaxReadInStore ($$) {
+    my ($wrk, $asm) = @_;   # $wrk is the local work directory
+    my $histogram   = "$wrk/$asm.gkpStore/readlengthhistogram.txt";
+    my $nr          = 0;
+
+    # A missing histogram means there is nothing to report.
+
+    if (! -e $histogram) {
+        return($nr);
+    }
+
+    open(F, "< $histogram") or caExit("can't open '$histogram' for reading: $!", undef);
+
+    # Every line overwrites the result, so only the final line matters.
+
+    while (<F>) {
+        $nr = (split '\s+', $_)[1];
+    }
+
+    close(F);
+
+    return($nr);
+}
sub getNumberOfReadsInStore ($$) {
my $wrk = shift @_; # Local work directory
diff --git a/src/pipelines/canu/Grid_LSF.pm b/src/pipelines/canu/Grid_LSF.pm
index 2ce402a..b287d7c 100644
--- a/src/pipelines/canu/Grid_LSF.pm
+++ b/src/pipelines/canu/Grid_LSF.pm
@@ -64,6 +64,7 @@ sub configureLSF () {
setGlobalIfUndef("gridEngineNameOption", "-J");
setGlobalIfUndef("gridEngineArrayOption", "");
setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME\[ARRAY_JOBS\]");
+ setGlobalIfUndef("gridEngineArrayMaxJobs", 65535);
setGlobalIfUndef("gridEngineOutputOption", "-o");
setGlobalIfUndef("gridEngineThreadsOption", "-n THREADS");
setGlobalIfUndef("gridEngineMemoryOption", "-M MEMORY");
@@ -79,8 +80,11 @@ sub configureLSF () {
# They are defined by the LSF_UNIT_FOR_LIMITS variable in lsf.conf
# Poll and see if we can find it
#
+
my $memUnits = undef;
+
open(F, "lsadmin showconf lim |");
+
my $s = <F>; # cluster name
my $d = <F>; # dat/time
@@ -96,6 +100,11 @@ sub configureLSF () {
close(F);
+ if (!defined($memUnits)) {
+ print STDERR "-- Warning: unknown memory units for grid engine LSF assuming KB\n";
+ $memUnits = "k";
+ }
+
# Build a list of the resources available in the grid. This will contain a list with keys
# of "#CPUs-#GBs" and values of the number of nodes With such a config. Later on, we'll use this
# to figure out what specific settings to use for each algorithm.
diff --git a/src/pipelines/canu/Grid_PBSTorque.pm b/src/pipelines/canu/Grid_PBSTorque.pm
index f434155..a3dbb9b 100644
--- a/src/pipelines/canu/Grid_PBSTorque.pm
+++ b/src/pipelines/canu/Grid_PBSTorque.pm
@@ -57,22 +57,23 @@ sub configurePBSTorque () {
return if (uc(getGlobal("gridEngine")) ne "PBS");
- setGlobalIfUndef("gridEngineSubmitCommand", "qsub");
- setGlobalIfUndef("gridEngineHoldOption", "-W depend=afteranyarray:WAIT_TAG");
- setGlobalIfUndef("gridEngineHoldOptionNoArray", "-W depend=afterany:WAIT_TAG");
- setGlobalIfUndef("gridEngineSyncOption", "");
- setGlobalIfUndef("gridEngineNameOption", "-d `pwd` -N");
- setGlobalIfUndef("gridEngineArrayOption", "-t ARRAY_JOBS");
- setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME");
- setGlobalIfUndef("gridEngineOutputOption", "-j oe -o");
- setGlobalIfUndef("gridEngineThreadsOption", "-l nodes=1:ppn=THREADS");
- setGlobalIfUndef("gridEngineMemoryOption", "-l mem=MEMORY");
- setGlobalIfUndef("gridEnginePropagateCommand", "qalter -W depend=afterany:\"WAIT_TAG\"");
- setGlobalIfUndef("gridEngineNameToJobIDCommand", "qstat -f |grep -F -B 1 WAIT_TAG | grep Id: | grep -F [] |awk '{print \$NF}'");
- setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray", "qstat -f |grep -F -B 1 WAIT_TAG | grep Id: |awk '{print \$NF}'");
- setGlobalIfUndef("gridEngineTaskID", "PBS_ARRAYID");
- setGlobalIfUndef("gridEngineArraySubmitID", "\\\$PBS_ARRAYID");
- setGlobalIfUndef("gridEngineJobID", "PBS_JOBID");
+ setGlobalIfUndef("gridEngineSubmitCommand", "qsub");
+ setGlobalIfUndef("gridEngineHoldOption", "-W depend=afteranyarray:WAIT_TAG");
+ setGlobalIfUndef("gridEngineHoldOptionNoArray", "-W depend=afterany:WAIT_TAG");
+ setGlobalIfUndef("gridEngineSyncOption", "");
+ setGlobalIfUndef("gridEngineNameOption", "-d `pwd` -N");
+ setGlobalIfUndef("gridEngineArrayOption", "-t ARRAY_JOBS");
+ setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME");
+ setGlobalIfUndef("gridEngineArrayMaxJobs", 65535);
+ setGlobalIfUndef("gridEngineOutputOption", "-j oe -o");
+ setGlobalIfUndef("gridEngineThreadsOption", "-l nodes=1:ppn=THREADS");
+ setGlobalIfUndef("gridEngineMemoryOption", "-l mem=MEMORY");
+ setGlobalIfUndef("gridEnginePropagateCommand", "qalter -W depend=afterany:\"WAIT_TAG\"");
+ setGlobalIfUndef("gridEngineNameToJobIDCommand", "qstat -f |grep -F -B 1 WAIT_TAG | grep Id: | grep -F [] |awk '{print \$NF}'");
+ setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray", "qstat -f |grep -F -B 1 WAIT_TAG | grep Id: |awk '{print \$NF}'");
+ setGlobalIfUndef("gridEngineTaskID", "PBS_ARRAYID");
+ setGlobalIfUndef("gridEngineArraySubmitID", "\\\$PBS_ARRAYID");
+ setGlobalIfUndef("gridEngineJobID", "PBS_JOBID");
# Build a list of the resources available in the grid. This will contain a list with keys
# of "#CPUs-#GBs" and values of the number of nodes With such a config. Later on, we'll use this
diff --git a/src/pipelines/canu/Grid_SGE.pm b/src/pipelines/canu/Grid_SGE.pm
index 3fd10b9..777f252 100644
--- a/src/pipelines/canu/Grid_SGE.pm
+++ b/src/pipelines/canu/Grid_SGE.pm
@@ -54,6 +54,16 @@ sub configureSGE () {
return if (uc(getGlobal("gridEngine")) ne "SGE");
+ my $maxArraySize = 65535;
+
+ open(F, "qconf -sconf |") or caExit("can't run 'qconf' to get SGE config", undef);
+ while (<F>) {
+ if (m/max_aj_tasks\s+(\d+)/) {
+ $maxArraySize = $1;
+ }
+ }
+ close(F);
+
setGlobalIfUndef("gridEngineSubmitCommand", "qsub");
setGlobalIfUndef("gridEngineHoldOption", "-hold_jid \"WAIT_TAG\"");
setGlobalIfUndef("gridEngineHoldOptionNoArray", undef);
@@ -61,6 +71,7 @@ sub configureSGE () {
setGlobalIfUndef("gridEngineNameOption", "-cwd -N");
setGlobalIfUndef("gridEngineArrayOption", "-t ARRAY_JOBS");
setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME");
+ setGlobalIfUndef("gridEngineArrayMaxJobs", $maxArraySize);
setGlobalIfUndef("gridEngineOutputOption", "-j y -o");
setGlobalIfUndef("gridEnginePropagateCommand", "qalter -hold_jid \"WAIT_TAG\"");
setGlobalIfUndef("gridEngineThreadsOption", undef); #"-pe threads THREADS");
diff --git a/src/pipelines/canu/Grid_Slurm.pm b/src/pipelines/canu/Grid_Slurm.pm
index fb4a1d1..dedd058 100644
--- a/src/pipelines/canu/Grid_Slurm.pm
+++ b/src/pipelines/canu/Grid_Slurm.pm
@@ -57,22 +57,33 @@ sub configureSlurm () {
return if (uc(getGlobal("gridEngine")) ne "SLURM");
- setGlobalIfUndef("gridEngineSubmitCommand", "sbatch");
- setGlobalIfUndef("gridEngineHoldOption", "--depend=afterany:WAIT_TAG");
- setGlobalIfUndef("gridEngineHoldOptionNoArray", "--depend=afterany:WAIT_TAG");
- setGlobalIfUndef("gridEngineSyncOption", ""); ## TODO: SLURM may not support w/out wrapper; See LSF bsub manpage to compare
- setGlobalIfUndef("gridEngineNameOption", "-D `pwd` -J");
- setGlobalIfUndef("gridEngineArrayOption", "-a ARRAY_JOBS");
- setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME");
- setGlobalIfUndef("gridEngineOutputOption", "-o"); ## NB: SLURM default joins STDERR & STDOUT if no -e specified
- setGlobalIfUndef("gridEngineThreadsOption", "--cpus-per-task=THREADS");
- setGlobalIfUndef("gridEngineMemoryOption", "--mem=MEMORY");
- setGlobalIfUndef("gridEnginePropagateCommand", "scontrol update job=\"WAIT_TAG\""); ## TODO: manually verify this in all cases
- setGlobalIfUndef("gridEngineNameToJobIDCommand", "squeue -h -o\%F -n \"WAIT_TAG\" | uniq"); ## TODO: manually verify this in all cases
- setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray", "squeue -h -o\%i -n \"WAIT_TAG\""); ## TODO: manually verify this in all cases
- setGlobalIfUndef("gridEngineTaskID", "SLURM_ARRAY_TASK_ID");
- setGlobalIfUndef("gridEngineArraySubmitID", "%A_%a");
- setGlobalIfUndef("gridEngineJobID", "SLURM_JOB_ID");
+ my $maxArraySize = 65535;
+
+ open(F, "scontrol show config |") or caExit("can't run 'scontrol' to get SLURM config", undef);
+ while (<F>) {
+ if (m/MaxArraySize\s+=\s+(\d+)/) {
+ $maxArraySize = $1;
+ }
+ }
+ close(F);
+
+ setGlobalIfUndef("gridEngineSubmitCommand", "sbatch");
+ setGlobalIfUndef("gridEngineHoldOption", "--depend=afterany:WAIT_TAG");
+ setGlobalIfUndef("gridEngineHoldOptionNoArray", "--depend=afterany:WAIT_TAG");
+ setGlobalIfUndef("gridEngineSyncOption", ""); ## TODO: SLURM may not support w/out wrapper; See LSF bsub manpage to compare
+ setGlobalIfUndef("gridEngineNameOption", "-D `pwd` -J");
+ setGlobalIfUndef("gridEngineArrayOption", "-a ARRAY_JOBS");
+ setGlobalIfUndef("gridEngineArrayName", "ARRAY_NAME");
+ setGlobalIfUndef("gridEngineArrayMaxJobs", $maxArraySize);
+ setGlobalIfUndef("gridEngineOutputOption", "-o"); ## NB: SLURM default joins STDERR & STDOUT if no -e specified
+ setGlobalIfUndef("gridEngineThreadsOption", "--cpus-per-task=THREADS");
+ setGlobalIfUndef("gridEngineMemoryOption", "--mem-per-cpu=MEMORY");
+ setGlobalIfUndef("gridEnginePropagateCommand", "scontrol update job=\"WAIT_TAG\""); ## TODO: manually verify this in all cases
+ setGlobalIfUndef("gridEngineNameToJobIDCommand", "squeue -h -o\%F -n \"WAIT_TAG\" | uniq"); ## TODO: manually verify this in all cases
+ setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray", "squeue -h -o\%i -n \"WAIT_TAG\""); ## TODO: manually verify this in all cases
+ setGlobalIfUndef("gridEngineTaskID", "SLURM_ARRAY_TASK_ID");
+ setGlobalIfUndef("gridEngineArraySubmitID", "%A_%a");
+ setGlobalIfUndef("gridEngineJobID", "SLURM_JOB_ID");
# Build a list of the resources available in the grid. This will contain a list with keys
diff --git a/src/pipelines/canu/HTML.pm b/src/pipelines/canu/HTML.pm
index 43c2608..b7d4872 100644
--- a/src/pipelines/canu/HTML.pm
+++ b/src/pipelines/canu/HTML.pm
@@ -32,34 +32,62 @@ require Exporter;
use strict;
+use File::Copy;
+use File::Path qw(make_path remove_tree);
+
use canu::Defaults;
use canu::Execution;
-sub simpleFigure ($$$) {
- my $body = shift @_;
- my $image = shift @_;
- my $text = shift @_;
+# Copy $sPath to $dPath, but only when the source exists and the destination
+# is either missing or older than the source. The return value of copy() is
+# not checked, so a failed copy is silent.
+sub copyFile ($$) {
+ my $sPath = shift @_; # Path to source file.
+ my $dPath = shift @_; # Path to destination file.
- if ((! -e "$image.sm.png") && (! -e "$image.lg.png")) {
- push @$body, "<p>Image '$image' not found.</p>\n";
+ # -M is the file's age in days relative to script start, so a smaller value
+ # means a more recently modified file.
+ if ((-e $sPath) &&
+ ((! -e $dPath) ||
+ ((-M $sPath) < (-M $dPath)))) {
+ copy($sPath, $dPath);
}
+}
+
+
- elsif ((-z "$image.sm.png") || (-z "$image.lg.png")) {
- push @$body, "<p>Image '$image' is empty. Probably no data to display.</p>\n";
+# Append the HTML for one figure to the array referenced by $body.
+#
+#   $body    reference to the array of HTML lines being built
+#   $sImage  path prefix of the source images ("$sImage.sm.png", "$sImage.lg.png")
+#   $dImage  path prefix the images are copied to and linked from
+#   $text    caption placed inside <figcaption>
+#
+# A "not found" paragraph is emitted when none of the four source/destination
+# files exists, and an "is empty" paragraph when either destination file has
+# zero size after copying; otherwise a <figure> linking the small image to the
+# large one is emitted.
+sub simpleFigure ($$$$) {
+ my $body = shift @_;
+ my $sImage = shift @_;
+ my $dImage = shift @_;
+ my $text = shift @_;
+
+ # No image? Note so in the html.
+
+ if ((! -e "$sImage.sm.png") && (! -e "$sImage.lg.png") &&
+ (! -e "$dImage.sm.png") && (! -e "$dImage.lg.png")) {
+ push @$body, "<p>Image '$sImage' not found.</p>\n";
+ return;
}
- else {
- push @$body, "<figure>\n";
- push @$body, "<a href='$image.lg.png'><img src='$image.sm.png'></a>\n";
- push @$body, "<figcaption>\n";
- push @$body, "$text\n";
- push @$body, "</figcaption>\n";
- push @$body, "</figure>\n";
+ # Copy the file to our files location.
+ # copyFile() only copies when the source exists and is newer than (or
+ # missing at) the destination.
+
+ copyFile("$sImage.lg.png", "$dImage.lg.png");
+ copyFile("$sImage.sm.png", "$dImage.sm.png");
+
+ # Empty image? Note so in the html.
+
+ if ((-z "$dImage.sm.png") || (-z "$dImage.lg.png")) {
+ push @$body, "<p>Image '$sImage' is empty. Probably no data to display.</p>\n";
+ return;
}
-}
+ # Otherwise, show it!
+
+ push @$body, "<figure>\n";
+ push @$body, "<a href='$dImage.lg.png'><img src='$dImage.sm.png'></a>\n";
+ push @$body, "<figcaption>\n";
+ push @$body, "$text\n";
+ push @$body, "</figcaption>\n";
+ push @$body, "</figure>\n";
+}
@@ -167,7 +195,10 @@ sub buildGatekeeperHTML ($$$$$$) {
close(F);
push @$body, "<h3>Read Length Histogram</h3>\n";
- push @$body, "<a href='$wrk/$asm.gkpStore/readlengths.lg.png'><img src='$wrk/$asm.gkpStore/readlengths.sm.png'></img></a>\n";
+ simpleFigure($body,
+ "$wrk/$asm.gkpStore/readlengths",
+ "$wrk.html.files/readlengths",
+ "");
}
@@ -213,14 +244,10 @@ sub buildMerylHTML ($$$$$$) {
}
close(F);
- if (-e "$wrk/0-mercounts/$asm.ms$ms.histogram.sm.png") {
- push @$body, "<figure>\n";
- push @$body, "<a href='$wrk/0-mercounts/$asm.ms$ms.histogram.lg.png'><img src='$wrk/0-mercounts/$asm.ms$ms.histogram.sm.png'></a>\n";
- push @$body, "<figcaption>\n";
- push @$body, "Histogram for k=$ms with $numTotal mers, $numDistinct distinct mers and $numUnique single-copy mers. Largest count is $largest.\n";
- push @$body, "</figcaption>\n";
- push @$body, "</figure>\n";
- }
+ simpleFigure($body,
+ "$wrk/0-mercounts/$asm.ms$ms.histogram",
+ "$wrk.html.files/$asm.ms$ms.histogram",
+ "Histogram for k=$ms with $numTotal mers, $numDistinct distinct mers and $numUnique single-copy mers. Largest count is $largest.");
}
elsif ((-e "$wrk/0-mercounts/$asm.ms$ms.ignore") && (-z "$wrk/0-mercounts/$asm.ms$ms.ignore")) {
@@ -366,6 +393,7 @@ sub buildCorrectionHTML ($$$$$$) {
simpleFigure($body,
"$wrk/2-correction/$asm.estimate.original-x-corrected",
+ "correction.html.files/$asm.estimate.original-x-corrected",
"Scatter plot of the original read length (X axis) against the expected corrected read length (Y axis).\n" .
"Colors show a comparison of the simple filter (which doesn't use overlaps) to the expensive filter (which does).\n" .
"A large green triangle (false negatives) hints that there could be abnormally low quality regions in the reads.\n");
@@ -375,20 +403,24 @@ sub buildCorrectionHTML ($$$$$$) {
# Original vs expected shown above.
simpleFigure($body,
"$wrk/2-correction/$asm.originalLength-vs-expectedLength",
+ "correction.html.files/$asm.originalLength-vs-expectedLength",
"Scatter plot of original vs expected read length. Shown in filter plot above.");
simpleFigure($body,
"$wrk/2-correction/$asm.originalLength-vs-correctedLength",
+ "correction.html.files/$asm.originalLength-vs-correctedLength",
"Scatter plot of original vs corrected read length.");
simpleFigure($body,
"$wrk/2-correction/$asm.expectedLength-vs-correctedLength",
+ "correction.html.files/$asm.expectedLength-vs-correctedLength",
"Scatter plot of expected vs corrected read length.");
# Histogram - expected vs corrected lengths NEEDS TO SHOW NEGATIVES!?
simpleFigure($body,
"$wrk/2-correction/$asm.length-difference-histograms",
+ "correction.html.files/$asm.length-difference-histograms",
"Histogram of the difference between the expected and corrected read lengths.\n" .
"Note that a negative difference means the corrected read is larger than expected.\n");
@@ -396,6 +428,7 @@ sub buildCorrectionHTML ($$$$$$) {
simpleFigure($body,
"$wrk/2-correction/$asm.length-histograms",
+ "correction.html.files/$asm.length-histograms",
"Histogram of original (red), expected (green) and actual corrected (blue) read lengths.\n");
}
@@ -465,15 +498,15 @@ sub buildTrimmingHTML ($$$$$$) {
push @$body, "<p>Stage not computed or results file removed ($wrk/3-overlapbasedtrimming/$asm.1.trimReads.stats).</p>\n";
}
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.inputDeletedReads", "");
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.inputNoTrimReads", "");
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.inputReads", "");
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.outputDeletedReads", "");
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.outputNoOvlReads", "");
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.outputTrimmedReads", "");
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.outputUnchangedReads", "");
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.trim3", "");
- simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.trim5", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.inputDeletedReads", "trimming.html.files/$asm.1.trimReads.inputDeletedReads", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.inputNoTrimReads", "trimming.html.files/$asm.1.trimReads.inputNoTrimReads", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.inputReads", "trimming.html.files/$asm.1.trimReads.inputReads", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.outputDeletedReads", "trimming.html.files/$asm.1.trimReads.outputDeletedReads", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.outputNoOvlReads", "trimming.html.files/$asm.1.trimReads.outputNoOvlReads", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.outputTrimmedReads", "trimming.html.files/$asm.1.trimReads.outputTrimmedReads", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.outputUnchangedReads", "trimming.html.files/$asm.1.trimReads.outputUnchangedReads", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.trim3", "trimming.html.files/$asm.1.trimReads.trim3", "");
+ simpleFigure($body, "$wrk/3-overlapbasedtrimming/$asm.1.trimReads.trim5", "trimming.html.files/$asm.1.trimReads.trim5", "");
push @$body, "<h2>Splitting</h2>\n";
push @$body, "\n";
@@ -569,9 +602,9 @@ sub buildOverlapperHTML ($$$$$$) {
}
push @$body, "<table>\n";
- push @$body, "<tr><th>Category</th><th>Reads</th><th colspan='3'>Read Length</th><th colspan='3'>Feature Size or Coverage</th><th>Analysis</th></tr>\n";
+ push @$body, "<tr><th>Category</th><th>Reads</th><th>%</th><th colspan='3'>Read Length</th><th colspan='3'>Feature Size or Coverage</th><th>Analysis</th></tr>\n";
- my ($category, $reads, $length, $lengthsd, $size, $sizesd, $analysis);
+ my ($category, $reads, $readsP, $length, $lengthsd, $size, $sizesd, $analysis);
open(F, "< $wrk/$asm.ovlStore.summary") or caExit("Failed to open overlap store statistics in '$wrk/$asm.ovlStore': $!", undef);
$_ = <F>;
@@ -581,25 +614,27 @@ sub buildOverlapperHTML ($$$$$$) {
next if ($_ eq "");
- if (m/(.*)\s+(\d+)\s+(\d+.\d+)\s+\+-\s+(\d+.\d+)\s+(\d+.\d+)\s+\+-\s+(\d+.\d+)\s+\((.*)\)$/) {
+ if (m/(.*)\s+(\d+)\s+(\d+.\d+)\s+(\d+.\d+)\s+\+-\s+(\d+.\d+)\s+(\d+.\d+)\s+\+-\s+(\d+.\d+)\s+\((.*)\)$/) {
$category = $1;
$reads = $2;
- $length = $3;
- $lengthsd = $4;
- $size = $5;
- $sizesd = $6;
- $analysis = $7;
- push @$body, "<tr><td>$category</td><td>$reads</td><td align='right'>$length</td><td>±</td><td align='left'>$lengthsd</td><td align='right'>$size</td><td>±</td><td align='left'>$sizesd</td><td align='left'>$analysis</td></tr>\n";
-
- } elsif (m/(.*)\s+(\d+)\s+(\d+.\d+)\s+\+-\s+(\d+.\d+)\s+\((.*)\)$/) {
+ $readsP = $3;
+ $length = $4;
+ $lengthsd = $5;
+ $size = $6;
+ $sizesd = $7;
+ $analysis = $8;
+ push @$body, "<tr><td>$category</td><td>$reads</td><td>$readsP</td><td align='right'>$length</td><td>±</td><td align='left'>$lengthsd</td><td align='right'>$size</td><td>±</td><td align='left'>$sizesd</td><td align='left'>$analysis</td></tr>\n";
+
+ } elsif (m/(.*)\s+(\d+)\s+(\d+.\d+)\s+(\d+.\d+)\s+\+-\s+(\d+.\d+)\s+\((.*)\)$/) {
$category = $1;
$reads = $2;
- $length = $3;
- $lengthsd = $4;
+ $readsP = $3;
+ $length = $4;
+ $lengthsd = $5;
$size = undef;
$sizesd = undef;
- $analysis = $5;
- push @$body, "<tr><td>$category</td><td>$reads</td><td align='right'>$length</td><td>±</td><td align-'left'>$lengthsd</td><td></td><td></td><td></td><td align='left'>$analysis</td></tr>\n";
+ $analysis = $6;
+ push @$body, "<tr><td>$category</td><td>$reads</td><td>$readsP</td><td align='right'>$length</td><td>±</td><td align='left'>$lengthsd</td><td></td><td></td><td></td><td align='left'>$analysis</td></tr>\n";
} else {
chomp;
@@ -667,18 +702,17 @@ sub buildOutputHTML ($$$$$$) {
sub buildHTML ($$$) {
my $WRK = shift @_; # Root work directory (the -d option to canu)
my $wrk = $WRK; # Local work directory
- my $dir;
my $asm = shift @_;
my $tag = shift @_;
my @css;
my @body;
my @scripts;
- $dir = "correction" if ($tag eq "cor");
- $dir = "trimming" if ($tag eq "obt");
- $dir = "unitigging" if ($tag eq "utg");
+ $wrk = "$WRK/correction" if ($tag eq "cor");
+ $wrk = "$WRK/trimming" if ($tag eq "obt");
+ $wrk = "$WRK/unitigging" if ($tag eq "utg");
- $wrk = "$WRK/$dir";
+ make_path("$wrk.html.files") if (! -e "$wrk.html.files");
# For correction runs
if ($tag eq "cor") {
@@ -713,7 +747,7 @@ sub buildHTML ($$$) {
#print STDERR "WRITING '$wrk/$asm-summary.html'\n";
- open(F, "> $WRK/$dir.html") or die "can't open '$WRK/$dir.html' for writing: $!\n";
+ open(F, "> $wrk.html") or die "can't open '$wrk.html' for writing: $!\n";
print F "<!DOCTYPE html>\n";
print F "\n";
diff --git a/src/pipelines/canu/Meryl.pm b/src/pipelines/canu/Meryl.pm
index 3bc13b2..596fa36 100644
--- a/src/pipelines/canu/Meryl.pm
+++ b/src/pipelines/canu/Meryl.pm
@@ -48,10 +48,10 @@ use File::Path qw(make_path);
use canu::Defaults;
use canu::Execution;
+use canu::Gatekeeper;
+use canu::ErrorEstimate;
use canu::HTML;
-
-
sub getGenomeCoverage($$$) {
my $wrk = shift @_;
my $asm = shift @_;
@@ -206,8 +206,8 @@ sub merylParameters ($$$) {
$merDistinct = undef;
$merTotal = undef;
- $ffile = "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore"; # The mhap-specific file we should be creating (ends in IGNORE).
- $ofile = "$wrk/0-mercounts/$asm.ms$merSize"; # The meryl database 'intermediate file'.
+ $ffile = "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore.gz"; # The mhap-specific file we should be creating (ends in IGNORE).
+ $ofile = "$wrk/0-mercounts/$asm.ms$merSize"; # The meryl database 'intermediate file'.
} elsif (getGlobal("${tag}Overlapper") eq "minimap") {
# do nothing
@@ -501,6 +501,7 @@ sub merylProcess ($$$) {
(! -e $ffile)) {
my $totalMers = 0;
+ my $maxCount = 0;
# Meryl reports number of distinct canonical mers, we multiply by two to get the
# (approximate) number of distinct mers. Palindromes are counted twice, oh well.
@@ -510,16 +511,35 @@ sub merylProcess ($$$) {
if (m/Found\s+(\d+)\s+mers./) {
$totalMers = 2 * $1;
}
+ if (m/Largest\s+mercount\s+is\s+(\d+)./) {
+ $maxCount = $1;
+ }
}
close(F);
-
caFailure("didn't find any mers?", "$ofile.histogram.info") if ($totalMers == 0);
my $filterThreshold = (getGlobal("${tag}MhapSensitivity") eq "normal") ? getGlobal("${tag}MhapFilterThreshold") : getGlobal("${tag}MhapFilterThreshold"); # Also set in Meryl.pm
- my $minCount = int($filterThreshold * $totalMers);
+
+ my $misRate = 0.1;
+ my $minCount = defined(getGlobal("${tag}MhapFilterUnique")) ? uniqueKmerThreshold($wrk, $asm, $merSize, $misRate)+1 : int($filterThreshold * $totalMers);
+ my $totalToOutput = 0;
+ my $totalFiltered = 0;
+ open(F, "< $ofile.histogram") or die "Failed to open '$ofile.histogram' for reading: $!\n";
+ while (<F>) {
+ my ($kCount, $occurences, $cumsum, $faction) = split '\s+', $_;
+ if ($kCount < $minCount) {
+ $totalFiltered = $cumsum * 100;
+ }
+ if ($kCount >= $minCount) {
+ $totalToOutput += $occurences;
+ }
+ }
+ close(F);
+ $totalToOutput *= 2; # for the reverse complement
open(F, "$bin/meryl -Dt -n $minCount -s $ofile | ") or die "Failed to run meryl to generate frequent mers $!\n";
- open(O, "> $ofile.frequentMers.ignore") or die "Failed to open '$ofile.frequentMers.mhap_ignore' for writing: $!\n";
+ open(O, "| gzip -c > $ofile.frequentMers.ignore.gz") or die "Failed to open '$ofile.frequentMers.ignore.gz' for writing: $!\n";
+ printf(O "%d\n", $totalToOutput);
while (!eof(F)) {
my $h = <F>;
@@ -529,13 +549,17 @@ sub merylProcess ($$$) {
$r =~ tr/ACGTacgt/TGCAtgca/;
if ($h =~ m/^>(\d+)/) {
- printf(O "%s\t%.16f\t$1\t$totalMers\n", $m, $1 / $totalMers);
- printf(O "%s\t%.16f\t$1\t$totalMers\n", $r, $1 / $totalMers);
+ printf(O "%s\t%e\n", $m, $1 / $totalMers);
+ printf(O "%s\t%e\n", $r, $1 / $totalMers);
}
}
-
close(O);
close(F);
+
+ if (defined(getGlobal("${tag}MhapFilterUnique"))) {
+ printf STDERR "-- For %s overlapping, filtering low-occurence k-mers < %d (%.2f%%) based on estimated error of %.2f%%.\n", getGlobal("${tag}Overlapper"), $minCount, $totalFiltered, 100*estimateRawError($wrk, $asm, $tag, $merSize);
+ }
+ printf STDERR "-- For %s overlapping, set repeat k-mer threshold to %d.\n", getGlobal("${tag}Overlapper"), int($filterThreshold * $totalMers);
}
# Report the new threshold.
diff --git a/src/pipelines/canu/Output.pm b/src/pipelines/canu/Output.pm
index d6d9563..e61911f 100644
--- a/src/pipelines/canu/Output.pm
+++ b/src/pipelines/canu/Output.pm
@@ -100,13 +100,13 @@ sub outputGraph ($$) {
goto allDone if (skipStage($WRK, $asm, "outputGraph") == 1);
goto allDone if (-e "$WRK/$asm.gfa");
- if (-e "$wrk/4-unitigger/$asm.unused.best.edges") {
+ if (-e "$wrk/4-unitigger/$asm.unused.edges") {
$cmd = "$bin/buildGraph \\\n";
$cmd .= " -G $wrk/$asm.gkpStore \\\n";
$cmd .= " -T $wrk/$asm.tigStore 2 \\\n";
- $cmd .= " -E $wrk/4-unitigger/$asm.unused.best.edges \\\n";
- $cmd .= "> $WRK/$asm.gfa \\\n";
- $cmd .= "2> $WRK/$asm.gfa.err\n";
+ $cmd .= " -E $wrk/4-unitigger/$asm.unused.edges \\\n";
+ $cmd .= " -o $WRK/$asm.gfa \\\n";
+ $cmd .= "> $WRK/$asm.gfa.err 2>&1\n"; # stdout goes to the .err log first, then stderr is duplicated onto it
if (runCommand($wrk, $cmd)) {
caExit("failed to output consensus", "$WRK/$asm.gfa.err");
diff --git a/src/pipelines/canu/OverlapBasedTrimming.pm b/src/pipelines/canu/OverlapBasedTrimming.pm
index e16ae7a..c67f85f 100644
--- a/src/pipelines/canu/OverlapBasedTrimming.pm
+++ b/src/pipelines/canu/OverlapBasedTrimming.pm
@@ -177,7 +177,8 @@ sub dumpReads ($$) {
my $inp;
goto allDone if (skipStage($WRK, $asm, "obt-dumpReads") == 1);
- goto allDone if (-e "$WRK/$asm.trimmedReads.fastq");
+ goto allDone if (-e "$WRK/$asm.trimmedReads.fasta");
+ goto allDone if (-e "$WRK/$asm.trimmedReads.fasta.gz");
make_path($path) if (! -d $path);
@@ -186,10 +187,10 @@ sub dumpReads ($$) {
caFailure("dumping trimmed reads failed; no 'clear' input", "$WRK/$asm.trimmedReads.err") if (!defined($inp));
- $cmd = "$bin/gatekeeperDumpFASTQ -nolibname \\\n";
+ $cmd = "$bin/gatekeeperDumpFASTQ -fasta -nolibname \\\n";
$cmd .= " -G $wrk/$asm.gkpStore \\\n";
$cmd .= " -c $inp \\\n";
- $cmd .= " -o $WRK/$asm.trimmedReads \\\n";
+ $cmd .= " -o $WRK/$asm.trimmedReads.gz \\\n";
$cmd .= "> $WRK/$asm.trimmedReads.err 2>&1";
if (runCommand($wrk, $cmd)) {
@@ -207,5 +208,5 @@ sub dumpReads ($$) {
allDone:
print STDERR "--\n";
- print STDERR "-- Trimmed reads saved in '$WRK/$asm.trimmedReads.fastq'\n";
+ print STDERR "-- Trimmed reads saved in '$WRK/$asm.trimmedReads.fasta.gz'\n";
}
diff --git a/src/pipelines/canu/OverlapErrorAdjustment.pm b/src/pipelines/canu/OverlapErrorAdjustment.pm
index dcfea5a..4316102 100644
--- a/src/pipelines/canu/OverlapErrorAdjustment.pm
+++ b/src/pipelines/canu/OverlapErrorAdjustment.pm
@@ -27,6 +27,10 @@
# are a 'United States Government Work', and
# are released in the public domain
#
+ # Sergey Koren beginning on 2016-MAR-27
+ # are a 'United States Government Work', and
+ # are released in the public domain
+ #
# File 'README.licenses' in the root directory of this distribution contains
# full conditions and disclaimers for each license.
##
@@ -135,6 +139,10 @@ sub readErrorDetectionConfigure ($$) {
my $maxReads = getGlobal("redBatchSize");
my $maxBases = getGlobal("redBatchLength");
+ print STDERR "\n";
+ print STDERR "Configure RED for ", getGlobal("redMemory"), "gb memory with batches of at most ", ($maxReads > 0) ? $maxReads : "(unlimited)", " reads and ", ($maxBases > 0) ? $maxBases : "(unlimited)", " bases.\n";
+ print STDERR "\n";
+
my $reads = 0;
my $bases = 0;
my $olaps = 0;
@@ -159,11 +167,12 @@ sub readErrorDetectionConfigure ($$) {
(($id == $maxID - 1))) {
push @end, $id;
- #printf(STDERR "RED job %3u from read %9u to read %9u using %7.3f GB for %7u reads, %7.3f GB for %9u olaps and %7.3f GB for evidence\n",
- # $nj + 1, $bgn[$nj], $end[$nj], $reads,
- # 13 * $bases / 1024 / 1024 / 1024, $bases,
- # 12 * $olaps / 1024 / 1024 / 1024, $olaps,
- # 2 * $bases * $coverage / 1024 / 1024 / 1024);
+ printf(STDERR "RED job %3u from read %9u to read %9u - %7.3f GB for %7u reads - %7.3f GB for %9u olaps - %7.3f GB for evidence\n",
+ $nj + 1, $bgn[$nj], $end[$nj],
+ $memory / 1024 / 1024 / 1024, $reads,
+ 13 * $bases / 1024 / 1024 / 1024, $bases,
+ 12 * $olaps / 1024 / 1024 / 1024, $olaps,
+ 2 * $bases * $coverage / 1024 / 1024 / 1024);
$nj++;
@@ -377,32 +386,54 @@ sub overlapErrorAdjustmentConfigure ($$) {
my $maxReads = getGlobal("oeaBatchSize");
my $maxBases = getGlobal("oeaBatchLength");
+ print STDERR "\n";
+ print STDERR "Configure OEA for ", getGlobal("oeaMemory"), "gb memory with batches of at most ", ($maxReads > 0) ? $maxReads : "(unlimited)", " reads and ", ($maxBases > 0) ? $maxBases : "(unlimited)", " bases.\n";
+ print STDERR "\n";
+
my $reads = 0;
my $bases = 0;
my $olaps = 0;
my $coverage = getExpectedCoverage($wrk, $asm);
+ my $corrSize = (-s "$path/red.red");
push @bgn, 1;
- for (my $id = 1; $id < $maxID; $id++) {
+ for (my $id = 1; $id <= $maxID; $id++) {
$reads += 1;
$bases += $readLengths[$id];
$olaps += $numOlaps[$id];
- my $memory = (1 * $bases) + (28 * $olaps) + (8 * $bases * getGlobal("utgOvlErrorRate"));
+ # Hacked to attempt to estimate adjustment size better. Olaps should only require 12 bytes each.
+
+ my $memBases = (1 * $bases); # Corrected reads for this batch
+ my $memAdj1 = (8 * $corrSize) * 0.33; # Overestimate of the size of the indel adjustments needed (total size includes mismatches)
+ my $memReads = (32 * $reads); # Read data in the batch
+ my $memOlaps = (32 * $olaps); # Loaded overlaps
+ my $memSeq = (4 * 2097152); # two char arrays of 2*maxReadLen
+ my $memAdj2 = (16 * 2097152); # two Adjust_t arrays of maxReadLen
+ my $memWA = (32 * 1048576); # Work area (16mb) and edit array (16mb)
+ my $memMisc = (256 * 1048576); # Work area (16mb) and edit array (16mb) and (192mb) slop
- if ((($maxMem > 0) && ($memory >= $maxMem * 0.75)) || # Allow 25% slop
+ my $memory = $memBases + $memAdj1 + $memReads + $memOlaps + $memSeq + $memAdj2 + $memWA + $memMisc;
+
+ if ((($maxMem > 0) && ($memory >= $maxMem * 0.75)) ||
(($maxReads > 0) && ($reads >= $maxReads)) ||
(($maxBases > 0) && ($bases >= $maxBases)) ||
- (($id == $maxID - 1))) {
+ (($id == $maxID))) {
push @end, $id;
- #printf(STDERR "OEA job %3u from read %9u to read %9u using %7.3f GB for reads, %7.3f GB for olaps and %7.3f GB for adjustments\n",
- # $nj + 1, $bgn[$nj], $end[$nj],
- # 1 * $bases / 1024 / 1024 / 1024,
- # 12 * $olaps / 1024 / 1024 / 1024,
- # 8 * $bases * getGlobal("utgOvlErrorRate") / 1024 / 1024 / 1024);
+ printf(STDERR "OEA job %3u from read %9u to read %9u - %4.1f bases + %4.1f adjusts + %4.1f reads + %4.1f olaps + %4.1f fseq/rseq + %4.1f fadj/radj + %4.1f work + %4.1f misc = %5.1f MB\n",
+ $nj + 1, $bgn[$nj], $end[$nj],
+ $memBases / 1024 / 1024,
+ $memAdj1 / 1024 / 1024,
+ $memReads / 1024 / 1024,
+ $memOlaps / 1024 / 1024,
+ $memSeq / 1024 / 1024,
+ $memAdj2 / 1024 / 1024,
+ $memWA / 1024 / 1024,
+ $memMisc / 1024 / 1024,
+ $memory / 1024 / 1024);
$nj++;
diff --git a/src/pipelines/canu/OverlapInCore.pm b/src/pipelines/canu/OverlapInCore.pm
index 3853c60..b1609aa 100644
--- a/src/pipelines/canu/OverlapInCore.pm
+++ b/src/pipelines/canu/OverlapInCore.pm
@@ -319,6 +319,7 @@ sub overlapCheck ($$$$) {
my $currentJobID = 1;
my @successJobs;
my @statsJobs;
+ my @miscJobs;
my @failedJobs;
my $failureMessage = "";
@@ -329,18 +330,26 @@ sub overlapCheck ($$$$) {
if (-e "$path/$1.ovb.gz") {
push @successJobs, "$path/$1.ovb.gz\n"; # Dumped to a file, so include \n
push @statsJobs, "$path/$1.stats"; # Used here, don't include \n
+ push @miscJobs, "$path/$1.stats\n";
+ push @miscJobs, "$path/$1.counts\n";
} elsif (-e "$path/$1.ovb") {
push @successJobs, "$path/$1.ovb\n";
push @statsJobs, "$path/$1.stats";
+ push @miscJobs, "$path/$1.stats\n";
+ push @miscJobs, "$path/$1.counts\n";
} elsif (-e "$path/$1.ovb.bz2") {
push @successJobs, "$path/$1.ovb.bz2\n";
push @statsJobs, "$path/$1.stats";
+ push @miscJobs, "$path/$1.stats\n";
+ push @miscJobs, "$path/$1.counts\n";
} elsif (-e "$path/$1.ovb.xz") {
push @successJobs, "$path/$1.ovb.xz\n";
push @statsJobs, "$path/$1.stats";
+ push @miscJobs, "$path/$1.stats\n";
+ push @miscJobs, "$path/$1.counts\n";
} else {
$failureMessage .= "-- job $path/$1 FAILED.\n";
@@ -390,6 +399,10 @@ sub overlapCheck ($$$$) {
print L @successJobs;
close(L);
+ open(L, "> $path/ovljob.more.files") or caExit("can't open '$path/ovljob.more.files' for writing: $!", undef);
+ print L @miscJobs;
+ close(L);
+
reportOverlapStats($wrk, $asm, @statsJobs);
setGlobal("canuIteration", 0);
diff --git a/src/pipelines/canu/OverlapMMap.pm b/src/pipelines/canu/OverlapMMap.pm
index da2ffdb..cfde7c3 100644
--- a/src/pipelines/canu/OverlapMMap.pm
+++ b/src/pipelines/canu/OverlapMMap.pm
@@ -19,6 +19,10 @@
# are a 'United States Government Work', and
# are released in the public domain
#
+ # Brian P. Walenz beginning on 2016-MAY-02
+ # are a 'United States Government Work', and
+ # are released in the public domain
+ #
# File 'README.licenses' in the root directory of this distribution contains
# full conditions and disclaimers for each license.
##
@@ -186,38 +190,6 @@ sub mmapConfigure ($$$$) {
close(L);
- # The seed length is the shortest read such that all reads longer than this sum to 50x genome size.
- # genomeSize must be set (canu should be failing early if it isn't).
-
- my $seedLength = 500;
-
- {
- my @readLengths;
-
- open(F, "< $wrk/$asm.gkpStore/reads.txt") or caExit("can't open '$wrk/$asm.gkpStore/reads.txt' for reading: $!", undef);
- while (<F>) {
- my @v = split '\s+', $_;
- push @readLengths, $v[2];
- }
- close(F);
-
- @readLengths = sort { $b <=> $a } @readLengths;
-
- my $readLengthSum = 0;
- my $targetSum = getGlobal("corOutCoverage") * getGlobal("genomeSize");
-
- foreach my $l (@readLengths) {
- $readLengthSum += $l;
-
- if ($readLengthSum > $targetSum) {
- $seedLength = $l;
- last;
- }
- }
-
- print STDERR "-- Computed seed length $seedLength from desired output coverage ", getGlobal("corOutCoverage"), " and genome size ", getGlobal("genomeSize"), "\n";
- }
-
# Create a script to generate precomputed blocks, including extracting the reads from gkpStore.
open(F, "> $path/precompute.sh") or caFailure("can't open '$path/precompute.sh' for writing: $!", undef);
@@ -349,32 +321,40 @@ sub mmapConfigure ($$$$) {
print F "mv $path/results/\$qry.mmap.WORKING $path/results/\$qry.mmap\n";
print F "\n";
- print F "\n";
-
print F "if [ -e \"$path/results/\$qry.mmap\" -a \\\n";
print F " ! -e \"$path/results/\$qry.ovb.gz\" ] ; then\n";
print F " \$bin/mmapConvert \\\n";
- print F " -o $path/results/\$qry.mmap.ovb.gz \\\n";
- print F " $path/results/\$qry.mmap\n";
+ print F " -o $path/results/\$qry.mmap.ovb.WORKING.gz \\\n";
+ print F " $path/results/\$qry.mmap \\\n";
+ print F " && \\\n";
+ print F " mv $path/results/\$qry.mmap.ovb.WORKING.gz $path/results/\$qry.mmap.ovb.gz\n";
print F "fi\n";
-
print F "\n";
+ if (getGlobal('saveOverlaps') eq "0") {
+ print F "if [ -e \"$path/results/\$qry.mmap\" -a \\\n";
+ print F " -e \"$path/results/\$qry.mmap.ovb.gz\" ] ; then\n";
+ print F " rm -f $path/results/\$qry.mmap\n";
+ print F "fi\n";
+ print F "\n";
+ }
+
+ print F "if [ -e \"$path/results/\$qry.mmap.ovb.gz\" ] ; then\n";
if (getGlobal("${tag}ReAlign") eq "raw") {
- print F "if [ -e \"$path/results/\$qry.mmap.ovb.gz\" ] ; then\n";
print F " \$bin/overlapPair \\\n";
print F " -G $wrk/$asm.gkpStore \\\n";
print F " -O $path/results/\$qry.mmap.ovb.gz \\\n";
print F " -o $path/results/\$qry.ovb.gz \\\n";
print F " -partial \\\n" if ($typ eq "partial");
- print F " -erate ", getGlobal("obtOvlErrorRate"), " \\\n" if ($typ eq "partial");
- print F " -erate ", getGlobal("utgOvlErrorRate"), " \\\n" if ($typ eq "normal");
+ print F " -erate ", getGlobal("corErrorRate"), " \\\n" if ($tag eq "cor");
+ print F " -erate ", getGlobal("obtOvlErrorRate"), " \\\n" if ($tag eq "obt");
+ print F " -erate ", getGlobal("utgOvlErrorRate"), " \\\n" if ($tag eq "utg");
print F " -memory " . getGlobal("${tag}mmapMemory") . " \\\n";
print F " -t " . getGlobal("${tag}mmapThreads") . " \n";
- print F "fi\n";
} else {
- print F "mv -f \"$path/results/\$qry.mmap.ovb.gz\" \"$path/results/\$qry.ovb.gz\"\n";
+ print F " mv -f \"$path/results/\$qry.mmap.ovb.gz\" \"$path/results/\$qry.ovb.gz\"\n";
}
+ print F "fi\n";
print F "\n";
print F "\n";
@@ -524,6 +504,7 @@ sub mmapCheck ($$$$) {
my $currentJobID = 1;
my @mmapJobs;
my @successJobs;
+ my @miscJobs;
my @failedJobs;
my $failureMessage = "";
@@ -533,18 +514,26 @@ sub mmapCheck ($$$$) {
if (-e "$path/results/$1.ovb.gz") {
push @mmapJobs, "$path/results/$1.mmap\n";
push @successJobs, "$path/results/$1.ovb.gz\n";
+ push @miscJobs, "$path/results/$1.stats\n";
+ push @miscJobs, "$path/results/$1.counts\n";
} elsif (-e "$path/results/$1.ovb") {
push @mmapJobs, "$path/results/$1.mmap\n";
push @successJobs, "$path/results/$1.ovb\n";
+ push @miscJobs, "$path/results/$1.stats\n";
+ push @miscJobs, "$path/results/$1.counts\n";
} elsif (-e "$path/results/$1.ovb.bz2") {
push @mmapJobs, "$path/results/$1.mmap\n";
push @successJobs, "$path/results/$1.ovb.bz2\n";
+ push @miscJobs, "$path/results/$1.stats\n";
+ push @miscJobs, "$path/results/$1.counts\n";
} elsif (-e "$path/results/$1.ovb.xz") {
push @mmapJobs, "$path/results/$1.mmap\n";
push @successJobs, "$path/results/$1.ovb.xz\n";
+ push @miscJobs, "$path/results/$1.stats\n";
+ push @miscJobs, "$path/results/$1.counts\n";
} else {
$failureMessage .= "-- job $path/results/$1.ovb FAILED.\n";
@@ -605,6 +594,10 @@ sub mmapCheck ($$$$) {
print L @successJobs;
close(L);
+ open(L, "> $path/ovljob.more.files") or caExit("failed to open '$path/ovljob.more.files'", undef);
+ print L @miscJobs;
+ close(L);
+
setGlobal("canuIteration", 0);
emitStage($WRK, $asm, "$tag-mmapCheck");
buildHTML($WRK, $asm, $tag);
diff --git a/src/pipelines/canu/OverlapMhap.pm b/src/pipelines/canu/OverlapMhap.pm
index b9e968d..954457c 100644
--- a/src/pipelines/canu/OverlapMhap.pm
+++ b/src/pipelines/canu/OverlapMhap.pm
@@ -91,22 +91,41 @@ sub mhapConfigure ($$$$) {
# Mhap parameters - filterThreshold needs to be a string, else it is printed as 5e-06.
- my $numHashes = "512";
- my $minNumMatches = "3";
- my $threshold = "0.6";
- my $ordSketch = "1536";
- my $ordSketchMer = 12;
-
- if (getGlobal("${tag}MhapSensitivity") eq "sens" || getGlobal("${tag}MhapSensitivity") eq "high") {
- $numHashes = "768";
- $minNumMatches = "2";
- $threshold = "0.6";
- $ordSketch = "1536";
- } elsif (getGlobal("${tag}MhapSensitivity") eq "fast") {
- $numHashes = "256";
- $minNumMatches = "3";
- $threshold = "0.7";
- $ordSketch = "1000";
+ my ($numHashes, $minNumMatches, $threshold, $ordSketch, $ordSketchMer);
+
+ if (!defined(getGlobal("${tag}MhapSensitivity"))) {
+ my $cov = getExpectedCoverage($wrk, $asm);
+
+ setGlobal("${tag}MhapSensitivity", "low"); # Yup, super inefficient. The code is
+ setGlobal("${tag}MhapSensitivity", "normal") if ($cov < 60); # compact and clear and runs once.
+ setGlobal("${tag}MhapSensitivity", "high") if ($cov <= 30); # Live with it.
+
+ print STDERR "-- Set ${tag}MhapSensitivity=", getGlobal("${tag}MhapSensitivity"), " based on read coverage of $cov.\n";
+ }
+
+ if (getGlobal("${tag}MhapSensitivity") eq "low") {
+ $numHashes = 256;
+ $minNumMatches = 3;
+ $threshold = 0.80;
+ $ordSketch = 1000;
+ $ordSketchMer = getGlobal("${tag}MhapOrderedMerSize") + 2;
+
+ } elsif (getGlobal("${tag}MhapSensitivity") eq "normal") {
+ $numHashes = 512;
+ $minNumMatches = 3;
+ $threshold = 0.78;
+ $ordSketch = 1536;
+ $ordSketchMer = getGlobal("${tag}MhapOrderedMerSize");
+
+ } elsif (getGlobal("${tag}MhapSensitivity") eq "high") {
+ $numHashes = 768;
+ $minNumMatches = 2;
+ $threshold = 0.73;
+ $ordSketch = 1536;
+ $ordSketchMer = getGlobal("${tag}MhapOrderedMerSize");
+
+ } else {
+ caFailure("invalid ${tag}MhapSensitivity=" . getGlobal("${tag}MhapSensitivity"), undef);
}
my $filterThreshold = getGlobal("${tag}MhapFilterThreshold");
@@ -127,11 +146,14 @@ sub mhapConfigure ($$$$) {
$numHashes /= 4;
$minNumMatches = floor(1.5 * $minNumMatches);
$ordSketch = floor($ordSketch / 2);
- $ordSketchMer = floor($ordSketchMer * 1.3);
$threshold = 1-getGlobal("${tag}OvlErrorRate");
$blockPerGb *= 2;
}
+ print STDERR "--\n";
+ print STDERR "-- PARAMETERS: hashes=$numHashes, minMatches=$minNumMatches, threshold=$threshold\n";
+ print STDERR "--\n";
+
my $blockSize = int($blockPerGb * $memorySize);
print STDERR "-- Given $memorySize GB, can fit $blockSize reads per block.\n";
@@ -266,38 +288,6 @@ sub mhapConfigure ($$$$) {
# The ignore file is created in Meryl.pm
- # The seed length is the shortest read such that all reads longer than this sum to 50x genome size.
- # genomeSize must be set (canu should be failing early if it isn't).
-
- my $seedLength = 500;
-
- {
- my @readLengths;
-
- open(F, "< $wrk/$asm.gkpStore/reads.txt") or caExit("can't open '$wrk/$asm.gkpStore/reads.txt' for reading: $!", undef);
- while (<F>) {
- my @v = split '\s+', $_;
- push @readLengths, $v[2];
- }
- close(F);
-
- @readLengths = sort { $b <=> $a } @readLengths;
-
- my $readLengthSum = 0;
- my $targetSum = getGlobal("corOutCoverage") * getGlobal("genomeSize");
-
- foreach my $l (@readLengths) {
- $readLengthSum += $l;
-
- if ($readLengthSum > $targetSum) {
- $seedLength = $l;
- last;
- }
- }
-
- print STDERR "-- Computed seed length $seedLength from desired output coverage ", getGlobal("corOutCoverage"), " and genome size ", getGlobal("genomeSize"), "\n";
- }
-
# Create a script to generate precomputed blocks, including extracting the reads from gkpStore.
#getAllowedResources($tag, "mhap");
@@ -366,20 +356,21 @@ sub mhapConfigure ($$$$) {
print F "# So mhap writes its output in the correct spot.\n";
print F "cd $path/blocks\n";
print F "\n";
- print F "$javaPath -server -Xmx", getGlobal("${tag}mhapMemory"), "g \\\n";
- print F " -jar \$bin/mhap-" . getGlobal("${tag}MhapVersion") . ".jar \\\n";
- print F " --weighted -k $merSize \\\n";
+ print F "$javaPath -d64 -server -Xmx", getGlobal("${tag}mhapMemory"), "g \\\n";
+ print F " -jar " . ($^O eq "cygwin" ? "\$(cygpath -w " : "") . "\$bin/mhap-" . getGlobal("${tag}MhapVersion") . ".jar " . ($^O eq "cygwin" ? ")" : "") . "\\\n";
+ print F " --repeat-weight 0.9 -k $merSize \\\n";
+ print F " --supress-noise 2 \\\n" if (defined(getGlobal("${tag}MhapFilterUnique")) && getGlobal("${tag}MhapFilterUnique") == 1);
+ print F " --no-tf \\\n" if (defined(getGlobal("${tag}MhapNoTf")) && getGlobal("${tag}MhapNoTf") == 1);
print F " --num-hashes $numHashes \\\n";
print F " --num-min-matches $minNumMatches \\\n";
print F " --ordered-sketch-size $ordSketch \\\n";
print F " --ordered-kmer-size $ordSketchMer \\\n";
print F " --threshold $threshold \\\n";
print F " --filter-threshold $filterThreshold \\\n";
- print F " --min-store-length " . ($seedLength-1) . " \\\n";
print F " --num-threads ", getGlobal("${tag}mhapThreads"), " \\\n";
- print F " -f $wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore \\\n" if (-e "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore");
- print F " -p $path/blocks/\$job.fasta \\\n";
- print F " -q $path/blocks \\\n";
+ print F " -f " . ($^O eq "cygwin" ? "\$(cygpath -w " : "") . "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore.gz" . ($^O eq "cygwin" ? ") " : "") . "\\\n" if (-e "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore.gz");
+ print F " -p " . ($^O eq "cygwin" ? "\$(cygpath -w " : "") . "$path/blocks/\$job.fasta" . ($^O eq "cygwin" ? ") " : "") . "\\\n";
+ print F " -q " . ($^O eq "cygwin" ? "\$(cygpath -w " : "") . "$path/blocks" .($^O eq "cygwin" ? ") " : "") . "\\\n";
print F "|| \\\n";
print F "mv -f $path/blocks/\$job.dat $path/blocks/\$job.dat.FAILED\n";
print F "\n";
@@ -442,50 +433,62 @@ sub mhapConfigure ($$$$) {
print F getBinDirectoryShellCode();
print F "\n";
print F "if [ ! -e \"$path/results/\$qry.mhap\" ] ; then\n";
- print F " $javaPath -server -Xmx", getGlobal("${tag}mhapMemory"), "g \\\n";
- print F " -jar \$bin/mhap-" . getGlobal("${tag}MhapVersion"). ".jar \\\n";
- print F " --weighted -k $merSize \\\n";
+ print F " $javaPath -d64 -server -Xmx", getGlobal("${tag}mhapMemory"), "g \\\n";
+ print F " -jar " . ($^O eq "cygwin" ? "\$(cygpath -w " : "") . "\$bin/mhap-" . getGlobal("${tag}MhapVersion") . ".jar " . ($^O eq "cygwin" ? ")" : "") . "\\\n";
+ print F " --repeat-weight 0.9 -k $merSize \\\n";
+ print F " --supress-noise 2 \\\n" if (defined(getGlobal("${tag}MhapFilterUnique")) && getGlobal("${tag}MhapFilterUnique") == 1);
+ print F " --no-tf \\\n" if (defined(getGlobal("${tag}MhapNoTf")) && getGlobal("${tag}MhapNoTf") == 1);
print F " --num-hashes $numHashes \\\n";
print F " --num-min-matches $minNumMatches \\\n";
print F " --threshold $threshold \\\n";
print F " --filter-threshold $filterThreshold \\\n";
- print F " --min-store-length " . ($seedLength-1) . " \\\n";
+ print F " --ordered-sketch-size $ordSketch \\\n";
+ print F " --ordered-kmer-size $ordSketchMer \\\n";
print F " --num-threads ", getGlobal("${tag}mhapThreads"), " \\\n";
- print F " -f $wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore \\\n" if (-e "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore");
- print F " -s $path/blocks/\$blk.dat \$slf \\\n";
- print F " -q $path/queries/\$qry \\\n";
+ print F " -f " . ($^O eq "cygwin" ? "\$(cygpath -w " : "") . "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore.gz" . ($^O eq "cygwin" ? ")" : "") . "\\\n" if (-e "$wrk/0-mercounts/$asm.ms$merSize.frequentMers.ignore.gz");
+ print F " -s " . ($^O eq "cygwin" ? "\$(cygpath -w " : "") . "$path/blocks/\$blk.dat \$slf" . ($^O eq "cygwin" ? ")" : "") . "\\\n";
+ print F " -q " . ($^O eq "cygwin" ? "\$(cygpath -w " : "") . "$path/queries/\$qry" . ($^O eq "cygwin" ? ")" : "") . "\\\n";
print F " > $path/results/\$qry.mhap.WORKING \\\n";
print F " && \\\n";
print F " mv -f $path/results/\$qry.mhap.WORKING $path/results/\$qry.mhap\n";
print F "fi\n";
-
print F "\n";
print F "if [ -e \"$path/results/\$qry.mhap\" -a \\\n";
print F " ! -e \"$path/results/\$qry.ovb.gz\" ] ; then\n";
print F " \$bin/mhapConvert \\\n";
print F " \$cvt \\\n";
- print F " -o $path/results/\$qry.mhap.ovb.gz \\\n";
- print F " $path/results/\$qry.mhap\n";
+ print F " -o $path/results/\$qry.mhap.ovb.WORKING.gz \\\n";
+ print F " $path/results/\$qry.mhap \\\n";
+ print F " && \\\n";
+ print F " mv $path/results/\$qry.mhap.ovb.WORKING.gz $path/results/\$qry.mhap.ovb.gz\n";
print F "fi\n";
-
print F "\n";
+ if (getGlobal('saveOverlaps') eq "0") {
+ print F "if [ -e \"$path/results/\$qry.mhap\" -a \\\n";
+ print F " -e \"$path/results/\$qry.mhap.ovb.gz\" ] ; then\n";
+ print F " rm -f $path/results/\$qry.mhap\n";
+ print F "fi\n";
+ print F "\n";
+ }
+
+ print F "if [ -e \"$path/results/\$qry.mhap.ovb.gz\" ] ; then\n";
if (getGlobal("${tag}ReAlign") eq "raw") {
- print F "if [ -e \"$path/results/\$qry.mhap.ovb.gz\" ] ; then\n";
print F " \$bin/overlapPair \\\n";
print F " -G $wrk/$asm.gkpStore \\\n";
print F " -O $path/results/\$qry.mhap.ovb.gz \\\n";
print F " -o $path/results/\$qry.ovb.gz \\\n";
print F " -partial \\\n" if ($typ eq "partial");
- print F " -erate ", getGlobal("obtOvlErrorRate"), " \\\n" if ($typ eq "partial");
- print F " -erate ", getGlobal("utgOvlErrorRate"), " \\\n" if ($typ eq "normal");
+ print F " -erate ", getGlobal("corErrorRate"), " \\\n" if ($tag eq "cor");
+ print F " -erate ", getGlobal("obtOvlErrorRate"), " \\\n" if ($tag eq "obt");
+ print F " -erate ", getGlobal("utgOvlErrorRate"), " \\\n" if ($tag eq "utg");
print F " -memory " . getGlobal("${tag}mhapMemory") . " \\\n";
print F " -t " . getGlobal("${tag}mhapThreads") . " \n";
- print F "fi\n";
} else {
- print F "mv -f \"$path/results/\$qry.mhap.ovb.gz\" \"$path/results/\$qry.ovb.gz\"\n";
+ print F " mv -f \"$path/results/\$qry.mhap.ovb.gz\" \"$path/results/\$qry.ovb.gz\"\n";
}
+ print F "fi\n";
print F "\n";
print F "\n";
@@ -641,6 +644,7 @@ sub mhapCheck ($$$$) {
my $currentJobID = 1;
my @mhapJobs;
my @successJobs;
+ my @miscJobs;
my @failedJobs;
my $failureMessage = "";
@@ -650,18 +654,26 @@ sub mhapCheck ($$$$) {
if (-e "$path/results/$1.ovb.gz") {
push @mhapJobs, "$path/results/$1.mhap\n";
push @successJobs, "$path/results/$1.ovb.gz\n";
+ push @miscJobs, "$path/results/$1.stats\n";
+ push @miscJobs, "$path/results/$1.counts\n";
} elsif (-e "$path/results/$1.ovb") {
push @mhapJobs, "$path/results/$1.mhap\n";
push @successJobs, "$path/results/$1.ovb\n";
+ push @miscJobs, "$path/results/$1.stats\n";
+ push @miscJobs, "$path/results/$1.counts\n";
} elsif (-e "$path/results/$1.ovb.bz2") {
push @mhapJobs, "$path/results/$1.mhap\n";
push @successJobs, "$path/results/$1.ovb.bz2\n";
+ push @miscJobs, "$path/results/$1.stats\n";
+ push @miscJobs, "$path/results/$1.counts\n";
} elsif (-e "$path/results/$1.ovb.xz") {
push @mhapJobs, "$path/results/$1.mhap\n";
push @successJobs, "$path/results/$1.ovb.xz\n";
+ push @miscJobs, "$path/results/$1.stats\n";
+ push @miscJobs, "$path/results/$1.counts\n";
} else {
$failureMessage .= "-- job $path/results/$1.ovb FAILED.\n";
@@ -722,6 +734,10 @@ sub mhapCheck ($$$$) {
print L @successJobs;
close(L);
+ open(L, "> $path/ovljob.more.files") or caExit("failed to open '$path/ovljob.more.files'", undef);
+ print L @miscJobs;
+ close(L);
+
setGlobal("canuIteration", 0);
emitStage($WRK, $asm, "$tag-mhapCheck");
buildHTML($WRK, $asm, $tag);
diff --git a/src/pipelines/canu/OverlapStore.pm b/src/pipelines/canu/OverlapStore.pm
index e176dc4..b8cdefa 100644
--- a/src/pipelines/canu/OverlapStore.pm
+++ b/src/pipelines/canu/OverlapStore.pm
@@ -76,6 +76,9 @@ sub createOverlapStoreSequential ($$$$) {
my $memSize = getGlobal("ovsMemory");
+ # The parallel store build will unlimit 'max user processes'. The sequential method usually
+ # runs out of open file handles first (meaning it has never run out of processes yet).
+
$cmd = "$bin/ovStoreBuild \\\n";
$cmd .= " -O $wrk/$asm.ovlStore.BUILDING \\\n";
$cmd .= " -G $wrk/$asm.gkpStore \\\n";
@@ -120,9 +123,9 @@ sub getNumOlapsAndSlices ($$) {
my $wrk = shift @_;
my $asm = shift @_;
- my $numOlaps = 0;
- my $numSlices = 0;
- my $memLimit = 0;
+ my $numOlaps = undef;
+ my $numSlices = undef;
+ my $memLimit = undef;
open(F, "< $wrk/$asm.ovlStore.BUILDING/config.err") or caExit("can't open '$wrk/$asm.ovlStore.BUILDING/config.err' for reading: $!\n", undef);
while (<F>) {
@@ -134,8 +137,8 @@ sub getNumOlapsAndSlices ($$) {
}
close(F);
- if (($numOlaps == 0) || ($numSlices == 0)) {
- caExit("Failed to find any overlaps ($numOlaps) or slices ($numSlices).\n", undef);
+ if (!defined($numOlaps) || !defined($numSlices) || !defined($memLimit)) {
+ caExit("Failed to find any overlaps ($numOlaps) or slices ($numSlices) or memory limit ($memLimit)", undef);
}
return($numOlaps, $numSlices, $memLimit);
@@ -238,6 +241,18 @@ sub overlapStoreConfigure ($$$$) {
print F " rm -rf \"$wrk/$asm.ovlStore.BUILDING/create\$bn\"\n";
print F "fi\n";
print F "\n";
+ print F "max=`ulimit -Hu`\n";
+ print F "bef=`ulimit -Su`\n";
+ print F "if [ \$bef -lt \$max ] ; then\n";
+ print F " ulimit -Su \$max\n";
+ print F " aft=`ulimit -Su`\n";
+ print F " echo \"Changed max processes per user from \$bef to \$aft (max \$max).\"\n";
+ print F " echo \"\"\n";
+ print F "else\n";
+ print F " echo \"Max processes per user limited to \$bef, no increase possible.\"\n";
+ print F " echo \"\"\n";
+ print F "fi\n";
+ print F "\n";
print F getBinDirectoryShellCode();
print F "\n";
print F "\$bin/ovStoreBucketizer \\\n";
@@ -267,6 +282,18 @@ sub overlapStoreConfigure ($$$$) {
print F " exit 1\n";
print F "fi\n";
print F "\n";
+ print F "max=`ulimit -Hu`\n";
+ print F "bef=`ulimit -Su`\n";
+ print F "if [ \$bef -lt \$max ] ; then\n";
+ print F " ulimit -Su \$max\n";
+ print F " aft=`ulimit -Su`\n";
+ print F " echo \"Changed max processes per user from \$bef to \$aft (max \$max).\"\n";
+ print F " echo \"\"\n";
+ print F "else\n";
+ print F " echo \"Max processes per user limited to \$bef, no increase possible.\"\n";
+ print F " echo \"\"\n";
+ print F "fi\n";
+ print F "\n";
print F getBinDirectoryShellCode();
print F "\n";
print F "\$bin/ovStoreSorter \\\n";
@@ -533,8 +560,8 @@ sub createOverlapStoreParallel ($$$$) {
overlapStoreBucketizerCheck($WRK, $asm, $tag, $files) foreach (1..getGlobal("canuIterationMax") + 1);
overlapStoreSorterCheck($WRK, $asm, $tag, $files) foreach (1..getGlobal("canuIterationMax") + 1);
- if (runCommand("$wrk/$asm.ovlStore.BUILDING", "$wrk/$asm.ovlStore.BUILDING/scripts/3-index.sh > $wrk/$asm.ovlStore.BUILDING/scripts/3-index.err 2>&1")) {
- caExit("failed to build index for overlap store", "$wrk/$asm.ovlStore.BUILDING/scripts/3-index.err");
+ if (runCommand("$wrk/$asm.ovlStore.BUILDING", "$wrk/$asm.ovlStore.BUILDING/scripts/3-index.sh > $wrk/$asm.ovlStore.BUILDING/logs/3-index.err 2>&1")) {
+ caExit("failed to build index for overlap store", "$wrk/$asm.ovlStore.BUILDING/logs/3-index.err");
}
rename "$wrk/$asm.ovlStore.BUILDING", "$wrk/$asm.ovlStore";
@@ -556,7 +583,10 @@ sub generateOverlapStoreStats ($$) {
$cmd .= " > $wrk/$asm.ovlStore.summary.err 2>&1";
if (runCommand($wrk, $cmd)) {
- caExit("failed to generate statistics for the overlap store", "$wrk/$asm.ovlStore.summary.err");
+ print STDERR "--\n";
+ print STDERR "-- WARNING: failed to generate statistics for the overlap store; no summary will appear in HTML output.\n";
+ print STDERR "--\n";
+ print STDERR "----------------------------------------\n";
}
}
@@ -599,7 +629,7 @@ sub createOverlapStore ($$$$) {
my $bytes = 0;
my $files = 0;
- foreach my $file ("$path/ovljob.files", "$path/mhap.files", "$path/precompute.files") {
+ foreach my $file ("$path/ovljob.files", "$path/ovljob.more.files", "$path/mhap.files", "$path/mmap.files", "$path/precompute.files") {
next if (! -e $file);
open(F, "< $file") or caExit("can't open '$file' for reading: $!\n", undef);
diff --git a/src/pipelines/canu/Unitig.pm b/src/pipelines/canu/Unitig.pm
index 9f9ae6c..59c42cf 100644
--- a/src/pipelines/canu/Unitig.pm
+++ b/src/pipelines/canu/Unitig.pm
@@ -54,45 +54,6 @@ use canu::Gatekeeper;
use canu::HTML;
use canu::Meryl;
-sub roundoff($$) {
- my $num = shift;
- my $base = shift || 1;
-
- return int($num/$base + 0.5)*$base;
-}
-
-sub estimateOverlapSize($$) {
- my $wrk = shift @_;
- my $asm = shift @_;
- my $bin = getBinDirectory();
- my $minread = getGlobal("minReadLength");
- my $ovl = getGlobal("minOverlapLength");
- my $gs = getGlobal("genomeSize");
-
- # if the user manually set a threshold, use theirs, don't change it
- if ($ovl != 500) {
- return $ovl;
- }
-
- # for larger genomes bogart works better when ignoring short (repetitive) overlaps
- # this is a temoporary fix while bogart splitting is improved
- # we don't do it for all genomes to avoid losing overlaps for assembly of small genomes
- open(F, "$bin/gatekeeperDumpMetaData -stats -G $wrk/$asm.gkpStore | ") or caFailure("failed to read gatekeeper stats fromfrom '$wrk/$asm.gkpStore'", undef);
- while (<F>) {
- my ($junk1, $library, $junk2, $reads, $junk3, $junk4, $bases, $junk5, $average, $junk6, $min, $junk7, $max) = split '\s+', $_;
- if ($library == 0) {
- my $cov = ceil($bases / $gs);
-
- if ($gs >= 100000000) {
- $ovl = roundoff(($average - $minread + $ovl) * 0.65, 500);
- print STDERR "-- Average read length $average (found $reads reads with $bases bases). Minimum overlap set to $ovl\n";
- }
- last;
- }
- }
- close(F);
- return $ovl;
-}
@@ -195,8 +156,7 @@ sub unitig ($$) {
my $perPart = int(getNumberOfReadsInStore($wrk, $asm) / getGlobal("cnsPartitions"));
my $minPart = getGlobal("cnsPartitionMin");
- my $genomeCoverage = int(1.3 * getGenomeCoverage($wrk, $asm, getGlobal("utgOvlMerSize")));
- my $overlapLength = estimateOverlapSize($wrk, $asm);
+ my $overlapLength = getGlobal("minOverlapLength");
$perPart = ($perPart < $minPart) ? ($perPart) : ($minPart);
@@ -221,16 +181,18 @@ sub unitig ($$) {
print F " -o $wrk/4-unitigger/$asm \\\n";
print F " -B $perPart \\\n";
print F " -gs " . getGlobal("genomeSize") . " \\\n";
- print F " -eg " . getGlobal("utgGraphErrorRate") . " \\\n";
- print F " -eb " . getGlobal("utgBubbleErrorRate") . " \\\n";
- print F " -em " . getGlobal("utgMergeErrorRate") . " \\\n";
- print F " -er " . getGlobal("utgRepeatErrorRate") . " \\\n";
+ print F " -eg " . getGlobal("utgOvlErrorRate") . " \\\n";
+ print F " -eM " . getGlobal("utgOvlErrorRate") . " \\\n";
+ print F " -el " . $overlapLength . " \\\n";
+ print F " -dg " . getGlobal("utgGraphDeviation") . " \\\n";
+ print F " -db " . getGlobal("utgGraphDeviation") . " \\\n";
+ print F " -dr " . getGlobal("utgRepeatDeviation") . " \\\n";
+ print F " -ca " . getGlobal("utgRepeatConfusedBP"). " \\\n";
+ print F " -cp " . "500" . " \\\n";
print F " -threads " . getGlobal("batThreads") . " \\\n" if (defined(getGlobal("batThreads")));
print F " -M " . getGlobal("batMemory") . " \\\n" if (defined(getGlobal("batMemory")));
- print F " " . getGlobal("batOptions") . " \\\n" if (defined(getGlobal("batOptions")));
print F " -unassembled " . getGlobal("contigFilter") . " \\\n" if (defined(getGlobal("contigFilter")));
- print F " -repeatdetect 6 " . $genomeCoverage . " 15" . " \\\n" if (defined($genomeCoverage));
- print F " -el " . $overlapLength . " \\\n";
+ print F " " . getGlobal("batOptions") . " \\\n" if (defined(getGlobal("batOptions")));
print F " > $wrk/4-unitigger/unitigger.err 2>&1 \\\n";
print F "&& \\\n";
print F "mv $wrk/$asm.tigStore.WORKING $wrk/$asm.tigStore.FINISHED\n";
diff --git a/src/stores/gatekeeperCreate.C b/src/stores/gatekeeperCreate.C
index 66bed6b..eb418a0 100644
--- a/src/stores/gatekeeperCreate.C
+++ b/src/stores/gatekeeperCreate.C
@@ -84,8 +84,8 @@ loadFASTA(char *L,
// Clear the sequence.
- S[0] = 0;
- Q[0] = -1; // Sentinel to tell us to use the fixed QV value
+ S[0] = 0;
+ Q[0] = 0; // Sentinel to tell gatekeeper to use the fixed QV value
Slen = 0;
@@ -317,7 +317,7 @@ loadFASTQ(char *L,
// If we're not using QVs, just reset the first value to -1. This is the sentinel that FASTA sequences set,
// causing the encoding later to use a fixed QV for all bases.
- Q[0] = -1;
+ Q[0] = 0;
#endif
diff --git a/src/stores/gatekeeperDumpFASTQ.C b/src/stores/gatekeeperDumpFASTQ.C
index 78b8b76..fe2f3e2 100644
--- a/src/stores/gatekeeperDumpFASTQ.C
+++ b/src/stores/gatekeeperDumpFASTQ.C
@@ -48,23 +48,27 @@
//
class libOutput {
public:
- libOutput(char const *outPrefix, char const *libName = NULL) {
+ libOutput(char const *outPrefix, char const *outSuffix, char const *libName = NULL) {
strcpy(_p, outPrefix);
+ if (outSuffix[0])
+ sprintf(_s, ".%s", outSuffix);
+ else
+ _s[0] = 0;
+
if (libName)
strcpy(_n, libName);
else
_n[0] = 0;
- _FASTA = NULL;
- _FASTQ = NULL;
+ _WRITER = NULL;
+ _FASTA = NULL;
+ _FASTQ = NULL;
};
~libOutput() {
- if (_FASTA)
- fclose(_FASTA);
- if (_FASTQ)
- fclose(_FASTQ);
+ if (_WRITER)
+ delete _WRITER;
};
FILE *getFASTQ(void) {
@@ -79,14 +83,19 @@ public:
char N[FILENAME_MAX];
if (_n[0])
- sprintf(N, "%s.%s.fastq", _p, _n);
+ sprintf(N, "%s.%s.fastq%s", _p, _n, _s);
else
- sprintf(N, "%s.fastq", _p);
+ sprintf(N, "%s.fastq%s", _p, _s);
- errno = 0;
- _FASTQ = ((_p[0] == '-') && (_p[1] == 0)) ? stdout : fopen(N, "w");
- if (errno)
- fprintf(stderr, "Failed to open FASTQ output file '%s': %s\n", N, strerror(errno)), exit(1);
+ if ((_p[0] == '-') && (_p[1] == 0)) {
+ sprintf(N, "(stdout)");
+ _FASTQ = stdout;
+ }
+
+ else {
+ _WRITER = new compressedFileWriter(N);
+ _FASTQ = _WRITER->file();
+ }
return(_FASTQ);
};
@@ -104,33 +113,66 @@ public:
char N[FILENAME_MAX];
if (_n[0])
- sprintf(N, "%s.%s.fasta", _p, _n);
+ sprintf(N, "%s.%s.fasta%s", _p, _n, _s);
else
- sprintf(N, "%s.fasta", _p);
+ sprintf(N, "%s.fasta%s", _p, _s);
- errno = 0;
- _FASTA = ((_p[0] == '-') && (_p[1] == 0)) ? stdout : fopen(N, "w");
- if (errno)
- fprintf(stderr, "Failed to open FASTA output file '%s': %s\n", N, strerror(errno)), exit(1);
+ if ((_p[0] == '-') && (_p[1] == 0)) {
+ sprintf(N, "(stdout)");
+ _FASTA = stdout;
+ }
+
+ else {
+ _WRITER = new compressedFileWriter(N);
+ _FASTA = _WRITER->file();
+ }
return(_FASTA);
};
private:
char _p[FILENAME_MAX];
+ char _s[FILENAME_MAX];
char _n[FILENAME_MAX];
- FILE *_FASTA;
- FILE *_FASTQ;
+ compressedFileWriter *_WRITER;
+ FILE *_FASTA;
+ FILE *_FASTQ;
};
+char *
+scanPrefix(char *prefix) { // Split a trailing .gz/.bz2/.xz off 'prefix' (modified in place) and return the extension without its dot.
+ int32 len = strlen(prefix);
+ // Each test ignores case and requires at least one character before the extension.
+ if ((len > 3) && (strcasecmp(prefix + len - 3, ".gz") == 0)) {
+ prefix[len-3] = 0; // Overwrite the '.' with NUL, so 'prefix' now ends before the extension.
+ return(prefix + len - 2); // The extension text is still in the buffer, just past the new terminator.
+ }
+
+ if ((len > 4) && (strcasecmp(prefix + len - 4, ".bz2") == 0)) {
+ prefix[len-4] = 0;
+ return(prefix + len - 3);
+ }
+
+ if ((len > 3) && (strcasecmp(prefix + len - 3, ".xz") == 0)) {
+ prefix[len-3] = 0;
+ return(prefix + len - 2);
+ }
+
+ return(prefix + len); // No recognized extension: return the terminating NUL, i.e. an empty suffix.
+}
+
+
+
+
int
main(int argc, char **argv) {
char *gkpStoreName = NULL;
char *outPrefix = NULL;
+ char *outSuffix = NULL;
char *clrName = NULL;
@@ -158,6 +200,7 @@ main(int argc, char **argv) {
} else if (strcmp(argv[arg], "-o") == 0) {
outPrefix = argv[++arg];
+ outSuffix = scanPrefix(outPrefix);
} else if (strcmp(argv[arg], "-c") == 0) {
@@ -216,6 +259,7 @@ main(int argc, char **argv) {
fprintf(stderr, " -G gkpStore\n");
fprintf(stderr, " -o fastq-prefix write files fastq-prefix.(libname).fastq, ...\n");
fprintf(stderr, " if fastq-prefix is '-', all sequences output to stdout\n");
+ fprintf(stderr, " if fastq-prefix ends in .gz, .bz2 or .xz, output is compressed\n");
fprintf(stderr, "\n");
fprintf(stderr, " -l libToDump output only read in library number libToDump (NOT IMPLEMENTED)\n");
fprintf(stderr, " -r id[-id] output only the single read 'id', or the specified range of ids\n");
@@ -264,10 +308,10 @@ main(int argc, char **argv) {
// Allocate outputs. If withLibName == false, all reads will artificially be in lib zero, the
// other files won't ever be created. Otherwise, the zeroth file won't ever be created.
- out[0] = new libOutput(outPrefix, NULL);
+ out[0] = new libOutput(outPrefix, outSuffix, NULL);
for (uint32 i=1; i<=numLibs; i++)
- out[i] = new libOutput(outPrefix, gkpStore->gkStore_getLibrary(i)->gkLibrary_libraryName());
+ out[i] = new libOutput(outPrefix, outSuffix, gkpStore->gkStore_getLibrary(i)->gkLibrary_libraryName());
// Grab a new readData, and iterate through reads to dump.
diff --git a/src/stores/gkStore.C b/src/stores/gkStore.C
index aab7d64..f81003f 100644
--- a/src/stores/gkStore.C
+++ b/src/stores/gkStore.C
@@ -368,7 +368,7 @@ gkRead::gkRead_encodeSeqQlt(char *H, char *S, char *Q, uint32 qv) {
uint32 Slen = _seqLen = strlen(S);
uint32 Qlen = 0;
- if (Q[0] != -1) {
+ if (Q[0] != 0) {
Qlen = strlen(Q);
if (Slen < Qlen) {
@@ -384,9 +384,8 @@ gkRead::gkRead_encodeSeqQlt(char *H, char *S, char *Q, uint32 qv) {
Q[ii] = Q[Qlen-1];
}
- if (Q[0] != -1)
- for (uint32 ii=0; ii<Qlen; ii++)
- Q[ii] -= '!';
+ for (uint32 ii=0; ii<Qlen; ii++)
+ Q[ii] -= '!';
}
// Compute the preferred encodings. If either fail, the length is set to zero, and ...
@@ -415,29 +414,28 @@ gkRead::gkRead_encodeSeqQlt(char *H, char *S, char *Q, uint32 qv) {
rd->gkReadData_encodeBlobChunk("BLOB", 0, NULL);
rd->gkReadData_encodeBlobChunk("VERS", 4, &blobVers);
- if (seq2Len > 0)
+ if (seq2Len > 0)
rd->gkReadData_encodeBlobChunk("2SEQ", seq2Len, seq); // Two-bit encoded sequence (ACGT only)
else if (seq3Len > 0)
rd->gkReadData_encodeBlobChunk("3SEQ", seq3Len, seq); // Three-bit encoded sequence (ACGTN)
else
rd->gkReadData_encodeBlobChunk("USEQ", Slen, S); // Unencoded sequence
- if (qlt4Len > 0)
+ if (qlt4Len > 0)
rd->gkReadData_encodeBlobChunk("4QLT", qlt4Len, qlt); // Four-bit (0-15) encoded QVs
else if (qlt5Len > 0)
rd->gkReadData_encodeBlobChunk("5QLT", qlt5Len, qlt); // Five-bit (0-32) encoded QVs
- else if (Q[0] != -1)
- rd->gkReadData_encodeBlobChunk("UQLT", Qlen, Q); // Unencoded quality
+ else if (Q[0] == 0)
+ rd->gkReadData_encodeBlobChunk("QVAL", 4, &qv); // Constant QV for every base
else
- rd->gkReadData_encodeBlobChunk("QVAL", 4, &qv); // Constant QV for every base
+ rd->gkReadData_encodeBlobChunk("UQLT", Qlen, Q); // Unencoded quality
rd->gkReadData_encodeBlobChunk("STOP", 0, NULL);
// Cleanup. Restore the QV's. Delete temporary storage.
- if (Q[0] != -1)
- for (uint32 ii=0; ii<Qlen; ii++)
- Q[ii] += '!';
+ for (uint32 ii=0; ii<Qlen; ii++)
+ Q[ii] += '!';
delete [] seq;
delete [] qlt;
@@ -1278,7 +1276,7 @@ gkStore::gkStore_deletePartitions(void) {
if (errno)
fprintf(stderr, "ERROR: failed to open partition meta data '%s': %s\n", path, strerror(errno)), exit(1);
- fread(&_numberOfPartitions, sizeof(uint32), 1, F);
+ AS_UTL_safeRead(F, &_numberOfPartitions, "gkStore_deletePartitions::numberOfPartitions", sizeof(uint32), 1);
fclose(F);
diff --git a/src/stores/gkStore.H b/src/stores/gkStore.H
index d57c4a7..fb7eee0 100644
--- a/src/stores/gkStore.H
+++ b/src/stores/gkStore.H
@@ -461,14 +461,15 @@ public:
if (_instance != NULL) {
_instanceCount++;
+ //fprintf(stderr, "gkStore_open(%s), %u instances now\n", path, _instanceCount);
}
// Otherwise, make a new one.
else {
- //fprintf(stderr, "gkStore_open(%s) CREATE\n", path);
_instance = new gkStore(path, mode, partID);
_instanceCount = 1;
+ //fprintf(stderr, "gkStore_open(%s), first instance, create store\n", path);
}
}
@@ -482,11 +483,14 @@ public:
{
_instanceCount--;
- //fprintf(stderr, "gkStore_close(%s) return existing, %u instances remain\n", _storeName, _instanceCount);
-
if (_instanceCount == 0) {
delete _instance;
_instance = NULL;
+ //fprintf(stderr, "gkStore_close(%s), no instances remain, delete store\n", _storeName, _instanceCount);
+ }
+
+ else {
+ //fprintf(stderr, "gkStore_close(%s), %u instances remain\n", _storeName, _instanceCount);
}
}
diff --git a/src/stores/gkStoreEncode.C b/src/stores/gkStoreEncode.C
index f642de0..92e8173 100644
--- a/src/stores/gkStoreEncode.C
+++ b/src/stores/gkStoreEncode.C
@@ -133,7 +133,7 @@ gkRead::gkRead_decode3bit(uint8 *UNUSED(chunk), uint32 UNUSED(chunkLen), char *U
// Encode qualities as 4 bit integers. Doesn't touch seq.
uint32
gkRead::gkRead_encode4bit(uint8 *&UNUSED(chunk), char *qlt, uint32 UNUSED(seqLen)) {
- if (qlt[0] == -1)
+ if (qlt[0] == 0)
// No QVs in the string.
return(0);
@@ -152,7 +152,7 @@ gkRead::gkRead_decode4bit(uint8 *UNUSED(chunk), uint32 UNUSED(chunkLen), char *U
// Encode qualities as 5 bit integers. Doesn't touch seq.
uint32
gkRead::gkRead_encode5bit(uint8 *&UNUSED(chunk), char *qlt, uint32 UNUSED(seqLen)) {
- if (qlt[0] == -1)
+ if (qlt[0] == 0)
// No QVs in the string.
return(0);
diff --git a/src/stores/ovOverlap.C b/src/stores/ovOverlap.C
index 72ab6a5..d4b70de 100644
--- a/src/stores/ovOverlap.C
+++ b/src/stores/ovOverlap.C
@@ -19,6 +19,10 @@
* are Copyright 2014-2015 Battelle National Biodefense Institute, and
* are subject to the BSD 3-Clause License
*
+ * Sergey Koren beginning on 2016-MAR-11
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -80,6 +84,21 @@ ovOverlap::toString(char *str,
erate() * 100.0,
(newLine) ? "\n" : "");
break;
+ case ovOverlapAsPaf:
+ // miniasm/map expects entries to be separated by tabs
+ // no padding spaces on names, so we don't confuse read identifiers
+ sprintf(str, "%"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%c\t%"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P" %s",
+ a_iid,
+ (g->gkStore_getRead(a_iid)->gkRead_sequenceLength()), a_bgn(), a_end(),
+ flipped() ? '-' : '+',
+ b_iid,
+ (g->gkStore_getRead(b_iid)->gkRead_sequenceLength()), flipped() ? b_end() : b_bgn(), flipped() ? b_bgn() : b_end(),
+ (uint32)floor(span() == 0 ? (1-erate() * (a_end()-a_bgn())) : (1-erate()) * span()),
+ span() == 0 ? a_end() - a_bgn() : span(),
+ 255,
+ (newLine) ? "\n" : "");
+ break;
+
}
return(str);
diff --git a/src/stores/ovStore.H b/src/stores/ovStore.H
index 6b20ef9..c178efa 100644
--- a/src/stores/ovStore.H
+++ b/src/stores/ovStore.H
@@ -23,6 +23,10 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-11
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -46,6 +50,7 @@
#define AS_OVS_decodeEvalue(E) ((E) / 10000.0)
#define AS_OVS_encodeEvalue(Q) (((Q) < AS_OVS_decodeEvalue(AS_MAX_EVALUE)) ? (int)(10000.0 * (Q) + 0.5) : AS_MAX_EVALUE)
+#define AS_MAX_ERATE AS_OVS_decodeEvalue(AS_MAX_EVALUE)
// The old implementation allowed up to 20-bit reads, and used 3 32-bit words. No alignment was
// stored.
@@ -150,6 +155,7 @@ enum ovOverlapDisplayType {
ovOverlapAsCoords = 1, // Show bgn,end for each read
ovOverlapAsRaw = 2, // Show all four hangs
ovOverlapAsCompat = 3, // Show in a format more-or-less compatible with CA 8.3
+ ovOverlapAsPaf = 4, // Show in a format compatible with miniasm
};
diff --git a/src/stores/ovStoreBuild.C b/src/stores/ovStoreBuild.C
index 7a6a1be..0c66ce7 100644
--- a/src/stores/ovStoreBuild.C
+++ b/src/stores/ovStoreBuild.C
@@ -64,8 +64,8 @@ using namespace std;
static
uint32 *
computeIIDperBucket(uint32 fileLimit,
- uint64 memoryLimit,
- uint64 maxMemoryLimit,
+ uint64 minMemory,
+ uint64 maxMemory,
uint32 maxIID,
vector<char *> &fileList) {
uint32 *iidToBucket = new uint32 [maxIID];
@@ -75,9 +75,10 @@ computeIIDperBucket(uint32 fileLimit,
// that the IIDs must be consecutive; the obvious, simple and clean division of 'mod' won't work.
if (fileList[0][0] == '-') {
- if (memoryLimit > 0) {
- memoryLimit = 0;
- fileLimit = maxFiles;
+ if (maxMemory > 0) {
+ minMemory = 0;
+ maxMemory = 0;
+ fileLimit = maxFiles;
fprintf(stderr, "WARNING: memory limit (-M) specified, but can't be used with inputs from stdin; using %d files instead.\n", fileLimit);
} else {
@@ -135,7 +136,7 @@ computeIIDperBucket(uint32 fileLimit,
fclose(C);
- fprintf(stderr, "Summing overlap counts for %u reads from '%s'.\n", perLen, countsName);
+ //fprintf(stderr, "Summing overlap counts for %u reads from '%s'.\n", perLen, countsName);
assert(perLen <= maxIID);
@@ -161,7 +162,7 @@ computeIIDperBucket(uint32 fileLimit,
// Partition the overlaps into buckets.
- uint64 olapsPerBucketMax = 0;
+ uint64 olapsPerBucketMax = 1;
double GBperOlap = ovOverlapSortSize / 1024.0 / 1024.0 / 1024.0;
// If a file limit, distribute the overlaps to equal sized files.
@@ -172,17 +173,51 @@ computeIIDperBucket(uint32 fileLimit,
}
// If a memory limit, distribute the overlaps to files no larger than the limit.
- if (memoryLimit > 0) {
- // iterate until we can fit the files into file system limits, give up if we hit our max limit
+ //
+ // This will pick the smallest memory size that uses fewer than maxFiles buckets. Unreasonable
+ // values can break this - either too low memory or too high allowed open files (an OS limit).
+
+ if (maxMemory > 0) {
+ fprintf(stderr, "Configuring for %.2f GB to %.2f GB memory.\n",
+ minMemory / 1024.0 / 1024.0 / 1024.0,
+ maxMemory / 1024.0 / 1024.0 / 1024.0);
+
+ if (minMemory < MEMORY_OVERHEAD + ovOverlapSortSize)
+ minMemory = MEMORY_OVERHEAD + ovOverlapSortSize;
+
+ uint64 incr = (maxMemory - minMemory) / 1000;
+ if (incr < 1)
+ incr = 1;
+
+ // iterate until we can fit the files into file system limits.
+
do {
- olapsPerBucketMax = (memoryLimit - MEMORY_OVERHEAD) / ovOverlapSortSize;
- fprintf(stderr, "Will sort using "F_U64" files; "F_U64" (%.2f million) overlaps per bucket; %.2f GB memory per bucket\n",
- numOverlaps / olapsPerBucketMax + 1,
- olapsPerBucketMax,
- olapsPerBucketMax / 1000000.0,
- olapsPerBucketMax * GBperOlap);
- memoryLimit += 1024 * 1024 * 1024;
- } while (memoryLimit <= maxMemoryLimit && ( numOverlaps / olapsPerBucketMax + 1) > maxFiles / 2);
+ olapsPerBucketMax = (minMemory - MEMORY_OVERHEAD) / ovOverlapSortSize;
+ minMemory += incr;
+ } while ((minMemory <= maxMemory) &&
+ (numOverlaps / olapsPerBucketMax + 1 > 0.50 * maxFiles));
+
+ // Should we prefer finding 0.50 * maxFiles/2 (as above) but allow up to, say, 0.75 * maxFiles if 0.50 can't be satisfied?
+ // Is the 0.5 scaling because we open two files per bucket? Seems very tight if so.
+
+ // Give up if we hit our max limit.
+
+ if ((minMemory > maxMemory) ||
+ (numOverlaps / olapsPerBucketMax + 1) > 0.50 * maxFiles) {
+ fprintf(stderr, "ERROR: Cannot sort %.2f million overlaps using %.2f GB memory; too few file handles available.\n",
+ numOverlaps / 1000000.0,
+ maxMemory / 1024.0 / 1024.0 / 1024.0);
+ fprintf(stderr, "ERROR: olapsPerBucket "F_U64"\n", olapsPerBucketMax);
+ fprintf(stderr, "ERROR: buckets "F_U64"\n", numOverlaps / olapsPerBucketMax + 1);
+ fprintf(stderr, "ERROR: Increase memory size (in canu, ovsMemory; in ovStoreBuild, -M)\n");
+ exit(1);
+ }
+
+ fprintf(stderr, "Will sort using "F_U64" files; "F_U64" (%.2f million) overlaps per bucket; %.2f GB memory per bucket\n",
+ numOverlaps / olapsPerBucketMax + 1,
+ olapsPerBucketMax,
+ olapsPerBucketMax / 1000000.0,
+ olapsPerBucketMax * GBperOlap + MEMORY_OVERHEAD / 1024.0 / 1024.0 / 1024.0);
}
// Given the limit on each bucket, count the number of buckets needed, then reset the limit on
@@ -226,7 +261,9 @@ computeIIDperBucket(uint32 fileLimit,
}
fprintf(stderr, "Will sort %.3f million overlaps per bucket, using %u buckets %.2f GB per bucket.\n",
- olapsPerBucketMax / 1000000.0, iidToBucket[maxIID-1], olapsPerBucketMax * GBperOlap);
+ olapsPerBucketMax / 1000000.0,
+ iidToBucket[maxIID-1],
+ olapsPerBucketMax * GBperOlap + MEMORY_OVERHEAD / 1024.0 / 1024.0 / 1024.0);
delete [] overlapsPerRead;
@@ -267,8 +304,8 @@ main(int argc, char **argv) {
char *ovlName = NULL;
char *gkpName = NULL;
uint32 fileLimit = 0;
- uint64 memoryLimit = (uint64)4 * 1024 * 1024 * 1024;
- uint64 maxMemoryLimit = memoryLimit;
+ uint64 minMemory = (uint64)1 * 1024 * 1024 * 1024;
+ uint64 maxMemory = (uint64)4 * 1024 * 1024 * 1024;
double maxError = 1.0;
uint32 minOverlap = 0;
@@ -293,13 +330,17 @@ main(int argc, char **argv) {
} else if (strcmp(argv[arg], "-F") == 0) {
fileLimit = atoi(argv[++arg]);
- memoryLimit = 0;
+ minMemory = 0;
+ maxMemory = 0;
} else if (strcmp(argv[arg], "-M") == 0) {
- fileLimit = 0;
- AS_UTL_decodeRange(argv[++arg], memoryLimit, maxMemoryLimit);
- memoryLimit = (uint64)ceil(memoryLimit) * 1024.0 * 1024.0 * 1024.0;
- maxMemoryLimit = (uint64)ceil(maxMemoryLimit) * 1024.0 * 1024.0 * 1024.0;
+ double lo=0.0, hi=0.0;
+
+ AS_UTL_decodeRange(argv[++arg], lo, hi);
+
+ minMemory = (uint64)ceil(lo * 1024.0 * 1024.0 * 1024.0);
+ maxMemory = (uint64)ceil(hi * 1024.0 * 1024.0 * 1024.0);
+ fileLimit = 0;
} else if (strcmp(argv[arg], "-e") == 0) {
maxError = atof(argv[++arg]);
@@ -336,7 +377,7 @@ main(int argc, char **argv) {
err++;
if (fileLimit > sysconf(_SC_OPEN_MAX) - 16)
err++;
- if (memoryLimit < MEMORY_OVERHEAD)
+ if (maxMemory < MEMORY_OVERHEAD)
err++;
if (err) {
fprintf(stderr, "usage: %s -O asm.ovlStore -G asm.gkpStore [opts] [-L fileList | *.ovb.gz]\n", argv[0]);
@@ -347,7 +388,7 @@ main(int argc, char **argv) {
fprintf(stderr, "\n");
fprintf(stderr, " -F f use up to 'f' files for store creation\n");
fprintf(stderr, " -M g use up to 'g' gigabytes memory for sorting overlaps\n");
- fprintf(stderr, " default 4; g-0.125 gb is available for sorting overlaps\n");
+ fprintf(stderr, " default 4; g-0.25 gb is available for sorting overlaps\n");
fprintf(stderr, "\n");
fprintf(stderr, " -e e filter overlaps above e fraction error\n");
fprintf(stderr, " -l l filter overlaps below l bases overlap length (needs gkpStore to get read lengths!)\n");
@@ -365,8 +406,8 @@ main(int argc, char **argv) {
fprintf(stderr, "ERROR: No input overlap files (-L or last on the command line) supplied.\n");
if (fileLimit > sysconf(_SC_OPEN_MAX) - 16)
fprintf(stderr, "ERROR: Too many jobs (-F); only "F_SIZE_T" supported on this architecture.\n", sysconf(_SC_OPEN_MAX) - 16);
- if (memoryLimit < MEMORY_OVERHEAD)
- fprintf(stderr, "ERROR: Memory (-M) must be at least %.3f to account for overhead.\n", MEMORY_OVERHEAD / 1024.0 / 1024.0 / 1024.0);
+ if (maxMemory < MEMORY_OVERHEAD)
+ fprintf(stderr, "ERROR: Memory (-M) must be at least %.3f GB to account for overhead.\n", MEMORY_OVERHEAD / 1024.0 / 1024.0 / 1024.0);
exit(1);
}
@@ -414,12 +455,11 @@ main(int argc, char **argv) {
-
// Open reads, figure out a partitioning scheme.
gkStore *gkp = gkStore::gkStore_open(gkpName);
uint64 maxIID = gkp->gkStore_getNumReads() + 1;
- uint32 *iidToBucket = computeIIDperBucket(fileLimit, memoryLimit, maxMemoryLimit, maxIID, fileList);
+ uint32 *iidToBucket = computeIIDperBucket(fileLimit, minMemory, maxMemory, maxIID, fileList);
uint32 maxFiles = sysconf(_SC_OPEN_MAX);
@@ -432,6 +472,7 @@ main(int argc, char **argv) {
}
+
// Dump the configuration if told to.
if (configOut) {
diff --git a/src/stores/ovStoreDump.C b/src/stores/ovStoreDump.C
index a2ca3a0..853d7ec 100644
--- a/src/stores/ovStoreDump.C
+++ b/src/stores/ovStoreDump.C
@@ -40,11 +40,18 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-11
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
#include "AS_global.H"
+#include "AS_UTL_decodeRange.H"
+#include "splitToWords.H"
+
#include "gkStore.H"
#include "ovStore.H"
@@ -57,13 +64,152 @@ enum dumpOp {
enum dumpFlags {
- DUMP_5p = 1,
- DUMP_3p = 2,
- DUMP_CONTAINED = 4,
- DUMP_CONTAINS = 8
+ NO_5p = 1,
+ NO_3p = 2,
+ NO_CONTAINED = 4,
+ NO_CONTAINS = 8,
+ NO_CONTAINED_READS = 16,
+ NO_SUSPICIOUS_READS = 32,
+ NO_SINGLETON_READS = 64
+};
+
+
+struct readStatus { // Per-read status, packed into bitfields whose widths sum to 64.
+ uint64 best5id : 29; // Read at the other end of the best edge from this read's 5' end.
+ uint64 best53p : 1; // Unwieldy - best edge from my 5' is to the 3' of 'best5id'.
+ // Presumably the same pair of fields for the best edge from this read's 3' end.
+ uint64 best3id : 29;
+ uint64 best33p : 1;
+ // Flags. The loader below sets these from the '.edges', '.edges.suspicious' and '.singletons' files.
+ uint64 unused : 1;
+ uint64 isSingleton : 1; // Read is listed in the '.singletons' file.
+ uint64 isContained : 1; // Eleventh word of the read's edges line starts with 'c'.
+ uint64 isSuspicious : 1; // Read is listed in the '.edges.suspicious' file.
};
+class bogartStatus { // Lookup table of per-read best-edge and singleton/contained/suspicious status.
+public:
+ bogartStatus(const char *prefix, uint32 nReads); // Loads status from files named after 'prefix'; a NULL prefix loads nothing.
+ ~bogartStatus() {
+ delete [] _status;
+ };
+ // Accessors. Each returns 0/false when nothing was loaded (_status is NULL); 'id' is not range checked.
+ uint32 getBest5id(uint32 id) { return((_status) ? (_status[id].best5id) : 0); };
+ bool getBest53p(uint32 id) { return((_status) ? (_status[id].best53p) : 0); };
+
+ uint32 getBest3id(uint32 id) { return((_status) ? (_status[id].best3id) : false); };
+ bool getBest33p(uint32 id) { return((_status) ? (_status[id].best33p) : false); };
+
+ bool getSingleton(uint32 id) { return((_status) ? (_status[id].isSingleton) : false); };
+ bool getContained(uint32 id) { return((_status) ? (_status[id].isContained) : false); };
+ bool getSuspicious(uint32 id) { return((_status) ? (_status[id].isSuspicious) : false); };
+
+private:
+ readStatus *_status; // Array of nReads+1 entries indexed by read id, or NULL when no prefix was given.
+};
+
+
+
+bogartStatus::bogartStatus(const char *prefix, uint32 nReads) { // Load per-read status from PREFIX.edges, PREFIX.edges.suspicious and PREFIX.singletons.
+ char N[FILENAME_MAX]; // Used first for file names, then reused as the line buffer.
+ splitToWords W;
+
+ _status = NULL;
+ // With no prefix nothing is loaded and _status stays NULL.
+ if (prefix == NULL)
+ return;
+
+ errno = 0;
+ // NOTE(review): errno is cleared only once and the FILE pointers are never tested; checking each fopen() result for NULL would be more robust.
+ sprintf(N, "%s.edges", prefix);
+ FILE *E = fopen(N, "r");
+ if (errno)
+ fprintf(stderr, "Failed to open '%s' for reading: %s\n", N, strerror(errno)), exit(1);
+
+ sprintf(N, "%s.edges.suspicious", prefix);
+ FILE *S = fopen(N, "r");
+ if (errno)
+ fprintf(stderr, "Failed to open '%s' for reading: %s\n", N, strerror(errno)), exit(1);
+
+ sprintf(N, "%s.singletons", prefix);
+ FILE *G = fopen(N, "r");
+ if (errno)
+ fprintf(stderr, "Failed to open '%s' for reading: %s\n", N, strerror(errno)), exit(1);
+ // One entry for each read id 0..nReads, all fields zeroed.
+ _status = new readStatus [nReads+1];
+
+ memset(_status, 0, sizeof(readStatus) * (nReads+1));
+ // Pass 1: PREFIX.edges. Word 0 is the read id; words 2 and 4 are the best 5' and 3' partner ids; words 3 and 5
+ // start with '3' when the edge lands on the partner's 3' end; an eleventh word starting with 'c' marks the read contained.
+ fgets(N, FILENAME_MAX, E);
+ while (!feof(E)) {
+ W.split(N);
+ // NOTE(review): 'id' indexes _status without being checked against nReads, and words 2..5 are assumed to exist.
+ uint32 id = W(0);
+
+ _status[id].best5id = W(2);
+ _status[id].best53p = (W[3][0] == '3');
+
+ _status[id].best3id = W(4);
+ _status[id].best33p = (W[5][0] == '3');
+
+ _status[id].isSingleton = false;
+ _status[id].isContained = ((W.numWords() > 10) && (W[10][0] == 'c'));
+ _status[id].isSuspicious = false;
+
+ fgets(N, FILENAME_MAX, E);
+ }
+ fclose(E);
+ // Pass 2: PREFIX.edges.suspicious. Parsed exactly like pass 1, except the read is flagged suspicious.
+ // An id also present in PREFIX.edges has its pass-1 values overwritten here.
+ fgets(N, FILENAME_MAX, S);
+ while (!feof(S)) {
+ W.split(N);
+
+ uint32 id = W(0);
+
+ _status[id].best5id = W(2);
+ _status[id].best53p = (W[3][0] == '3');
+
+ _status[id].best3id = W(4);
+ _status[id].best33p = (W[5][0] == '3');
+
+ _status[id].isSingleton = false;
+ _status[id].isContained = ((W.numWords() > 10) && (W[10][0] == 'c'));
+ _status[id].isSuspicious = true;
+
+ fgets(N, FILENAME_MAX, S);
+ }
+ fclose(S);
+ // Pass 3: PREFIX.singletons. Only word 0 (the read id) is used; the read's edge fields and other flags
+ // are cleared and it is flagged as a singleton.
+ fgets(N, FILENAME_MAX, G);
+ while (!feof(G)) {
+ W.split(N);
+
+ uint32 id = W(0);
+
+ _status[id].best5id = 0;
+ _status[id].best53p = 0;
+
+ _status[id].best3id = 0;
+ _status[id].best33p = 0;
+
+ _status[id].isSingleton = true;
+ _status[id].isContained = false;
+ _status[id].isSuspicious = false;
+
+ fgets(N, FILENAME_MAX, G);
+ }
+ fclose(G);
+}
+
+
+
+
+
+
//
// Also accept a single ovStoreFile (output from overlapper) and dump.
//
@@ -86,7 +232,9 @@ dumpStore(ovStore *ovlStore,
uint32 endID,
uint32 qryID,
ovOverlapDisplayType type,
- bool beVerbose) {
+ bool beVerbose,
+ bool oneSided,
+ char *bestPrefix) {
ovOverlap overlap(gkpStore);
uint64 evalue = AS_OVS_encodeEvalue(dumpERate);
@@ -97,6 +245,7 @@ dumpStore(ovStore *ovlStore,
uint32 ovlNot3p = 0;
uint32 ovlNotContainer = 0;
uint32 ovlNotContainee = 0;
+ uint32 ovlNotUnique = 0;
uint32 ovlDumped = 0;
uint32 obtTooHighError = 0;
uint32 obtDumped = 0;
@@ -127,26 +276,31 @@ dumpStore(ovStore *ovlStore,
int32 ahang = overlap.a_hang();
int32 bhang = overlap.b_hang();
- if (((dumpType & DUMP_5p) == 1) && (ahang < 0) && (bhang < 0)) {
+ if ((dumpType & NO_5p) && (ahang < 0) && (bhang < 0)) {
ovlNot5p++;
continue;
}
- if (((dumpType & DUMP_3p) == 1) && (ahang > 0) && (bhang > 0)) {
+ if ((dumpType & NO_3p) && (ahang > 0) && (bhang > 0)) {
ovlNot3p++;
continue;
}
- if (((dumpType & DUMP_CONTAINS) == 1) && (ahang >= 0) && (bhang <= 0)) {
+ if ((dumpType & NO_CONTAINS) && (ahang >= 0) && (bhang <= 0)) {
ovlNotContainer++;
continue;
}
- if (((dumpType & DUMP_CONTAINED) == 1) && (ahang <= 0) && (bhang >= 0)) {
+ if ((dumpType & NO_CONTAINED) && (ahang <= 0) && (bhang >= 0)) {
ovlNotContainee++;
continue;
}
+ if (oneSided == true && overlap.a_iid >= overlap.b_iid) {
+ ovlNotUnique++;
+ continue;
+ }
+
ovlDumped++;
// The toString() method is quite slow, all from sprintf().
@@ -166,7 +320,7 @@ dumpStore(ovStore *ovlStore,
if (asCounts)
for (uint32 ii=bgnID; ii<=endID; ii++)
- fprintf(stdout, "%u\t%u\n", ii + bgnID, counts[ii]);
+ fprintf(stdout, "%u\t%u\n", ii, counts[ii - bgnID]);
delete [] counts;
@@ -209,15 +363,17 @@ sortOBT(const void *a, const void *b) {
void
-dumpPicture(ovOverlap *overlaps,
- uint64 novl,
- gkStore *gkpStore,
- uint32 qryID) {
+dumpPicture(ovOverlap *overlaps,
+ uint64 novl,
+ gkStore *gkpStore,
+ uint32 qryID,
+ bogartStatus *bogart) {
char ovl[256] = {0};
uint32 MHS = 7; // Max Hang Size, amount of padding for "+### "
- gkRead *A = gkpStore->gkStore_getRead(qryID);
+ uint32 Aid = qryID;
+ gkRead *A = gkpStore->gkStore_getRead(Aid);
uint32 frgLenA = A->gkRead_sequenceLength();
for (int32 i=0; i<256; i++)
@@ -228,17 +384,20 @@ dumpPicture(ovOverlap *overlaps,
ovl[ 99 + MHS] = '>';
ovl[100 + MHS] = 0;
- fprintf(stdout, "%8d A: %5d %5d %s\n",
- qryID,
+ fprintf(stdout, "%8d A: %5d %5d %s %s%s\n",
+ Aid,
0, frgLenA,
- ovl);
+ ovl,
+ bogart->getContained(Aid) ? "contained" : "",
+ bogart->getSuspicious(Aid) ? "suspicious" : "");
qsort(overlaps, novl, sizeof(ovOverlap), sortOBT);
// Build ascii representations for each overlapping read.
for (uint32 o=0; o<novl; o++) {
- gkRead *B = gkpStore->gkStore_getRead(overlaps[o].b_iid);
+ uint32 Bid = overlaps[o].b_iid;
+ gkRead *B = gkpStore->gkStore_getRead(Bid);
uint32 frgLenB = B->gkRead_sequenceLength();
// Find bgn/end points on each read. If the overlap is reverse complement,
@@ -268,14 +427,47 @@ dumpPicture(ovOverlap *overlaps,
for (int32 i=0; i<256; i++)
ovl[i] = ' ';
- for (uint32 i=ovlStrBgn; i<ovlStrEnd; i++)
- ovl[i] = '-';
+ // Decide how to draw this overlap.
+ // For best edges, use '='.
+ // For contained, use '-', alternating with spaces.
+ // For suspicious, use '*', alternating with dashes.
+ // For edges, use '-', solid.
+
+ bool isBest = (((bogart->getBest5id(Aid) == Bid) && (overlaps[o].overlapAEndIs5prime() == true)) ||
+ ((bogart->getBest3id(Aid) == Bid) && (overlaps[o].overlapAEndIs3prime() == true)));
+ bool isCont = (bogart->getContained(Bid));
+ bool isSusp = (bogart->getSuspicious(Bid));
+
+ // This bit of confusion makes sure that the alternating overlap lines (especially '- - - -')
+ // end with a dash.
+
+ bool oddEven = (overlaps[o].flipped() == false) ? (false) : (((ovlStrEnd - ovlStrBgn) % 2) == false);
+
+ if (isCont == true) {
+ for (uint32 i=ovlStrBgn; i<ovlStrEnd; i++)
+ ovl[i] = (oddEven = !oddEven) ? '-' : ' ';
+ }
+
+ else if (isSusp == true) {
+ for (uint32 i=ovlStrBgn; i<ovlStrEnd; i++)
+ ovl[i] = (oddEven = !oddEven) ? '-' : '*';
+ }
+
+ else {
+ char c = (isBest) ? '=' : '-';
+
+ for (uint32 i=ovlStrBgn; i<ovlStrEnd; i++)
+ ovl[i] = c;
+ }
if (overlaps[o].flipped() == true)
ovl[ovlStrBgn] = '<';
else
ovl[ovlStrEnd-1] = '>';
+ assert(ovl[ovlStrBgn] != ' ');
+ assert(ovl[ovlStrEnd-1] != ' ');
+
ovl[ovlStrEnd] = 0;
// For the B read, find how much is unaliged on each end. Though the store directly keeps this information,
@@ -292,6 +484,7 @@ dumpPicture(ovOverlap *overlaps,
ovlEndHang = ovlEndB;
}
+ // Paste the bgn hang into the overlap string.
if (ovlBgnHang > 0) {
char str[256];
int32 len;
@@ -303,17 +496,60 @@ dumpPicture(ovOverlap *overlaps,
ovl[ovlStrBgn - len - 1 + i] = str[i];
}
+ // Append the end hang.
if (ovlEndHang > 0) {
sprintf(ovl + ovlStrEnd, " +%d", ovlEndHang);
}
+ // Set flags for best edge and singleton/contained/suspicious. Left in for when I get annoyed with the different lines.
+
+ char olapClass[4] = { 0, ' ', 0, 0 };
+
+#if 0
+ if ((bogart->getBest5id(Aid) == Bid) &&
+ (overlaps[o].overlapAEndIs5prime() == true)) {
+ olapClass[0] = ' ';
+ olapClass[2] = 'B';
+ }
+
+ if ((bogart->getBest3id(Aid) == Bid) &&
+ (overlaps[o].overlapAEndIs3prime() == true)) {
+ olapClass[0] = ' ';
+ olapClass[2] = 'B';
+ }
+
+ if (olapClass[2] == 'B')
+ for (uint32 ii=0; ovl[ii]; ii++)
+ if (ovl[ii] == '-')
+ ovl[ii] = '=';
+
+
+
+ if (bogart->getSingleton(Bid)) {
+ olapClass[0] = ' ';
+ olapClass[1] = 'S';
+ }
+
+ if (bogart->getContained(Bid)) {
+ olapClass[0] = ' ';
+ olapClass[1] = 'C';
+ }
- fprintf(stdout, "%8d A: %5d %5d (%5d) B: %5d %5d (%5d) %5.2f%% %s\n",
- overlaps[o].b_iid,
+ if (bogart->getSuspicious(Bid)) {
+ olapClass[0] = ' ';
+ olapClass[1] = '!';
+ }
+#endif
+
+ // Report!
+
+ fprintf(stdout, "%8d A: %5d %5d (%5d) B: %5d %5d (%5d) %5.2f%% %s%s\n",
+ Bid,
ovlBgnA, ovlEndA, frgLenA,
ovlBgnB, ovlEndB, frgLenB,
overlaps[o].erate() * 100.0,
- ovl);
+ ovl,
+ olapClass);
}
}
@@ -326,48 +562,72 @@ dumpPicture(ovStore *ovlStore,
double dumpERate,
uint32 dumpLength,
uint32 dumpType,
- uint32 qryID) {
+ uint32 qryID,
+ char *bestPrefix) {
//fprintf(stderr, "DUMPING PICTURE for ID "F_U32" in store %s (gkp %s)\n",
// qryID, ovlName, gkpName);
- gkRead *A = gkpStore->gkStore_getRead(qryID);
+ uint32 Aid = qryID;
+ gkRead *A = gkpStore->gkStore_getRead(Aid);
uint32 frgLenA = A->gkRead_sequenceLength();
- ovlStore->setRange(qryID, qryID);
+ ovlStore->setRange(Aid, Aid);
uint64 novl = 0;
ovOverlap overlap(gkpStore);
ovOverlap *overlaps = ovOverlap::allocateOverlaps(gkpStore, ovlStore->numOverlapsInRange());
uint64 evalue = AS_OVS_encodeEvalue(dumpERate);
+ // Load bogart status, if supplied.
+
+ bogartStatus *bogart = new bogartStatus(bestPrefix, gkpStore->gkStore_getNumReads());
+
// Load all the overlaps so we can sort by the A begin position.
while (ovlStore->readOverlap(&overlap) == TRUE) {
+ // Filter out garbage overlaps.
if (overlap.evalue() > evalue)
continue;
- if (((dumpType & DUMP_5p) == 0) &&
+ // Filter out 5' overlaps.
+ if ((dumpType & NO_5p) &&
(overlap.a_hang() < 0) && (overlap.b_hang() < 0))
continue;
- if (((dumpType & DUMP_3p) == 0) &&
+ // Filter out 3' overlaps.
+ if ((dumpType & NO_3p) &&
(overlap.a_hang() > 0) && (overlap.b_hang() > 0))
continue;
- if (((dumpType & DUMP_CONTAINS) == 0) &&
+ // Filter out contained overlaps (B-read is contained)
+ if ((dumpType & NO_CONTAINS) &&
(overlap.a_hang() >= 0) && (overlap.b_hang() <= 0))
continue;
- if (((dumpType & DUMP_CONTAINED) == 0) &&
+ // Filter out container overlaps (A-read is contained)
+ if ((dumpType & NO_CONTAINED) &&
(overlap.a_hang() <= 0) && (overlap.b_hang() >= 0))
continue;
- if (overlap.b_end() - overlap.b_bgn() < dumpLength)
+ // Filter out short overlaps.
+ if ((overlap.b_end() - overlap.b_bgn() < dumpLength) ||
+ (overlap.a_end() - overlap.a_bgn() < dumpLength))
+ continue;
+
+ // If bogart data is supplied, filter out contained or suspicious overlaps.
+
+ if ((dumpType & NO_CONTAINED_READS) &&
+ (bogart->getContained(overlap.b_iid)))
+ continue;
+
+ if ((dumpType & NO_SUSPICIOUS_READS) &&
+ (bogart->getSuspicious(overlap.b_iid)))
continue;
- if (overlap.a_end() - overlap.a_bgn() < dumpLength)
+ if ((dumpType & NO_SINGLETON_READS) &&
+ (bogart->getSingleton(overlap.b_iid)))
continue;
overlaps[novl++] = overlap;
@@ -377,7 +637,7 @@ dumpPicture(ovStore *ovlStore,
if (novl == 0)
fprintf(stderr, "no overlaps to show.\n");
else
- dumpPicture(overlaps, novl, gkpStore, qryID);
+ dumpPicture(overlaps, novl, gkpStore, Aid, bogart);
delete [] overlaps;
}
@@ -408,6 +668,9 @@ main(int argc, char **argv) {
uint32 qryID = 0;
bool beVerbose = false;
+ bool oneSided = false;
+
+ char *bestPrefix = NULL;
ovOverlapDisplayType type = ovOverlapAsCoords;
@@ -423,19 +686,16 @@ main(int argc, char **argv) {
else if (strcmp(argv[arg], "-O") == 0)
ovlName = argv[++arg];
- else if (strcmp(argv[arg], "-b") == 0)
- bgnID = atoi(argv[++arg]);
-
- else if (strcmp(argv[arg], "-e") == 0)
- endID = atoi(argv[++arg]);
-
// Standard bulk dump of overlaps
- else if (strcmp(argv[arg], "-d") == 0)
+ else if (strcmp(argv[arg], "-d") == 0) {
operation = OP_DUMP;
- // Dump as a picture, the next ID
- // Should be easy to extend to using -b -e range
+ if ((arg+1 < argc) && (argv[arg+1][0] != '-'))
+ AS_UTL_decodeRange(argv[++arg], bgnID, endID);
+ }
+
+ // Dump as a picture
else if (strcmp(argv[arg], "-p") == 0) {
operation = OP_DUMP_PICTURE;
bgnID = atoi(argv[++arg]);
@@ -461,6 +721,8 @@ main(int argc, char **argv) {
else if (strcmp(argv[arg], "-raw") == 0)
type = ovOverlapAsRaw;
+ else if (strcmp(argv[arg], "-paf") == 0)
+ type = ovOverlapAsPaf;
else if (strcmp(argv[arg], "-binary") == 0)
asBinary = true;
@@ -468,7 +730,6 @@ main(int argc, char **argv) {
else if (strcmp(argv[arg], "-counts") == 0)
asCounts = true;
-
// standard bulk dump options
else if (strcmp(argv[arg], "-E") == 0)
dumpERate = atof(argv[++arg]);
@@ -477,20 +738,35 @@ main(int argc, char **argv) {
dumpLength = atoi(argv[++arg]);
else if (strcmp(argv[arg], "-d5") == 0)
- dumpType |= DUMP_5p;
+ dumpType |= NO_5p;
else if (strcmp(argv[arg], "-d3") == 0)
- dumpType |= DUMP_3p;
+ dumpType |= NO_3p;
else if (strcmp(argv[arg], "-dC") == 0)
- dumpType |= DUMP_CONTAINS;
+ dumpType |= NO_CONTAINS;
else if (strcmp(argv[arg], "-dc") == 0)
- dumpType |= DUMP_CONTAINED;
+ dumpType |= NO_CONTAINED;
else if (strcmp(argv[arg], "-v") == 0)
beVerbose = true;
+ else if (strcmp(argv[arg], "-unique") == 0)
+ oneSided = true;
+
+ else if (strcmp(argv[arg], "-best") == 0)
+ bestPrefix = argv[++arg];
+
+ else if (strcmp(argv[arg], "-noc") == 0)
+ dumpType |= NO_CONTAINED_READS;
+
+ else if (strcmp(argv[arg], "-nos") == 0)
+ dumpType |= NO_SUSPICIOUS_READS;
+
+ else if (strcmp(argv[arg], "-nosi") == 0)
+ dumpType |= NO_SINGLETON_READS;
+
else {
fprintf(stderr, "%s: unknown option '%s'.\n", argv[0], argv[arg]);
@@ -508,10 +784,10 @@ main(int argc, char **argv) {
err++;
if (err) {
- fprintf(stderr, "usage: %s -G gkpStore -O ovlStore [-b bgnID] [-e endID] ...\n", argv[0]);
+ fprintf(stderr, "usage: %s -G gkpStore -O ovlStore ...\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "There are three modes of operation:\n");
- fprintf(stderr, " -d dump a store (range selected with -b and -e)\n");
+ fprintf(stderr, " -d [a[-b]] dump overlaps for reads a to b, inclusive\n");
fprintf(stderr, " -q a b report the a,b overlap, if it exists.\n");
fprintf(stderr, " -p a dump a picture of overlaps to fragment 'a'.\n");
fprintf(stderr, "\n");
@@ -520,6 +796,7 @@ main(int argc, char **argv) {
fprintf(stderr, " -coords dump overlap showing coordinates in the reads (default)\n");
fprintf(stderr, " -hangs dump overlap showing dovetail hangs unaligned\n");
fprintf(stderr, " -raw dump overlap showing its raw native format (four hangs)\n");
+ fprintf(stderr, " -paf dump overlaps in miniasm/minimap format\n");
fprintf(stderr, " -binary dump overlap as raw binary data\n");
fprintf(stderr, " -counts dump the number of overlaps per read\n");
fprintf(stderr, "\n");
@@ -532,6 +809,11 @@ main(int argc, char **argv) {
fprintf(stderr, " -dC Dump only overlaps that are contained in the A frag (B contained in A).\n");
fprintf(stderr, " -dc Dump only overlaps that are containing the A frag (A contained in B).\n");
fprintf(stderr, " -v Report statistics (to stderr) on some dumps (-d).\n");
+ fprintf(stderr, " -unique Report only overlaps where A id is < B id, do not report both A to B and B to A overlap\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " -best prefix Annotate picture with status from bogart outputs prefix.edges, prefix.singletons, prefix.edges.suspicious\n");
+ fprintf(stderr, " -noc With -best data, don't show overlaps to contained reads.\n");
+ fprintf(stderr, " -nos With -best data, don't show overlaps to suspicious reads.\n");
fprintf(stderr, "\n");
if (operation == OP_NONE)
@@ -544,9 +826,6 @@ main(int argc, char **argv) {
exit(1);
}
- if (dumpType == 0)
- dumpType = DUMP_5p | DUMP_3p | DUMP_CONTAINED | DUMP_CONTAINS;
-
gkStore *gkpStore = gkStore::gkStore_open(gkpName);
ovStore *ovlStore = new ovStore(ovlName, gkpStore);
@@ -558,11 +837,11 @@ main(int argc, char **argv) {
switch (operation) {
case OP_DUMP:
- dumpStore(ovlStore, gkpStore, asBinary, asCounts, dumpERate, dumpLength, dumpType, bgnID, endID, qryID, type, beVerbose);
+ dumpStore(ovlStore, gkpStore, asBinary, asCounts, dumpERate, dumpLength, dumpType, bgnID, endID, qryID, type, beVerbose, oneSided, bestPrefix);
break;
case OP_DUMP_PICTURE:
for (qryID=bgnID; qryID <= endID; qryID++)
- dumpPicture(ovlStore, gkpStore, dumpERate, dumpLength, dumpType, qryID);
+ dumpPicture(ovlStore, gkpStore, dumpERate, dumpLength, dumpType, qryID, bestPrefix);
break;
default:
break;
diff --git a/src/stores/ovStoreStats.C b/src/stores/ovStoreStats.C
index b263b44..eccb440 100644
--- a/src/stores/ovStoreStats.C
+++ b/src/stores/ovStoreStats.C
@@ -19,6 +19,10 @@
* are a 'United States Government Work', and
* are released in the public domain
*
+ * Sergey Koren beginning on 2016-MAR-31
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
* File 'README.licenses' in the root directory of this distribution contains
* full conditions and disclaimers for each license.
*/
@@ -440,6 +444,8 @@ main(int argc, char **argv) {
endi--;
endi++;
+ delete[] classification;
+
// All the same classification?
if (bgni == endi) {
@@ -581,26 +587,65 @@ main(int argc, char **argv) {
fprintf(stderr, "Failed to open '%s' for writing: %s\n", N, strerror(errno)), exit(1);
}
- fprintf(LOG, "category reads read length feature size or coverage analysis\n");
- fprintf(LOG, "---------------- ------- ---------------------- ------------------------ --------------------\n");
- fprintf(LOG, "middle-missing %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (bad trimming)\n", readHole->numberOfObjects(), readHole->mean(), readHole->stddev(), olapHole->mean(), olapHole->stddev());
- fprintf(LOG, "middle-hump %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (bad trimming)\n", readHump->numberOfObjects(), readHump->mean(), readHump->stddev(), olapHump->mean(), olapHump->stddev());
- fprintf(LOG, "no-5-prime %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (bad trimming)\n", readNo5->numberOfObjects(), readNo5->mean(), readNo5->stddev(), olapNo5->mean(), olapNo5->stddev());
- fprintf(LOG, "no-3-prime %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (bad trimming)\n", readNo3->numberOfObjects(), readNo3->mean(), readNo3->stddev(), olapNo3->mean(), olapNo3->stddev());
+ fprintf(LOG, "category reads %% read length feature size or coverage analysis\n");
+ fprintf(LOG, "---------------- ------- ------- ---------------------- ------------------------ --------------------\n");
+ fprintf(LOG, "middle-missing %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (bad trimming)\n", readHole->numberOfObjects(), (float)readHole->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readHole->mean(), readHole->stddev(), olapHole->mean(), olapHole->stddev());
+ fprintf(LOG, "middle-hump %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (bad trimming)\n", readHump->numberOfObjects(), (float)readHump->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readHump->mean(), readHump->stddev(), olapHump->mean(), olapHump->stddev());
+ fprintf(LOG, "no-5-prime %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (bad trimming)\n", readNo5->numberOfObjects(), (float)readNo5->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readNo5->mean(), readNo5->stddev(), olapNo5->mean(), olapNo5->stddev());
+ fprintf(LOG, "no-3-prime %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (bad trimming)\n", readNo3->numberOfObjects(), (float)readNo3->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readNo3->mean(), readNo3->stddev(), olapNo3->mean(), olapNo3->stddev());
fprintf(LOG, "\n");
- fprintf(LOG, "low-coverage %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (easy to assemble, potential for lower quality consensus)\n", readLowCov->numberOfObjects(), readLowCov->mean(), readLowCov->stddev(), covrLowCov->mean(), covrLowCov->stddev());
- fprintf(LOG, "unique %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (easy to assemble, perfect, yay)\n", readUnique->numberOfObjects(), readUnique->mean(), readUnique->stddev(), covrUnique->mean(), covrUnique->stddev());
- fprintf(LOG, "repeat-cont %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (potential for consensus errors, no impact on assembly)\n", readRepeatCont->numberOfObjects(), readRepeatCont->mean(), readRepeatCont->stddev(), covrRepeatCont->mean(), covrRepeatCont->stddev());
- fprintf(LOG, "repeat-dove %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (hard to assemble, likely won't assemble correctly or even at all)\n", readRepeatDove->numberOfObjects(), readRepeatDove->mean(), readRepeatDove->stddev(), covrRepeatDove->mean(), covrRepeatDove->stddev());
+ fprintf(LOG, "low-coverage %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (easy to assemble, potential for lower quality consensus)\n", readLowCov->numberOfObjects(), (float)readLowCov->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readLowCov->mean(), readLowCov->stddev(), covrLowCov->mean(), covrLowCov->stddev());
+ fprintf(LOG, "unique %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (easy to assemble, perfect, yay)\n", readUnique->numberOfObjects(), (float)readUnique->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readUnique->mean(), readUnique->stddev(), covrUnique->mean(), covrUnique->stddev());
+ fprintf(LOG, "repeat-cont %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (potential for consensus errors, no impact on assembly)\n", readRepeatCont->numberOfObjects(), (float)readRepeatCont->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readRepeatCont->mean(), readRepeatCont->stddev(), covrRepeatCont->mean(), covrRepeatCont->stddev());
+ fprintf(LOG, "repeat-dove %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (hard to assemble, likely won't assemble correctly or even at all)\n", readRepeatDove->numberOfObjects(), (float)readRepeatDove->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readRepeatDove->mean(), readRepeatDove->stddev(), covrRepeatDove->mean(), covrRepeatDove->stddev());
fprintf(LOG, "\n");
- fprintf(LOG, "span-repeat %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (read spans a large repeat, usually easy to assemble)\n", readSpanRepeat->numberOfObjects(), readSpanRepeat->mean(), readSpanRepeat->stddev(), olapSpanRepeat->mean(), olapSpanRepeat->stddev());
- fprintf(LOG, "uniq-repeat-cont %7"F_U64P" %10.2f +- %-8.2f (should be uniquely placed, low potential for consensus errors, no impact on assembly)\n", readUniqRepeatCont->numberOfObjects(), readUniqRepeatCont->mean(), readUniqRepeatCont->stddev());
- fprintf(LOG, "uniq-repeat-dove %7"F_U64P" %10.2f +- %-8.2f (will end contigs, potential to misassemble)\n", readUniqRepeatDove->numberOfObjects(), readUniqRepeatDove->mean(), readUniqRepeatDove->stddev());
- fprintf(LOG, "uniq-anchor %7"F_U64P" %10.2f +- %-8.2f %10.2f +- %-8.2f (repeat read, with unique section, probable bad read)\n", readUniqAnchor->numberOfObjects(), readUniqAnchor->mean(), readUniqAnchor->stddev(), olapUniqAnchor->mean(), olapUniqAnchor->stddev());
+ fprintf(LOG, "span-repeat %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (read spans a large repeat, usually easy to assemble)\n", readSpanRepeat->numberOfObjects(), (float)readSpanRepeat->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readSpanRepeat->mean(), readSpanRepeat->stddev(), olapSpanRepeat->mean(), olapSpanRepeat->stddev());
+ fprintf(LOG, "uniq-repeat-cont %7"F_U64P" %6.2f %10.2f +- %-8.2f (should be uniquely placed, low potential for consensus errors, no impact on assembly)\n", readUniqRepeatCont->numberOfObjects(), (float)readUniqRepeatCont->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readUniqRepeatCont->mean(), readUniqRepeatCont->stddev());
+ fprintf(LOG, "uniq-repeat-dove %7"F_U64P" %6.2f %10.2f +- %-8.2f (will end contigs, potential to misassemble)\n", readUniqRepeatDove->numberOfObjects(), (float)readUniqRepeatDove->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readUniqRepeatDove->mean(), readUniqRepeatDove->stddev());
+ fprintf(LOG, "uniq-anchor %7"F_U64P" %6.2f %10.2f +- %-8.2f %10.2f +- %-8.2f (repeat read, with unique section, probable bad read)\n", readUniqAnchor->numberOfObjects(), (float)readUniqAnchor->numberOfObjects()/gkpStore->gkStore_getNumReads()*100, readUniqAnchor->mean(), readUniqAnchor->stddev(), olapUniqAnchor->mean(), olapUniqAnchor->stddev());
if (toFile == true)
fclose(LOG);
+ // Clean up the histograms
+ delete readNoOlaps;
+ delete readHole;
+ delete readHump;
+ delete readNo5;
+ delete readNo3;
+
+ delete olapHole;
+ delete olapHump;
+ delete olapNo5;
+ delete olapNo3;
+
+ delete readLowCov;
+ delete readUnique;
+ delete readRepeatCont;
+ delete readRepeatDove;
+ delete readSpanRepeat;
+ delete readUniqRepeatCont;
+ delete readUniqRepeatDove;
+ delete readUniqAnchor;
+
+ delete covrLowCov;
+ delete covrUnique;
+ delete covrRepeatCont;
+ delete covrRepeatDove;
+ delete covrSpanRepeat;
+ delete covrUniqRepeatCont;
+ delete covrUniqRepeatDove;
+ delete covrUniqAnchor;
+
+ delete olapLowCov;
+ delete olapUnique;
+ delete olapRepeatCont;
+ delete olapRepeatDove;
+ delete olapSpanRepeat;
+ delete olapUniqRepeatCont;
+ delete olapUniqRepeatDove;
+ delete olapUniqAnchor;
+
delete ovlStore;
gkpStore->gkStore_close();
diff --git a/src/stores/tgStoreCompress.C b/src/stores/tgStoreCompress.C
new file mode 100644
index 0000000..32fe427
--- /dev/null
+++ b/src/stores/tgStoreCompress.C
@@ -0,0 +1,170 @@
+
+/******************************************************************************
+ *
+ * This file is part of canu, a software program that assembles whole-genome
+ * sequencing reads into contigs.
+ *
+ * This software is based on:
+ * 'Celera Assembler' (http://wgs-assembler.sourceforge.net)
+ * the 'kmer package' (http://kmer.sourceforge.net)
+ * both originally distributed by Applera Corporation under the GNU General
+ * Public License, version 2.
+ *
+ * Canu branched from Celera Assembler at its revision 4587.
+ * Canu branched from the kmer project at its revision 1994.
+ *
+ * This file is derived from:
+ *
+ * src/stores/tgStore.C
+ *
+ * Modifications by:
+ *
+ * Brian P. Walenz beginning on 2016-APR-18
+ * are a 'United States Government Work', and
+ * are released in the public domain
+ *
+ * File 'README.licenses' in the root directory of this distribution contains
+ * full conditions and disclaimers for each license.
+ */
+
+#include "AS_global.H"
+#include "gkStore.H"
+#include "tgStore.H"
+
+
+void
+operationCompress(char *tigName, int tigVers) {
+  //  Move every live tig stored in a version before tigVers into version tigVers,
+  //  then purge versions 1 through tigVers-1.  If any tig is already stored in a
+  //  version after tigVers, report it and exit(1) before any tig is moved.
+
+  tgStore *tigStore  = new tgStore(tigName, tigVers);   //  Default open mode for the scan.
+  uint32   nErrors   = 0;                               //  Tigs stored after tigVers.
+  uint32   nCompress = 0;                               //  Tigs stored before tigVers.
+
+  //  Fail if this isn't the latest version.  If we try to compress something that isn't the latest
+  //  version, versions after this still point to the uncompressed tigs.
+  //
+  //  Function never written - is this still a problem?  (18 APR 2016)
+
+  //  Check that we aren't going to pull a tig out of the future and place it in the past.
+
+  for (uint32 ti=0; ti<tigStore->numTigs(); ti++) {
+    if (tigStore->isDeleted(ti))
+      continue;
+
+    if (tigStore->getVersion(ti) > tigVers) {
+      fprintf(stderr, "WARNING: Attempt to move future unitig "F_U32" from version "F_U32" to previous version %d.\n",
+              ti, tigStore->getVersion(ti), tigVers);
+      nErrors++;
+    } else if (tigStore->getVersion(ti) < tigVers) {
+      nCompress++;
+    }
+  }
+
+  if (nErrors > 0) {
+    fprintf(stderr, "Store can't be compressed; probably trying to compress to something that isn't the latest version.\n");
+    fprintf(stderr, " "F_U32" tigs failed; "F_U32" compressable\n", nErrors, nCompress);
+    delete tigStore;
+    exit(1);
+  }
+
+  //  Nothing older than tigVers: no need to reopen for modification or purge.
+
+  if (nCompress == 0) {
+    delete tigStore;
+    return;
+  }
+
+  //  Actually do the moves.  Reopen the store with tgStoreModify first.
+
+  delete tigStore;
+  tigStore = new tgStore(tigName, tigVers, tgStoreModify);
+
+  fprintf(stderr, "Compressing "F_U32" tigs into version %d\n", nCompress, tigVers);
+
+  for (uint32 ti=0; ti<tigStore->numTigs(); ti++) {
+    if ((ti % 1000000) == 0)                       //  Progress report; ti is uint32, so use F_U32.
+      fprintf(stderr, "tig "F_U32"\n", ti);
+
+    if (tigStore->isDeleted(ti))
+      continue;
+
+    if (tigStore->getVersion(ti) == tigVers)       //  Already in the target version.
+      continue;
+
+    tgTig *tig = tigStore->loadTig(ti);
+
+    if (tig == NULL)
+      continue;
+
+    tigStore->insertTig(tig, true);
+    tigStore->unloadTig(ti);
+  }
+
+  //  Clean up the older files.  (Nothing is done for newer versions.)
+
+  for (uint32 version=1; version<tigVers; version++) {
+    fprintf(stderr, "Purge version "F_U32".\n", version);
+    tigStore->purgeVersion(version);
+  }
+
+  delete tigStore;
+}
+
+
+
+
+
+
+int
+main (int argc, char **argv) {
+  char   *gkpName = NULL;   //  Gatekeeper store path (-G); only checked for presence here.
+  char   *tigName = NULL;   //  Tig store path (-T).
+  int32   tigVers = -1;     //  Target version (-T); stays -1 when <v> is not supplied.
+
+  //  Only the two stores are inputs.  (A never-filled list of tig files used to be required
+  //  here, which sent every invocation to the usage message below.)
+  argc = AS_configure(argc, argv);
+
+  int arg=1;
+  int err=0;
+  while (arg < argc) {
+    if (strcmp(argv[arg], "-G") == 0) {
+      gkpName = argv[++arg];
+
+    } else if (strcmp(argv[arg], "-T") == 0) {
+      tigName = argv[++arg];
+      if ((tigName != NULL) && (arg + 1 < argc))   //  Don't read past argv if <v> is missing.
+        tigVers = atoi(argv[++arg]);
+    } else {
+      fprintf(stderr, "%s: unknown option '%s'\n", argv[0], argv[arg]);
+      err++;
+    }
+
+    arg++;
+  }
+  if ((err) || (gkpName == NULL) || (tigName == NULL) || (tigVers < 0)) {
+    fprintf(stderr, "usage: %s -G <gkpStore> -T <tigStore> <v>\n", argv[0]);
+    fprintf(stderr, "\n");
+    fprintf(stderr, " -G <gkpStore> Path to the gatekeeper store\n");
+    fprintf(stderr, " -T <tigStore> <v> Path to the tigStore and version to add tigs to\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, " Remove store versions before <v>. Data present in versions before <v>\n");
+    fprintf(stderr, " are copied to version <v>. Files for the earlier versions are removed.\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, " WARNING! This code HAS NOT been tested with canu.\n");
+    fprintf(stderr, "\n");
+    if (gkpName == NULL)
+      fprintf(stderr, "ERROR: no gatekeeper store (-G) supplied.\n");
+    if (tigName == NULL)
+      fprintf(stderr, "ERROR: no tig store (-T) supplied.\n");
+    if ((tigName != NULL) && (tigVers < 0))
+      fprintf(stderr, "ERROR: no valid tig store version (-T) supplied.\n");
+    exit(1);
+  }
+
+  operationCompress(tigName, tigVers);
+
+  exit(0);
+}
diff --git a/src/mhap/mhap.mk b/src/stores/tgStoreCompress.mk
similarity index 61%
copy from src/mhap/mhap.mk
copy to src/stores/tgStoreCompress.mk
index f00bd5b..88ef7c6 100644
--- a/src/mhap/mhap.mk
+++ b/src/stores/tgStoreCompress.mk
@@ -1,3 +1,4 @@
+
# If 'make' isn't run from the root directory, we need to set these to
# point to the upper level build directory.
ifeq "$(strip ${BUILD_DIR})" ""
@@ -7,6 +8,13 @@ ifeq "$(strip ${TARGET_DIR})" ""
TARGET_DIR := ../$(OSTYPE)-$(MACHINETYPE)/bin
endif
-TARGET := mhap-2.0.jar
-SOURCES := mhap-2.0.tar
+TARGET := tgStoreCompress
+SOURCES := tgStoreCompress.C
+
+SRC_INCDIRS := .. ../AS_UTL
+
+TGT_LDFLAGS := -L${TARGET_DIR}
+TGT_LDLIBS := -lcanu
+TGT_PREREQS := libcanu.a
+SUBMAKEFILES :=
diff --git a/src/stores/tgStoreCoverageStat.C b/src/stores/tgStoreCoverageStat.C
index 31aec67..9c4caa4 100644
--- a/src/stores/tgStoreCoverageStat.C
+++ b/src/stores/tgStoreCoverageStat.C
@@ -158,24 +158,24 @@ double
getGlobalArrivalRate(tgStore *tigStore,
FILE *outSTA,
uint64 genomeSize,
- bool useN50) {
- double globalRate = 0;
- double recalRate = 0;
+ bool useN50) {
+ double globalRate = 0;
+ double recalRate = 0;
- double sumRho = 0;
+ double sumRho = 0;
- int32 arLen = 0;
- double *ar = NULL;
- uint32 *allRho = NULL;
+ int32 arLen = 0;
+ double *ar = NULL;
uint32 NF;
uint64 totalRandom = 0;
uint64 totalNF = 0;
int32 BIG_SPAN = 10000;
+
int32 big_spans_in_unitigs = 0; // formerly arMax
// Go through all the unitigs to sum rho and unitig arrival frags
- allRho = new uint32 [tigStore->numTigs()];
+ uint32 *allRho = new uint32 [tigStore->numTigs()];
for (uint32 i=0; i<tigStore->numTigs(); i++) {
tgTig *tig = tigStore->loadTig(i);
@@ -188,6 +188,8 @@ getGlobalArrivalRate(tgStore *tigStore,
double rho = computeRho(tig);
int32 numRandom = numRandomFragments(tig);
+ tigStore->unloadTig(i);
+
sumRho += rho;
big_spans_in_unitigs += (int32) (rho / BIG_SPAN); // Keep integral portion of fraction.
totalRandom += numRandom;
@@ -261,6 +263,8 @@ getGlobalArrivalRate(tgStore *tigStore,
keepNF += (numRandom == 0) ? (0) : (numRandom - 1);
keepRho += rho;
+
+ tigStore->unloadTig(i);
}
fprintf(outSTA, "BASED ON UNITIGS > N50:\n");
@@ -311,7 +315,7 @@ getGlobalArrivalRate(tgStore *tigStore,
assert(0 < rhoDiv10k);
- for (uint32 i=0; i<rhoDiv10k; i++)
+ for (uint32 aa=0; aa<rhoDiv10k; aa++)
ar[arLen++] = localArrivalRate;
assert(arLen <= big_spans_in_unitigs);
@@ -358,6 +362,8 @@ getGlobalArrivalRate(tgStore *tigStore,
recalRate = MIN(recalRate, ar[maxDiffIdx]);
globalRate = MAX(globalRate, recalRate);
+
+ tigStore->unloadTig(i);
}
delete [] ar;
@@ -461,11 +467,7 @@ main(int argc, char **argv) {
exit(1);
}
- gkStore *gkpStore = gkStore::gkStore_open(gkpName, gkStore_readOnly);
- tgStore *tigStore = new tgStore(tigName, tigVers, tgStoreModify);
-
- if (endID == 0)
- endID = tigStore->numTigs();
+ // Open output files first, so we can fail before getting too far along.
{
char outName[FILENAME_MAX];
@@ -485,17 +487,21 @@ main(int argc, char **argv) {
fprintf(stderr, "Failed to open '%s': %s\n", outName, strerror(errno)), exit(1);
}
-
//
// Load fragment data
//
- double globalRate = 0;
+ fprintf(stderr, "Opening gkpStore '%s'\n", gkpName);
+
+ gkStore *gkpStore = gkStore::gkStore_open(gkpName, gkStore_readOnly);
+
+ fprintf(stderr, "Reading read lengths and randomness for %u reads.\n",
+ gkpStore->gkStore_getNumReads());
isNonRandom = new bool [gkpStore->gkStore_getNumReads() + 1];
readLength = new uint32 [gkpStore->gkStore_getNumReads() + 1];
- for (uint32 ii=0; ii<gkpStore->gkStore_getNumReads(); ii++) {
+ for (uint32 ii=0; ii<=gkpStore->gkStore_getNumReads(); ii++) {
gkRead *read = gkpStore->gkStore_getRead(ii);
gkLibrary *libr = gkpStore->gkStore_getLibrary(read->gkRead_libraryID());
@@ -503,11 +509,29 @@ main(int argc, char **argv) {
readLength[ii] = read->gkRead_sequenceLength();
}
+ fprintf(stderr, "Closing gkpStore.\n");
+
+ gkpStore->gkStore_close();
+ gkpStore = NULL;
+
+ //
+ // Open tigs. Kind of important to do this.
+ //
+
+ fprintf(stderr, "Opening tigStore '%s'\n", tigName);
+
+ tgStore *tigStore = new tgStore(tigName, tigVers, tgStoreModify);
+
+ if (endID == 0)
+ endID = tigStore->numTigs();
+
//
// Compute global arrival rate. This ain't cheap.
//
- globalRate = getGlobalArrivalRate(tigStore, outSTA, genomeSize, use_N50);
+ fprintf(stderr, "Computing global arrival rate.\n");
+
+ double globalRate = getGlobalArrivalRate(tigStore, outSTA, genomeSize, use_N50);
//
// Compute coverage stat for each unitig, populate histograms, write logging.
@@ -525,6 +549,8 @@ main(int argc, char **argv) {
//
// They were removed 13 Aug 2015.
+ fprintf(stderr, "Computing coverage stat for tigs %u-%u.\n", bgnID, endID-1);
+
for (uint32 i=bgnID; i<endID; i++) {
tgTig *tig = tigStore->loadTig(i);
@@ -570,6 +596,8 @@ main(int argc, char **argv) {
if (doUpdate)
tigStore->setCoverageStat(tig->tigID(), covStat);
+
+ tigStore->unloadTig(tig->tigID());
}
@@ -578,8 +606,6 @@ main(int argc, char **argv) {
delete [] isNonRandom;
delete [] readLength;
- gkpStore->gkStore_close();
-
delete tigStore;
exit(0);
diff --git a/src/stores/tgStoreLoad.C b/src/stores/tgStoreLoad.C
index 69cfb4a..027c7ae 100644
--- a/src/stores/tgStoreLoad.C
+++ b/src/stores/tgStoreLoad.C
@@ -179,7 +179,7 @@ main (int argc, char **argv) {
// Handle insertion.
if (tig->numberOfChildren() > 0) {
- fprintf(stderr, "INSERTING tig %d\n", tig->tigID());
+ //fprintf(stderr, "INSERTING tig %d\n", tig->tigID());
tigStore->insertTig(tig, false);
continue;
}
@@ -187,13 +187,13 @@ main (int argc, char **argv) {
// Deleted already?
if (tigStore->isDeleted(tig->tigID()) == true) {
- fprintf(stderr, "DELETING tig %d -- ALREADY DELETED\n", tig->tigID());
+ //fprintf(stderr, "DELETING tig %d -- ALREADY DELETED\n", tig->tigID());
continue;
}
// Really delete it then.
- fprintf(stderr, "DELETING tig %d\n", tig->tigID());
+ //fprintf(stderr, "DELETING tig %d\n", tig->tigID());
tigStore->deleteTig(tig->tigID());
}
diff --git a/src/utgcns/libNDFalcon/dw.C b/src/utgcns/libNDFalcon/dw.C
old mode 100755
new mode 100644
diff --git a/src/utgcns/libNDFalcon/dw.H b/src/utgcns/libNDFalcon/dw.H
old mode 100755
new mode 100644
diff --git a/src/utgcns/libcns/NOTES b/src/utgcns/libcns/NOTES
new file mode 100644
index 0000000..4d65cb8
--- /dev/null
+++ b/src/utgcns/libcns/NOTES
@@ -0,0 +1,38 @@
+
+
+BeadLinks
+
+(prev/this/next) M=max
+
+#0 <--> 0/0/0 <--> 0/0/0 <-->
+#1 <\ M/1/1 <--> 1/1/1 <-->
+#2 <\\> 1/2/M /> 3/2/2 <-->
+#3 \> 2/3/2 </
+
+
+
+
+ApplyAbacus
+
+RightShift
+ col = end
+ while colCount < windowWidth
+ getBase in abacus()
+
+ if base == n
+ move up
+ unalign trailing gap beads
+ else if
+ base != bead
+ "look for matching bead and exchange, adding gap beads if needed"
+
+UnalignTrailingGapBeads
+ move bead right while gap
+ if hit end, move bead left while gap
+ while bead != anchor
+ remove bead
+ if no next
+ goto prevBead
+ else
+ goto nextBead
+
diff --git a/src/utgcns/utgcns.C b/src/utgcns/utgcns.C
index 9900ae5..b20a371 100644
--- a/src/utgcns/utgcns.C
+++ b/src/utgcns/utgcns.C
@@ -49,6 +49,7 @@
#include "unitigConsensus.H"
+#include <omp.h>
#include <map>
#include <algorithm>
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/canu.git
More information about the debian-med-commit
mailing list