[med-svn] [Git][med-team/nanocall][master] 3 commits: New upstream version 0.7.4
Steffen Möller
gitlab at salsa.debian.org
Mon Apr 30 22:43:26 BST 2018
Steffen Möller pushed to branch master at Debian Med / nanocall
Commits:
b3c9819f by Steffen Moeller at 2018-04-30T23:26:55+02:00
New upstream version 0.7.4
- - - - -
fa74ec02 by Steffen Moeller at 2018-04-30T23:26:58+02:00
Update upstream source from tag 'upstream/0.7.4'
Update to upstream version '0.7.4'
with Debian dir 5098cd12de08cf95eaf9f23f677b425d5d8d24cc
- - - - -
a7d2b4d8 by Steffen Moeller at 2018-04-30T23:42:12+02:00
Adjusting to new version - still not compiling
- - - - -
11 changed files:
- Dockerfile
- README.org
- VERSION
- debian/changelog
- − debian/patches/remove_broken_makefile.patch
- debian/patches/series
- − src/Makefile
- src/nanocall/Event.hpp
- src/nanocall/Fast5_Summary.hpp
- src/nanocall/Parameter_Trainer.hpp
- src/nanocall/nanocall.cpp
Changes:
=====================================
Dockerfile
=====================================
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,11 +3,17 @@ MAINTAINER Matei David <matei.david.at.oicr.on.ca>
ARG DEBIAN_FRONTEND=noninteractive
# install prerequisites
-RUN apt-get update && \
- apt-get install -y \
- build-essential \
- cmake \
- libhdf5-dev
+RUN for i in 1 2 3; do \
+ apt-get update \
+ && break; sleep 1; \
+ done && \
+ for i in 1 2 3; do \
+ apt-get install -y \
+ build-essential \
+ cmake \
+ libhdf5-dev \
+ && break; sleep 1; \
+ done
# if necessary, specify compiler
#RUN apt-get install -y g++-4.9 g++-5 g++-6
=====================================
README.org
=====================================
--- a/README.org
+++ b/README.org
@@ -2,7 +2,7 @@
*** Nanocall: An Oxford Nanopore Basecaller
-[[http://travis-ci.org/mateidavid/nanocall][http://travis-ci.org/mateidavid/nanocall.svg?branch=master]]
+[[http://travis-ci.org/mateidavid/nanocall][http://travis-ci.org/mateidavid/nanocall.svg?branch=master]] [[https://tldrlegal.com/license/mit-license][http://img.shields.io/:license-mit-blue.svg]]
**** Installation
=====================================
VERSION
=====================================
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.6.14
+0.7.4
=====================================
debian/changelog
=====================================
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,16 @@
+nanocall (0.7.4-1) UNRELEASED; urgency=medium
+
+ * New upstream release - throws same compile error as 0.6.14 which is not the same as reported
+ in that issue 11:
+
+ In file included from /home/moeller/git/nanocall/src/nanocall/nanocall.cpp:10:0:
+ /home/moeller/git/nanocall/src/nanocall/Pore_Model.hpp:99:48: error: ‘Model_Entry’ in namespace ‘fast5’ does not name a type
+ Pore_Model_State& operator = (const fast5::Model_Entry& e)
+ ^~~~~~~~~~~
+ compilation terminated due to -fmax-errors=1.
+
+ -- Steffen Moeller <moeller at debian.org> Mon, 30 Apr 2018 23:39:17 +0200
+
nanocall (0.6.14-1) UNRELEASED; urgency=medium
* Initial release (Closes: #<bug>)
=====================================
debian/patches/remove_broken_makefile.patch deleted
=====================================
--- a/debian/patches/remove_broken_makefile.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Tue, 13 Sep 2016 14:28:24 +0200
-Description: Delete Makefile which does more harm than good - try cmake instead
-
---- a/src/Makefile
-+++ /dev/null
-@@ -1,35 +0,0 @@
--SHELL := /bin/bash
--
--HDF_ROOT = /usr
--
--OPT_FLAG = -O0
--CXXFLAGS = -Wall -Wextra -pedantic -g3 -ggdb -fno-eliminate-unused-debug-types ${OPT_FLAG}
--M_CXXFLAGS = -std=c++11 -pthread
--CPPFLAGS = -isystem ${HDF_ROOT}/include -I fast5/src -I tclap/include -I hpptools/include
--
--TARGETS = compute-state-transitions compute-scaled-pore-model run-fwbw run-viterbi nanocall
--
--.PHONY: all test clean
--
--all: ${TARGETS}
--
--clean:
-- rm -rf ${TARGETS}
--
--compute-state-transitions: compute-state-transitions.cpp
-- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS}
--
--compute-scaled-pore-model: compute-scaled-pore-model.cpp
-- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -L ${HDF_ROOT}/lib -lhdf5
--
--run-fwbw: run-fwbw.cpp
-- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -lz
--
--run-viterbi: run-viterbi.cpp
-- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -lz
--
--nanocall: nanocall.cpp Builtin_Model.cpp
-- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -L ${HDF_ROOT}/lib -lhdf5 -lz
--
--list-directory: list-directory.cpp
-- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS}
=====================================
debian/patches/series
=====================================
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,4 +1,4 @@
-remove_broken_makefile.patch
use_debian_packaged_libs.patch
no_rpath.patch
tclap.patch
+circumvent_compile_error.patch
=====================================
src/Makefile deleted
=====================================
--- a/src/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-SHELL := /bin/bash
-
-HDF_ROOT = /usr
-
-OPT_FLAG = -O0
-CXXFLAGS = -Wall -Wextra -pedantic -g3 -ggdb -fno-eliminate-unused-debug-types ${OPT_FLAG}
-M_CXXFLAGS = -std=c++11 -pthread
-CPPFLAGS = -isystem ${HDF_ROOT}/include -I fast5/src -I tclap/include -I hpptools/include
-
-TARGETS = compute-state-transitions compute-scaled-pore-model run-fwbw run-viterbi nanocall
-
-.PHONY: all test clean
-
-all: ${TARGETS}
-
-clean:
- rm -rf ${TARGETS}
-
-compute-state-transitions: compute-state-transitions.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS}
-
-compute-scaled-pore-model: compute-scaled-pore-model.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -L ${HDF_ROOT}/lib -lhdf5
-
-run-fwbw: run-fwbw.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -lz
-
-run-viterbi: run-viterbi.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -lz
-
-nanocall: nanocall.cpp Builtin_Model.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -L ${HDF_ROOT}/lib -lhdf5 -lz
-
-list-directory: list-directory.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS}
=====================================
src/nanocall/Event.hpp
=====================================
--- a/src/nanocall/Event.hpp
+++ b/src/nanocall/Event.hpp
@@ -22,7 +22,7 @@ public:
Float_Type log_mean;
Float_Type log_corrected_mean;
Float_Type log_stdv;
- Float_Type log_start;
+ //Float_Type log_start;
//
Float_Type orig_mean;
Float_Type p_model_state;
@@ -32,10 +32,16 @@ public:
//
void update_logs()
{
+ assert(mean > 0);
log_mean = std::log(mean);
+ assert(corrected_mean > 0);
log_corrected_mean = std::log(corrected_mean);
+ if (stdv == 0.0)
+ {
+ stdv = 0.01;
+ }
log_stdv = std::log(stdv);
- log_start = std::log(start);
+ //log_start = std::log(start);
}
void set_model_state(const std::string& s)
{
=====================================
src/nanocall/Fast5_Summary.hpp
=====================================
--- a/src/nanocall/Fast5_Summary.hpp
+++ b/src/nanocall/Fast5_Summary.hpp
@@ -73,7 +73,7 @@ public:
static unsigned& min_ed_events()
{
- static unsigned _min_ed_events = 1000;
+ static unsigned _min_ed_events = 10;
return _min_ed_events;
}
@@ -117,6 +117,20 @@ public:
return _hairpin_island_window_load;
}
+ // if set, do not split strands
+ static unsigned& template_only()
+ {
+ static unsigned _template_only = 0;
+ return _template_only;
+ }
+
+ // trim margins: after start, before end, before hairpin start, after hairpin end
+ static std::array< unsigned, 4 >& trim_margins()
+ {
+ static std::array< unsigned, 4 > _trim_margins = {{ 50u, 50u, 50u, 50u }};
+ return _trim_margins;
+ }
+
Fast5_Summary() : valid(false) {}
Fast5_Summary(const std::string fn, const Pore_Model_Dict_Type& models, bool sst)
: valid(false) { summarize(fn, models, sst); }
@@ -167,17 +181,11 @@ public:
{
read_id = ed_params.read_id;
}
- load_ed_events(&f);
- num_ed_events = ed_events().size();
- if (num_ed_events < 100 + min_ed_events())
+ load_ed_events(&f); // also sets num_ed_events
+ if (num_ed_events < trim_margins()[0] + trim_margins()[1] + min_ed_events())
{
- LOG("Fast5_Summary", info) << file_name << ": not enough eventdetection events: " << num_ed_events << std::endl;
- num_ed_events = 0;
- break;
- }
- if (num_ed_events > max_ed_events())
- {
- LOG("Fast5_Summary", info) << file_name << ": too many eventdetection events: " << num_ed_events << std::endl;
+ LOG("Fast5_Summary", info)
+ << file_name << ": not enough eventdetection events: " << num_ed_events << std::endl;
num_ed_events = 0;
break;
}
@@ -185,12 +193,14 @@ public:
abasic_level = detect_abasic_level();
if (abasic_level <= 1.0)
{
- LOG("Fast5_Summary", info) << file_name << ": abasic level too low: " << abasic_level << std::endl;
+ LOG("Fast5_Summary", info)
+ << file_name << ": abasic level too low: " << abasic_level << std::endl;
num_ed_events = 0;
break;
}
// detect strands
- detect_strands();
+ strand_bounds = {{ trim_margins()[0], num_ed_events - trim_margins()[1], 0, 0 }};
+ if (not template_only()) detect_strands();
if (strand_bounds[1] <= strand_bounds[0])
{
LOG("Fast5_Summary", info) << file_name << ": no template strand detected" << std::endl;
@@ -497,15 +507,26 @@ private:
ed_events_ptr = decltype(ed_events_ptr)(
new typename decltype(ed_events_ptr)::element_type(
f_p->get_eventdetection_events(eventdetection_group())));
+ if (num_ed_events == 0)
+ {
+ if (ed_events().size() > max_ed_events())
+ {
+ LOG("Fast5_Summary", info)
+ << file_name << ": using only " << max_ed_events()
+ << " of " << ed_events().size() << " events" << std::endl;
+ num_ed_events = max_ed_events();
+ }
+ else
+ {
+ num_ed_events = ed_events().size();
+ }
+ }
+ ed_events().resize(num_ed_events);
}
// crude detection of abasic level
Float_Type detect_abasic_level()
{
- if (ed_events().size() < min_ed_events())
- {
- return 0.0;
- }
//
// exclude top abasic_level_top_percent() levels
// add abasic_level_top_offset()
@@ -631,11 +652,6 @@ private:
// crude detection of strands in event sequence
void detect_strands()
{
- if (ed_events().size() < 100u)
- {
- return;
- }
- strand_bounds = { { 50, static_cast< unsigned >(ed_events().size() - 50), 0, 0 } };
LOG("Fast5_Summary", debug)
<< "num_events=" << ed_events().size()
<< " abasic_level=" << abasic_level << std::endl;
@@ -648,7 +664,7 @@ private:
//
for (unsigned i = 1; i < islands.size(); ++i)
{
- if (islands[i - 1].second + 50 >= islands[i].first)
+ if (islands[i - 1].second + std::max(trim_margins()[2], trim_margins()[3]) >= islands[i].first)
{
LOG("Fast5_Summary", debug) << "merge_islands "
<< "[" << islands[i - 1].first << "," << islands[i - 1].second << "] with "
@@ -699,15 +715,15 @@ private:
{
LOG("Fast5_Summary", debug)
<< "hairpin_island [" << it->first << "," << it->second << "]" << std::endl;
- strand_bounds[0] = 50;
- if (islands[0].first < 100)
+ strand_bounds[0] = trim_margins()[0];
+ if (islands[0].first < trim_margins()[0] + trim_margins()[2])
{
strand_bounds[0] = std::max(strand_bounds[0], islands[0].second);
}
- strand_bounds[1] = it->first - 50;
- strand_bounds[2] = it->first + 50;
- strand_bounds[3] = ed_events().size() - 50;
- if (islands[islands.size() - 1].second > ed_events().size() - 100)
+ strand_bounds[1] = it->first - trim_margins()[2];
+ strand_bounds[2] = it->first + trim_margins()[3];
+ strand_bounds[3] = ed_events().size() - trim_margins()[1];
+ if (islands[islands.size() - 1].second > ed_events().size() - (trim_margins()[3] + trim_margins()[1]))
{
strand_bounds[3] = std::min(strand_bounds[3], islands[islands.size() - 1].first);
}
=====================================
src/nanocall/Parameter_Trainer.hpp
=====================================
--- a/src/nanocall/Parameter_Trainer.hpp
+++ b/src/nanocall/Parameter_Trainer.hpp
@@ -62,6 +62,12 @@ struct Parameter_Trainer
return _st_train_kmers;
}
+ static unsigned& pm_train_drift()
+ {
+ static unsigned _pm_train_drift = 1;
+ return _pm_train_drift;
+ }
+
/**
* Struct used for training rounds.
* @event_seq_ptr_v Vector of pairs, first: an event sequence, second: strand from which it comes
@@ -290,13 +296,16 @@ struct Parameter_Trainer
} // for j
A[0][0] += s[0];
A[0][1] += s[1];
- A[0][2] += s[0] * t_i;
A[1][1] += s[2];
- A[1][2] += s[1] * t_i;
- A[2][2] += s[0] * t_i * t_i;
B[0] += s[0] * x_i;
B[1] += s[1] * x_i;
- B[2] += s[0] * x_i * t_i;
+ if (pm_train_drift())
+ {
+ A[0][2] += s[0] * t_i;
+ A[1][2] += s[1] * t_i;
+ A[2][2] += s[0] * t_i * t_i;
+ B[2] += s[0] * x_i * t_i;
+ }
D += s[0] * x_i * x_i;
V_numer += l[2] * y_i;
V_denom += l[1];
@@ -306,6 +315,10 @@ struct Parameter_Trainer
A[1][0] = A[0][1];
A[2][0] = A[0][2];
A[2][1] = A[1][2];
+ if (not pm_train_drift())
+ {
+ A[2][2] = 1.0;
+ }
auto A_copy = A;
auto B_copy = B;
// compute scaling vector used for scaled partial pivoting
@@ -384,7 +397,7 @@ struct Parameter_Trainer
double x = (A_copy[i][0] * a_hat
+ A_copy[i][1] * b_hat
+ A_copy[i][2] * c_hat);
- ASSERT((x - B_copy[i])/std::max(x, B_copy[i]) < 1e-3);
+ ASSERT((x - B_copy[i])/std::max(x, B_copy[i]) < pm_train_drift()? 1e-3 : 1e-2);
}
#endif
//
=====================================
src/nanocall/nanocall.cpp
=====================================
--- a/src/nanocall/nanocall.cpp
+++ b/src/nanocall/nanocall.cpp
@@ -57,6 +57,11 @@ namespace opts
ValueArg< unsigned > chunk_size("", "chunk-size", "Thread chunk size.", false, 1, "int", cmd_parser);
MultiArg< string > log_level("", "log", "Log level. (default: info)", false, "string", cmd_parser);
ValueArg< string > stats_fn("", "stats", "Stats.", false, "", "file", cmd_parser);
+ ValueArg< string > train_drift("", "train-drift", "Train drift parameter. (default: yes for R73, no for R9)", false, "", "0|1", cmd_parser);
+ ValueArg< unsigned > trim_ed_hp_end("", "trim-ed-hp-end", "Number of events to trim after hairpin end.", false, 50, "int", cmd_parser);
+ ValueArg< unsigned > trim_ed_hp_start("", "trim-ed-hp-start", "Number of events to trim before hairpin start.", false, 50, "int", cmd_parser);
+ ValueArg< unsigned > trim_ed_sq_end("", "trim-ed-sq-end", "Number of events to trim before sequence end.", false, 50, "int", cmd_parser);
+ ValueArg< unsigned > trim_ed_sq_start("", "trim-ed-sq-start", "Number of events to trim after sequence start.", false, 50, "int", cmd_parser);
ValueArg< unsigned > max_ed_events("", "max-ed-events", "Maximum EventDetection events.", false, 100000, "int", cmd_parser);
ValueArg< unsigned > min_ed_events("", "min-ed-events", "Minimum EventDetection events.", false, 10, "int", cmd_parser);
ValueArg< unsigned > fasta_line_width("", "fasta-line-width", "Maximum fasta line width.", false, 80, "int", cmd_parser);
@@ -66,6 +71,7 @@ namespace opts
ValueArg< unsigned > scaling_max_rounds("", "scaling-max-rounds", "Maximum scaling rounds.", false, 10, "int", cmd_parser);
ValueArg< unsigned > scaling_num_events("", "scaling-num-events", "Number of events used for model scaling.", false, 200, "int", cmd_parser);
//
+ SwitchArg template_only("", "1d", "Interpret entire read as 1D template only.", cmd_parser);
SwitchArg single_strand_scaling("", "single-strand-scaling", "Train scaling parameters per strand.", cmd_parser);
SwitchArg double_strand_scaling("", "double-strand-scaling", "Train scaling parameters per read. (default)", cmd_parser);
SwitchArg no_train_transitions("", "no-train-transitions", "Do not train state transitions.", cmd_parser);
@@ -919,18 +925,31 @@ int main(int argc, char * argv[])
Fast5_Summary_Type::min_ed_events() = opts::min_ed_events;
Fast5_Summary_Type::max_ed_events() = opts::max_ed_events;
Fast5_Summary_Type::eventdetection_group() = opts::ed_group;
+ Fast5_Summary_Type::template_only() = opts::template_only;
+ Fast5_Summary_Type::trim_margins() = {{ opts::trim_ed_sq_start, opts::trim_ed_sq_end, opts::trim_ed_hp_start, opts::trim_ed_hp_end }};
LOG (info) << "eventdetection_group=" << (Fast5_Summary_Type::eventdetection_group().empty()
? string("smallest")
: Fast5_Summary_Type::eventdetection_group()) << endl;
//
// set pore-related options
//
+ if (not opts::train_drift.get().empty()
+ and opts::train_drift.get() != "0"
+ and opts::train_drift.get() != "1")
+ {
+ LOG(error) << "train-drift not understdood: " << opts::train_drift.get() << endl;
+ return EXIT_FAILURE;
+ }
if (opts::pore.get() == "r9")
{
Fast5_Summary_Type::abasic_level_top_percent() = 1.0;
Fast5_Summary_Type::abasic_level_top_offset() = 0.0;
Fast5_Summary_Type::hairpin_island_window_size() = 10;
Fast5_Summary_Type::hairpin_island_window_load() = 5;
+ if (opts::train_drift.get().empty())
+ {
+ opts::train_drift.get() = "0";
+ }
}
else if (opts::pore.get() == "r73")
{
@@ -938,19 +957,38 @@ int main(int argc, char * argv[])
Fast5_Summary_Type::abasic_level_top_offset() = 5.0;
Fast5_Summary_Type::hairpin_island_window_size() = 5;
Fast5_Summary_Type::hairpin_island_window_load() = 5;
+ if (opts::train_drift.get().empty())
+ {
+ opts::train_drift.get() = "1";
+ }
}
else
{
LOG(error) << "unknown pore type: " << opts::pore.get() << endl;
return EXIT_FAILURE;
}
+ Parameter_Trainer_Type::pm_train_drift() = opts::train_drift.get() == "1";
LOG(info)
- << "hairpin_detection:"
- << " abasic_level_top_percent=" << Fast5_Summary_Type::abasic_level_top_percent()
- << " abasic_level_top_offset=" << Fast5_Summary_Type::abasic_level_top_offset()
- << " hairpin_island_window_size=" << Fast5_Summary_Type::hairpin_island_window_size()
- << " hairpin_island_window_load=" << Fast5_Summary_Type::hairpin_island_window_load()
- << endl;
+ << "ed_event_trimming: "
+ << " sq_start=" << Fast5_Summary_Type::trim_margins()[0]
+ << " sq_end=" << Fast5_Summary_Type::trim_margins()[1]
+ << " hp_start=" << Fast5_Summary_Type::trim_margins()[2]
+ << " hp_end=" << Fast5_Summary_Type::trim_margins()[3] << endl;
+ if (not opts::template_only.get())
+ {
+ LOG(info)
+ << "hairpin_detection:"
+ << " abasic_level_top_percent=" << Fast5_Summary_Type::abasic_level_top_percent()
+ << " abasic_level_top_offset=" << Fast5_Summary_Type::abasic_level_top_offset()
+ << " hairpin_island_window_size=" << Fast5_Summary_Type::hairpin_island_window_size()
+ << " hairpin_island_window_load=" << Fast5_Summary_Type::hairpin_island_window_load()
+ << endl;
+ }
+ else
+ {
+ LOG(info)
+ << "hairpin_detection: disabled" << endl;
+ }
//
// set training option
//
@@ -1034,6 +1072,7 @@ int main(int argc, char * argv[])
LOG(info) << "scaling_max_rounds=" << opts::scaling_max_rounds.get() << endl;
LOG(info) << "scaling_min_progress=" << opts::scaling_min_progress.get() << endl;
LOG(info) << "scaling_select_threshold=" << opts::scaling_select_threshold.get() << endl;
+ LOG(info) << "train_drift=" << opts::train_drift.get() << endl;
}
}
LOG(info) << "basecall=" << opts::basecall.get() << endl;
View it on GitLab: https://salsa.debian.org/med-team/nanocall/compare/51dea237b9c754355db28772f141037cb1b6d4c9...a7d2b4d83dd60d698ed25437d07f3028340fd4c6
---
View it on GitLab: https://salsa.debian.org/med-team/nanocall/compare/51dea237b9c754355db28772f141037cb1b6d4c9...a7d2b4d83dd60d698ed25437d07f3028340fd4c6
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180430/b6692af2/attachment-0001.html>
More information about the debian-med-commit
mailing list