[med-svn] [deepnano] 01/01: added autopkgtest
Çağrı ULAŞ
cagriulas-guest at moszumanska.debian.org
Sat Dec 17 05:05:52 UTC 2016
This is an automated email from the git hooks/post-receive script.
cagriulas-guest pushed a commit to branch master
in repository deepnano.
commit 4e029beab17cf15380d395aa23436d229be7e477
Author: Çağrı Ulaş <cagriulas at gmail.com>
Date: Sat Dec 17 07:47:20 2016 +0300
added autopkgtest
---
debian/control | 9 +
.../2016_3_4_3507_1_ch120_read521_strand.fast5.gz | Bin 0 -> 861690 bytes
.../2016_3_4_3507_1_ch13_read1130_strand.fast5.gz | Bin 0 -> 1066806 bytes
.../2016_3_4_3507_1_ch13_read1132_strand.fast5.gz | Bin 0 -> 1320364 bytes
debian/deepnano-data.install | 2 +
debian/deepnano/DEBIAN/control | 15 +
debian/deepnano/DEBIAN/md5sums | 25 ++
debian/deepnano/DEBIAN/postinst | 9 +
debian/deepnano/DEBIAN/prerm | 14 +
debian/deepnano/usr/bin/deepnano_basecall | 5 +
.../usr/bin/deepnano_basecall_no_metrichor | 1 +
debian/deepnano/usr/lib/deepnano/align_2d | Bin 0 -> 43096 bytes
debian/deepnano/usr/lib/deepnano/realign | Bin 0 -> 39000 bytes
debian/deepnano/usr/share/deepnano/basecall.py | 185 ++++++++++
.../usr/share/deepnano/basecall_no_metrichor.py | 277 +++++++++++++++
.../share/deepnano/basecall_no_metrichor_devel.py | 371 +++++++++++++++++++++
debian/deepnano/usr/share/deepnano/helpers.py | 76 +++++
debian/deepnano/usr/share/deepnano/rnn_fin.py | 81 +++++
.../usr/share/doc/deepnano/changelog.Debian.gz | Bin 0 -> 271 bytes
debian/deepnano/usr/share/doc/deepnano/copyright | 36 ++
.../doc/deepnano/examples/nets_data/map5-2d.npz.gz | Bin 0 -> 5082272 bytes
.../deepnano/examples/nets_data/map5comp.npz.gz | Bin 0 -> 1592095 bytes
.../deepnano/examples/nets_data/map5temp.npz.gz | Bin 0 -> 1592084 bytes
.../deepnano/examples/nets_data/map6-2d-big.npz.gz | Bin 0 -> 14015984 bytes
.../examples/nets_data/map6-2d-no-metr.npz.gz | Bin 0 -> 14015890 bytes
.../examples/nets_data/map6-2d-no-metr10.npz.gz | Bin 0 -> 14016340 bytes
.../examples/nets_data/map6-2d-no-metr20.npz.gz | Bin 0 -> 14015359 bytes
.../examples/nets_data/map6-2d-no-metr23.npz.gz | Bin 0 -> 14016230 bytes
.../doc/deepnano/examples/nets_data/map6-2d.npz.gz | Bin 0 -> 5081800 bytes
.../deepnano/examples/nets_data/map6comp.npz.gz | Bin 0 -> 1592557 bytes
.../deepnano/examples/nets_data/map6temp.npz.gz | Bin 0 -> 1592875 bytes
.../2016_3_4_3507_1_ch120_read521_strand.fast5.gz | Bin 0 -> 861647 bytes
.../2016_3_4_3507_1_ch13_read1130_strand.fast5.gz | Bin 0 -> 1066763 bytes
.../2016_3_4_3507_1_ch13_read1132_strand.fast5.gz | Bin 0 -> 1320321 bytes
.../usr/share/python/runtime.d/deepnano.rtupdate | 7 +
debian/source/include-binaries | 3 +
debian/tests/control | 3 +
debian/tests/run-test.sh | 24 ++
38 files changed, 1143 insertions(+)
diff --git a/debian/control b/debian/control
index 77bd8cc..8a20128 100644
--- a/debian/control
+++ b/debian/control
@@ -27,3 +27,12 @@ Description: alternative basecaller for MinION reads of genomic sequences
.
Currently it works with SQK-MAP-006 and SQK-MAP-005 chemistry and as a
postprocessor for Metrichor.
+
+Package: deepnano-data
+Architecture: any
+Depends: deepnano
+Description: alternative basecaller for MinION reads of genomic sequences
+ DeepNano is an alternative basecaller for Oxford Nanopore MinION reads
+ based on deep recurrent neural networks.
+ .
+ This package contains DeepNano's network files (nets_data) and test data.
diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz
new file mode 100644
index 0000000..89e17d1
Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz differ
diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz
new file mode 100644
index 0000000..37234d1
Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz differ
diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz
new file mode 100644
index 0000000..2c60372
Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz differ
diff --git a/debian/deepnano-data.install b/debian/deepnano-data.install
new file mode 100644
index 0000000..3f05815
--- /dev/null
+++ b/debian/deepnano-data.install
@@ -0,0 +1,2 @@
+nets_data/ usr/share/deepnano-data/
+debian/deepnano-data-files/test_data/ usr/share/deepnano-data/
diff --git a/debian/deepnano/DEBIAN/control b/debian/deepnano/DEBIAN/control
new file mode 100644
index 0000000..40bb851
--- /dev/null
+++ b/debian/deepnano/DEBIAN/control
@@ -0,0 +1,15 @@
+Package: deepnano
+Version: 0.0+20110617-1
+Architecture: amd64
+Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
+Installed-Size: 87902
+Depends: python:any (>= 2.7.5-5~), libc6 (>= 2.2.5), libgcc1 (>= 1:3.0), libstdc++6 (>= 5.2), python-h5py, python-numpy, python-dateutil, python-theano
+Section: science
+Priority: optional
+Homepage: https://bitbucket.org/vboza/deepnano
+Description: alternative basecaller for MinION reads of genomic sequences
+ DeepNano is alternative basecaller for Oxford Nanopore MinION reads
+ based on deep recurrent neural networks.
+ .
+ Currently it works with SQK-MAP-006 and SQK-MAP-005 chemistry and as a
+ postprocessor for Metrichor.
diff --git a/debian/deepnano/DEBIAN/md5sums b/debian/deepnano/DEBIAN/md5sums
new file mode 100644
index 0000000..64127b6
--- /dev/null
+++ b/debian/deepnano/DEBIAN/md5sums
@@ -0,0 +1,25 @@
+cba2f62f9fc586043fc00938b0e932b6 usr/bin/deepnano_basecall
+2b88df4d884e7afa2f22870458c97757 usr/lib/deepnano/align_2d
+bdb5eb7d2d0b3d70145310b7131c8d02 usr/lib/deepnano/realign
+bce23353ab354f2528a5de9661a5230c usr/share/deepnano/basecall.py
+5e1fe3018daa7b36e249c2157411812a usr/share/deepnano/basecall_no_metrichor.py
+3a4ae91d811983676c1f6237c8fec97e usr/share/deepnano/basecall_no_metrichor_devel.py
+115ccfa267eb418b79d57a4aad9b039e usr/share/deepnano/helpers.py
+e9bb97314500d839bb0ec8315a7a4ef9 usr/share/deepnano/rnn_fin.py
+cdf6a037be6f655d9c83430fbcc6f9d4 usr/share/doc/deepnano/changelog.Debian.gz
+35b0edea4c50091a781a9385b8c7705f usr/share/doc/deepnano/copyright
+702509a2bdf2369f5ea14062d5ae7762 usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz
+e6b1b2969b7448accf054142b846ab62 usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz
+fe10cb4e2efb306594eea797ceba70e4 usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz
+fb3755161d24834453c9d9d2f7db9353 usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz
+818c6b69c501943804cf2aca1b5203c3 usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz
+d93a44348cc5b454b15338dccec70b0f usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz
+7872e4100faa2dd13e21549174b0f171 usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz
+a672d7cba84ba1f8aacb36f998dc6866 usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz
+273653b4f06a1529a2448c53a8dcc94c usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz
+af5b1570fe91051b69e013d63bc5d446 usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz
+3e5342e80bad5a6e7193db9956c6380a usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz
+c9a6911fe747ab12be4721e4f543a609 usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz
+2f64706324cd5e8f10666f6b19fac14c usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz
+3113c8f6d453c1619ea606e7f768e10d usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz
+788eec3c08bb9ed41061cccd5f6d9d05 usr/share/python/runtime.d/deepnano.rtupdate
diff --git a/debian/deepnano/DEBIAN/postinst b/debian/deepnano/DEBIAN/postinst
new file mode 100755
index 0000000..5aac91b
--- /dev/null
+++ b/debian/deepnano/DEBIAN/postinst
@@ -0,0 +1,9 @@
+#!/bin/sh
+set -e
+
+# Automatically added by dh_python2:
+if which pycompile >/dev/null 2>&1; then
+ pycompile -p deepnano /usr/share/deepnano
+fi
+
+# End automatically added section
diff --git a/debian/deepnano/DEBIAN/prerm b/debian/deepnano/DEBIAN/prerm
new file mode 100755
index 0000000..a4c1086
--- /dev/null
+++ b/debian/deepnano/DEBIAN/prerm
@@ -0,0 +1,14 @@
+#!/bin/sh
+set -e
+
+# Automatically added by dh_python2:
+if which pyclean >/dev/null 2>&1; then
+ pyclean -p deepnano
+else
+ dpkg -L deepnano | grep \.py$ | while read file
+ do
+ rm -f "${file}"[co] >/dev/null
+ done
+fi
+
+# End automatically added section
diff --git a/debian/deepnano/usr/bin/deepnano_basecall b/debian/deepnano/usr/bin/deepnano_basecall
new file mode 100755
index 0000000..1d79c0a
--- /dev/null
+++ b/debian/deepnano/usr/bin/deepnano_basecall
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+SCRIPT=`basename $0 | sed 's/^deepnano_//'`
+
+/usr/share/deepnano/${SCRIPT}.py $@
diff --git a/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor b/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor
new file mode 120000
index 0000000..2041646
--- /dev/null
+++ b/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor
@@ -0,0 +1 @@
+deepnano_basecall
\ No newline at end of file
diff --git a/debian/deepnano/usr/lib/deepnano/align_2d b/debian/deepnano/usr/lib/deepnano/align_2d
new file mode 100755
index 0000000..6ce2cda
Binary files /dev/null and b/debian/deepnano/usr/lib/deepnano/align_2d differ
diff --git a/debian/deepnano/usr/lib/deepnano/realign b/debian/deepnano/usr/lib/deepnano/realign
new file mode 100755
index 0000000..47dbc8d
Binary files /dev/null and b/debian/deepnano/usr/lib/deepnano/realign differ
diff --git a/debian/deepnano/usr/share/deepnano/basecall.py b/debian/deepnano/usr/share/deepnano/basecall.py
new file mode 100755
index 0000000..aa81f75
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/basecall.py
@@ -0,0 +1,185 @@
+#!/usr/bin/python
+import argparse
+from rnn_fin import RnnPredictor
+import h5py
+import sys
+import numpy as np
+import theano as th
+import os
+import re
+import dateutil.parser
+import datetime
+from helpers import *
+
+def load_read_data(read_file):
+ h5 = h5py.File(read_file, "r")
+ ret = {}
+
+ extract_timing(h5, ret)
+
+ base_loc = get_base_loc(h5)
+
+ try:
+ ret["called_template"] = h5[base_loc+"/BaseCalled_template/Fastq"][()].split('\n')[1]
+ ret["called_complement"] = h5[base_loc+"/BaseCalled_complement/Fastq"][()].split('\n')[1]
+ ret["called_2d"] = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Fastq"][()].split('\n')[1]
+ except Exception as e:
+ pass
+ try:
+ events = h5[base_loc+"/BaseCalled_template/Events"]
+ tscale, tscale_sd, tshift, tdrift = extract_scaling(h5, "template", base_loc)
+ ret["temp_events"] = extract_1d_event_data(
+ h5, "template", base_loc, tscale, tscale_sd, tshift, tdrift)
+ except:
+ pass
+
+ try:
+ cscale, cscale_sd, cshift, cdrift = extract_scaling(h5, "complement", base_loc)
+ ret["comp_events"] = extract_1d_event_data(
+ h5, "complement", base_loc, cscale, cscale_sd, cshift, cdrift)
+ except Exception as e:
+ pass
+
+ try:
+ al = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Alignment"]
+ temp_events = h5[base_loc+"/BaseCalled_template/Events"]
+ comp_events = h5[base_loc+"/BaseCalled_complement/Events"]
+ ret["2d_events"] = []
+ for a in al:
+ ev = []
+ if a[0] == -1:
+ ev += [0, 0, 0, 0, 0]
+ else:
+ e = temp_events[a[0]]
+ mean = (e["mean"] - tshift) / cscale
+ stdv = e["stdv"] / tscale_sd
+ length = e["length"]
+ ev += [1] + preproc_event(mean, stdv, length)
+ if a[1] == -1:
+ ev += [0, 0, 0, 0, 0]
+ else:
+ e = comp_events[a[1]]
+ mean = (e["mean"] - cshift) / cscale
+ stdv = e["stdv"] / cscale_sd
+ length = e["length"]
+ ev += [1] + preproc_event(mean, stdv, length)
+ ret["2d_events"].append(ev)
+ ret["2d_events"] = np.array(ret["2d_events"], dtype=np.float32)
+ except Exception as e:
+ print e
+ pass
+
+ h5.close()
+ return ret
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz")
+parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz")
+parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-big.npz")
+parser.add_argument('reads', type=str, nargs='*')
+parser.add_argument('--timing', action='store_true', default=False)
+parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement")
+parser.add_argument('--output', type=str, default="output.fasta")
+parser.add_argument('--output_orig', action='store_true', default=False)
+parser.add_argument('--directory', type=str, default='', help="Directory where read files are stored")
+
+args = parser.parse_args()
+types = args.type.split(',')
+do_template = False
+do_complement = False
+do_2d = False
+
+if "all" in types or "template" in types:
+ do_template = True
+if "all" in types or "complement" in types:
+ do_complement = True
+if "all" in types or "2d" in types:
+ do_2d = True
+
+assert do_template or do_complement or do_2d, "Nothing to do"
+assert len(args.reads) != 0 or len(args.directory) != 0, "Nothing to basecall"
+
+if do_template:
+ print "loading template net"
+ temp_net = RnnPredictor(args.template_net)
+ print "done"
+if do_complement:
+ print "loading complement net"
+ comp_net = RnnPredictor(args.complement_net)
+ print "done"
+if do_2d:
+ print "loading 2D net"
+ big_net = RnnPredictor(args.big_net)
+ print "done"
+
+chars = "ACGT"
+mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}
+
+fo = open(args.output, "w")
+
+total_bases = [0, 0, 0]
+
+files = args.reads
+if len(args.directory):
+ files += [os.path.join(args.directory, x) for x in os.listdir(args.directory)]
+
+for i, read in enumerate(files):
+ basename = os.path.basename(read)
+ try:
+ data = load_read_data(read)
+ except Exception as e:
+ print "error at file", read
+ print e
+ continue
+ if not data:
+ continue
+ print "\rcalling read %d/%d %s" % (i, len(files), read),
+ sys.stdout.flush()
+ if args.output_orig:
+ try:
+ if "called_template" in data:
+ print >>fo, ">%s_template" % basename
+ print >>fo, data["called_template"]
+ if "called_complement" in data:
+ print >>fo, ">%s_complement" % basename
+ print >>fo, data["called_complement"]
+ if "called_2d" in data:
+ print >>fo, ">%s_2d" % basename
+ print >>fo, data["called_2d"]
+ except:
+ pass
+
+ temp_start = datetime.datetime.now()
+ if do_template and "temp_events" in data:
+ predict_and_write(data["temp_events"], temp_net, fo, "%s_template_rnn" % basename)
+ temp_time = datetime.datetime.now() - temp_start
+
+ comp_start = datetime.datetime.now()
+ if do_complement and "comp_events" in data:
+ predict_and_write(data["comp_events"], comp_net, fo, "%s_complement_rnn" % basename)
+ comp_time = datetime.datetime.now() - comp_start
+
+ start_2d = datetime.datetime.now()
+ if do_2d and "2d_events" in data:
+ predict_and_write(data["2d_events"], big_net, fo, "%s_2d_rnn" % basename)
+ time_2d = datetime.datetime.now() - start_2d
+
+ if args.timing:
+ try:
+ print "Events: %d/%d" % (len(data["temp_events"]), len(data["comp_events"]))
+ print "Our times: %f/%f/%f" % (temp_time.total_seconds(), comp_time.total_seconds(),
+ time_2d.total_seconds())
+ print "Our times per base: %f/%f/%f" % (
+ temp_time.total_seconds() / len(data["temp_events"]),
+ comp_time.total_seconds() / len(data["comp_events"]),
+ time_2d.total_seconds() / (len(data["comp_events"]) + len(data["temp_events"])))
+ print "Their times: %f/%f/%f" % (data["temp_time"].total_seconds(), data["comp_time"].total_seconds(), data["2d_time"].total_seconds())
+ print "Their times per base: %f/%f/%f" % (
+ data["temp_time"].total_seconds() / len(data["temp_events"]),
+ data["comp_time"].total_seconds() / len(data["comp_events"]),
+ data["2d_time"].total_seconds() / (len(data["comp_events"]) + len(data["temp_events"])))
+ except:
+ # Don't let timing throw us out
+ pass
+ fo.flush()
+fo.close()
diff --git a/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py
new file mode 100755
index 0000000..50b8dbc
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py
@@ -0,0 +1,277 @@
+#!/usr/bin/python
+import argparse
+from rnn_fin import RnnPredictor
+import h5py
+import sys
+import numpy as np
+import theano as th
+import os
+import re
+import dateutil.parser
+import datetime
+from helpers import *
+import subprocess
+import time
+
+def get_scaling_template(events, has_std):
+ down = 48.4631279889
+ up = 65.7312554591
+ our_down = np.percentile(events["mean"], 10)
+ our_up = np.percentile(events["mean"], 90)
+ scale = (our_up - our_down) / (up - down)
+ shift = (our_up / scale - up) * scale
+
+ sd = 0.807981325017
+ if has_std:
+ return scale, np.percentile(events["stdv"], 50) / sd, shift
+ else:
+ return scale, np.sqrt(np.percentile(events["variance"], 50)) / sd, shift
+
+
+def get_scaling_complement(events, has_std):
+ down = 49.2638926877
+ up = 69.0192568072
+ our_down = np.percentile(events["mean"], 10)
+ our_up = np.percentile(events["mean"], 90)
+ scale = (our_up - our_down) / (up - down)
+ shift = (our_up / scale - up) * scale
+
+ sd = 1.04324844612
+ if has_std:
+ return scale, np.percentile(events["stdv"], 50) / sd, shift
+ else:
+ return scale, np.sqrt(np.percentile(events["variance"], 50)) / sd, shift
+
+def template_complement_loc(events):
+ abasic_level = np.percentile(events["mean"], 99) + 5
+ abasic_locs = (events["mean"] > abasic_level).nonzero()[0]
+ last = -47
+ run_len = 1
+ runs = []
+ for x in abasic_locs:
+ if x - last == 1:
+ run_len += 1
+ else:
+ if run_len >= 5:
+ if len(runs) and last - runs[-1][0] < 50:
+ run_len = last - runs[-1][0]
+ run_len += runs[-1][1]
+ runs[-1] = (last, run_len)
+ else:
+ runs.append((last, run_len))
+ run_len = 1
+ last = x
+ to_sort = []
+ mid = len(events) / 2
+ low_third = len(events) / 3
+ high_third = len(events) / 3 * 2
+ for r in runs:
+ if r[0] < low_third:
+ continue
+ if r[0] > high_third:
+ continue
+ to_sort.append((abs(r[0] - mid), r[0] - r[1], r[0]))
+ to_sort.sort()
+ if len(to_sort) == 0:
+ return None
+ trim_size = 10
+ return {"temp": (trim_size, to_sort[0][1] - trim_size),
+ "comp": (to_sort[0][2] + trim_size, len(events) - trim_size)}
+
+def load_read_data(read_file):
+ h5 = h5py.File(read_file, "r")
+ ret = {}
+
+ read_key = h5["Analyses/EventDetection_000/Reads"].keys()[0]
+ base_events = h5["Analyses/EventDetection_000/Reads"][read_key]["Events"]
+ temp_comp_loc = template_complement_loc(base_events)
+ sampling_rate = h5["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
+
+ if temp_comp_loc:
+ events = base_events[temp_comp_loc["temp"][0]:temp_comp_loc["temp"][1]]
+ else:
+ events = base_events
+ has_std = True
+ try:
+ std = events[0]["stdv"]
+ except:
+ has_std = False
+ tscale2, tscale_sd2, tshift2 = get_scaling_template(events, has_std)
+
+ index = 0.0
+ ret["temp_events2"] = []
+ for e in events:
+ mean = (e["mean"] - tshift2) / tscale2
+ if has_std:
+ stdv = e["stdv"] / tscale_sd2
+ else:
+ stdv = np.sqrt(e["variance"]) / tscale_sd2
+ length = e["length"] / sampling_rate
+ ret["temp_events2"].append(preproc_event(mean, stdv, length))
+
+ ret["temp_events2"] = np.array(ret["temp_events2"], dtype=np.float32)
+
+ if not temp_comp_loc:
+ return ret
+
+ events = base_events[temp_comp_loc["comp"][0]:temp_comp_loc["comp"][1]]
+ cscale2, cscale_sd2, cshift2 = get_scaling_complement(events, has_std)
+
+ index = 0.0
+ ret["comp_events2"] = []
+ for e in events:
+ mean = (e["mean"] - cshift2) / cscale2
+ if has_std:
+ stdv = e["stdv"] / cscale_sd2
+ else:
+ stdv = np.sqrt(e["variance"]) / cscale_sd2
+ length = e["length"] / sampling_rate
+ ret["comp_events2"].append(preproc_event(mean, stdv, length))
+
+ ret["comp_events2"] = np.array(ret["comp_events2"], dtype=np.float32)
+
+ return ret
+
+def basecall(read_file_name, fo):
+ basename = os.path.basename(read_file_name)
+ try:
+ data = load_read_data(read_file_name)
+ except Exception as e:
+ print e
+ print "error at file", read_file_name
+ return
+
+ if do_template or do_2d:
+ o1, o2 = predict_and_write(
+ data["temp_events2"], temp_net,
+ fo if do_template else None,
+ "%s_template_rnn" % basename)
+
+ if (do_complement or do_2d) and "comp_events2" in data:
+ o1c, o2c = predict_and_write(
+ data["comp_events2"], comp_net,
+ fo if do_complement else None,
+ "%s_complement_rnn" % basename)
+
+ if do_2d and "comp_events2" in data and\
+ len(data["comp_events2"]) <= args.max_2d_length and\
+ len(data["temp_events2"]) <= args.max_2d_length:
+ p = subprocess.Popen("/usr/lib/deepnano/align_2d", stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ f2d = p.stdin
+ print >>f2d, len(o1)+len(o2)
+ for a, b in zip(o1, o2):
+ print >>f2d, " ".join(map(str, a))
+ print >>f2d, " ".join(map(str, b))
+ print >>f2d, len(o1c)+len(o2c)
+ for a, b in zip(o1c, o2c):
+ print >>f2d, " ".join(map(str, a))
+ print >>f2d, " ".join(map(str, b))
+ f2do, f2de = p.communicate()
+ if p.returncode != 0:
+ return
+ lines = f2do.strip().split('\n')
+ print >>fo, ">%s_2d_rnn_simple" % basename
+ print >>fo, lines[0].strip()
+ events_2d = []
+ for l in lines[1:]:
+ temp_ind, comp_ind = map(int, l.strip().split())
+ e = []
+ if temp_ind == -1:
+ e += [0, 0, 0, 0, 0]
+ else:
+ e += [1] + list(data["temp_events2"][temp_ind])
+ if comp_ind == -1:
+ e += [0, 0, 0, 0, 0]
+ else:
+ e += [1] + list(data["comp_events2"][comp_ind])
+ events_2d.append(e)
+ events_2d = np.array(events_2d, dtype=np.float32)
+ predict_and_write(events_2d, big_net, fo, "%s_2d_rnn" % basename)
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz")
+parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz")
+parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-no-metr23.npz")
+parser.add_argument('--max_2d_length', type=int, default=10000, help='Max length for 2d basecall')
+parser.add_argument('reads', type=str, nargs='*')
+parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement")
+parser.add_argument('--output', type=str, default="output.fasta")
+parser.add_argument('--directory', type=str, default='', help="Directory where read files are stored")
+parser.add_argument('--watch', type=str, default='', help='Watched directory')
+
+
+args = parser.parse_args()
+types = args.type.split(',')
+do_template = False
+do_complement = False
+do_2d = False
+
+if "all" in types or "template" in types:
+ do_template = True
+if "all" in types or "complement" in types:
+ do_complement = True
+if "all" in types or "2d" in types:
+ do_2d = True
+
+assert do_template or do_complement or do_2d, "Nothing to do"
+assert len(args.reads) != 0 or len(args.directory) != 0 or len(args.watch) != 0, "Nothing to basecall"
+
+if do_template or do_2d:
+ print "loading template net"
+ temp_net = RnnPredictor(args.template_net)
+ print "done"
+if do_complement or do_2d:
+ print "loading complement net"
+ comp_net = RnnPredictor(args.complement_net)
+ print "done"
+if do_2d:
+ print "loading 2D net"
+ big_net = RnnPredictor(args.big_net)
+ print "done"
+
+chars = "ACGT"
+mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}
+
+if len(args.reads) or len(args.directory) != 0:
+ fo = open(args.output, "w")
+
+ files = args.reads
+ if len(args.directory):
+ files += [os.path.join(args.directory, x) for x in os.listdir(args.directory)]
+
+ for i, read in enumerate(files):
+ basecall(read, fo)
+
+ fo.close()
+
+if len(args.watch) != 0:
+ try:
+ from watchdog.observers import Observer
+ from watchdog.events import PatternMatchingEventHandler
+ except:
+ print "Please install watchdog to watch directories"
+ sys.exit()
+
+ class Fast5Handler(PatternMatchingEventHandler):
+ """Class for handling creation fo fast5-files"""
+ patterns = ["*.fast5"]
+ def on_created(self, event):
+ print "Calling", event
+ file_name = str(os.path.basename(event.src_path))
+ fasta_file_name = os.path.splitext(event.src_path)[0] + '.fasta'
+ with open(fasta_file_name, "w") as fo:
+ basecall(event.src_path, fo)
+ print('Watch dir: ' + args.watch)
+ observer = Observer()
+ print('Starting Observerer')
+ # start watching directory for fast5-files
+ observer.start()
+ observer.schedule(Fast5Handler(), path=args.watch)
+ try:
+ while True:
+ time.sleep(1)
+ # quit script using ctrl+c
+ except KeyboardInterrupt:
+ observer.stop()
+
+ observer.join()
diff --git a/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py
new file mode 100644
index 0000000..488fee3
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py
@@ -0,0 +1,371 @@
+import argparse
+from rnn_fin import RnnPredictor
+import h5py
+import sys
+import numpy as np
+import theano as th
+import os
+import re
+import dateutil.parser
+import datetime
+
+def preproc_event(mean, std, length):
+ mean = mean / 100.0 - 0.66
+ std = std - 1
+ return [mean, mean*mean, std, length]
+
+def get_scaling_template(events):
+ down = 48.4631279889
+ up = 65.7312554591
+ our_down = np.percentile(events["mean"], 10)
+ our_up = np.percentile(events["mean"], 90)
+ scale = (our_up - our_down) / (up - down)
+ shift = (our_up / scale - up) * scale
+
+ sd = 0.807981325017
+ return scale, np.percentile(events["stdv"], 50) / sd, shift
+
+def get_scaling_complement(events):
+ down = 49.2638926877
+ up = 69.0192568072
+ our_down = np.percentile(events["mean"], 10)
+ our_up = np.percentile(events["mean"], 90)
+ scale = (our_up - our_down) / (up - down)
+ shift = (our_up / scale - up) * scale
+
+ sd = 1.04324844612
+ return scale, np.percentile(events["stdv"], 50) / sd, shift
+
+def template_complement_loc(events):
+ abasic_level = np.percentile(events["mean"], 99) + 5
+ abasic_locs = (events["mean"] > abasic_level).nonzero()[0]
+ last = -47
+ run_len = 1
+ runs = []
+ for x in abasic_locs:
+ if x - last == 1:
+ run_len += 1
+ else:
+ if run_len >= 5:
+ if len(runs) and last - runs[-1][0] < 50:
+ run_len = last - runs[-1][0]
+ run_len += runs[-1][1]
+ runs[-1] = (last, run_len)
+ else:
+ runs.append((last, run_len))
+ run_len = 1
+ last = x
+ to_sort = []
+ mid = len(events) / 2
+ low_third = len(events) / 3
+ high_third = len(events) / 3 * 2
+ for r in runs:
+ if r[0] < low_third:
+ continue
+ if r[0] > high_third:
+ continue
+ to_sort.append((abs(r[0] - mid), r[0] - r[1], r[0]))
+ to_sort.sort()
+ if len(to_sort) == 0:
+ return None
+ trim_size = 10
+ return {"temp": (trim_size, to_sort[0][1] - trim_size),
+ "comp": (to_sort[0][2] + trim_size, len(events) - trim_size)}
+
+def load_read_data(read_file):
+ h5 = h5py.File(read_file, "r")
+ ret = {}
+
+ read_key = h5["Analyses/EventDetection_000/Reads"].keys()[0]
+ base_events = h5["Analyses/EventDetection_000/Reads"][read_key]["Events"]
+ temp_comp_loc = template_complement_loc(base_events)
+ if not temp_comp_loc:
+ return None
+
+# print "temp_comp_loc", temp_comp_loc["temp"], temp_comp_loc["comp"]
+# print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["start_index_temp"],
+# print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["end_index_temp"],
+# print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["start_index_comp"],
+# print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["end_index_comp"]
+
+ sampling_rate = h5["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
+
+ try:
+ ret["called_template"] = h5["Analyses/Basecall_2D_000/BaseCalled_template/Fastq"][()].split('\n')[1]
+ ret["called_complement"] = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Fastq"][()].split('\n')[1]
+ ret["called_2d"] = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Fastq"][()].split('\n')[1]
+ except Exception as e:
+ print "wat", e
+ return None
+ events = base_events[temp_comp_loc["temp"][0]:temp_comp_loc["temp"][1]]
+ tscale2, tscale_sd2, tshift2 = get_scaling_template(events)
+
+ index = 0.0
+ ret["temp_events2"] = []
+ for e in events:
+ mean = (e["mean"] - tshift2) / tscale2
+ stdv = e["stdv"] / tscale_sd2
+ length = e["length"] / sampling_rate
+ ret["temp_events2"].append(preproc_event(mean, stdv, length))
+ events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"]
+ tscale = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["scale"]
+ tscale_sd = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["scale_sd"]
+ tshift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["shift"]
+ tdrift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["drift"]
+ index = 0.0
+ ret["temp_events"] = []
+ for e in events:
+ mean = (e["mean"] - tshift - index * tdrift) / tscale
+ stdv = e["stdv"] / tscale_sd
+ length = e["length"]
+ ret["temp_events"].append(preproc_event(mean, stdv, length))
+ index += e["length"]
+
+ events = base_events[temp_comp_loc["comp"][0]:temp_comp_loc["comp"][1]]
+ cscale2, cscale_sd2, cshift2 = get_scaling_complement(events)
+
+ index = 0.0
+ ret["comp_events2"] = []
+ for e in events:
+ mean = (e["mean"] - cshift2) / cscale2
+ stdv = e["stdv"] / cscale_sd2
+ length = e["length"] / sampling_rate
+ ret["comp_events2"].append(preproc_event(mean, stdv, length))
+
+ events = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Events"]
+ cscale = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["scale"]
+ cscale_sd = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["scale_sd"]
+ cshift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["shift"]
+ cdrift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["drift"]
+ index = 0.0
+ ret["comp_events"] = []
+ for e in events:
+ mean = (e["mean"] - cshift - index * cdrift) / cscale
+ stdv = e["stdv"] / cscale_sd
+ length = e["length"]
+ ret["comp_events"].append(preproc_event(mean, stdv, length))
+ index += e["length"]
+
+ ret["temp_events2"] = np.array(ret["temp_events2"], dtype=np.float32)
+ ret["comp_events2"] = np.array(ret["comp_events2"], dtype=np.float32)
+ ret["temp_events"] = np.array(ret["temp_events"], dtype=np.float32)
+ ret["comp_events"] = np.array(ret["comp_events"], dtype=np.float32)
+
+ al = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Alignment"]
+ ret["al"] = al
+ temp_events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"]
+ comp_events = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Events"]
+ ret["2d_events"] = []
+ for a in al:
+ ev = []
+ if a[0] == -1:
+ ev += [0, 0, 0, 0, 0]
+ else:
+ e = temp_events[a[0]]
+ mean = (e["mean"] - tshift - index * tdrift) / cscale
+ stdv = e["stdv"] / tscale_sd
+ length = e["length"]
+ ev += [1] + preproc_event(mean, stdv, length)
+ if a[1] == -1:
+ ev += [0, 0, 0, 0, 0]
+ else:
+ e = comp_events[a[1]]
+ mean = (e["mean"] - cshift - index * cdrift) / cscale
+ stdv = e["stdv"] / cscale_sd
+ length = e["length"]
+ ev += [1] + preproc_event(mean, stdv, length)
+ ret["2d_events"].append(ev)
+ ret["2d_events"] = np.array(ret["2d_events"], dtype=np.float32)
+ return ret
+
+parser = argparse.ArgumentParser()  # Command-line options for this script.
+parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz")  # weights file handed to RnnPredictor for the template strand
+parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz")  # weights file handed to RnnPredictor for the complement strand
+parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-big.npz")  # weights file handed to RnnPredictor for the 2D network
+parser.add_argument('reads', type=str, nargs='+')  # one or more read files, each passed to load_read_data()
+parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement")
+parser.add_argument('--output', type=str, default="output.fasta")  # file that receives every FASTA record written below
+parser.add_argument('--output_orig', action='store_true', default=True)  # NOTE(review): store_true with default=True is always True, so the flag cannot be switched off
+
+args = parser.parse_args()
+types = args.type.split(',')  # --type is a comma-separated list of modes
+do_template = False
+do_complement = False
+do_2d = False
+
+if "all" in types or "template" in types:  # "all" switches on every mode
+ do_template = True
+if "all" in types or "complement" in types:
+ do_complement = True
+if "all" in types or "2d" in types:
+ do_2d = True
+
+assert do_template or do_complement or do_2d, "Nothing to do"  # fails when --type named none of the known modes
+
+if do_template or do_2d:  # 2D mode needs the template net too: its outputs are written to 2d.in further down
+ print "loading template net"
+ temp_net = RnnPredictor(args.template_net)
+ print "done"
+if do_complement or do_2d:  # likewise the complement net is needed for 2D mode
+ print "loading complement net"
+ comp_net = RnnPredictor(args.complement_net)
+ print "done"
+if do_2d:
+ print "loading 2D net"
+ big_net = RnnPredictor(args.big_net)
+ big_net_orig = RnnPredictor("nets_data/map6-2d-big.npz")  # NOTE(review): hard-coded path; big_net_orig is not referenced in the rest of the script shown here
+ print "done"
+
+chars = "ACGT"  # class index 0-3 -> base letter; indices >= 4 are skipped when writing
+mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}  # letter -> index; not used in the remainder of the script shown here
+
+fo = open(args.output, "w")  # shared output file for all reads; NOTE(review): never explicitly closed in the code shown here
+
+total_bases = [0, 0, 0]  # NOTE(review): not updated or read in the remainder of the script shown here
+
+for i, read in enumerate(args.reads):  # Process every read file; i is used to name the FASTA records.
+ if True:  # stands in for the try: of the commented-out error handling below
+ data = load_read_data(read)
+# except Exception as e:
+# print e
+# print "error at file", read
+# continue
+ if not data:  # skip reads for which load_read_data returned a falsy value
+ continue
+ if args.output_orig:  # echo the calls already present in the loaded data (data["called_*"])
+ print >>fo, ">%d_template" % i
+ print >>fo, data["called_template"]
+ print >>fo, ">%d_complement" % i
+ print >>fo, data["called_complement"]
+ print >>fo, ">%d_2d" % i
+ print >>fo, data["called_2d"]
+
+ if do_template or do_2d:
+ o1, o2 = temp_net.predict(data["temp_events"])  # first template pass, on data["temp_events"]
+ o1m = (np.argmax(o1, 1))  # most likely class per row of each output
+ o2m = (np.argmax(o2, 1))
+ print >>fo, ">%d_temp_rnn" % i
+ for a, b in zip(o1m, o2m):  # interleave both outputs; class indices >= 4 emit no base
+ if a < 4:
+ fo.write(chars[a])
+ if b < 4:
+ fo.write(chars[b])
+ fo.write('\n')
+ o1, o2 = temp_net.predict(data["temp_events2"])  # second template pass, on data["temp_events2"]; these o1/o2 are what the 2D step writes to 2d.in
+ o1m = (np.argmax(o1, 1))
+ o2m = (np.argmax(o2, 1))
+ if do_template:
+ print >>fo, ">%d_temp_rnn2" % i
+ for a, b in zip(o1m, o2m):
+ if a < 4:
+ fo.write(chars[a])
+ if b < 4:
+ fo.write(chars[b])
+ fo.write('\n')
+
+ if do_complement or do_2d:
+ o1c, o2c = comp_net.predict(data["comp_events"])  # first complement pass, on data["comp_events"]
+ o1cm = (np.argmax(o1c, 1))
+ o2cm = (np.argmax(o2c, 1))
+ print >>fo, ">%d_comp_rnn" % i
+ for a, b in zip(o1cm, o2cm):
+ if a < 4:
+ fo.write(chars[a])
+ if b < 4:
+ fo.write(chars[b])
+ fo.write('\n')
+ o1c, o2c = comp_net.predict(data["comp_events2"])  # second complement pass, on data["comp_events2"]; these o1c/o2c are what the 2D step writes to 2d.in
+ o1cm = (np.argmax(o1c, 1))
+ o2cm = (np.argmax(o2c, 1))
+ if do_complement:
+ print >>fo, ">%d_comp_rnn2" % i
+ for a, b in zip(o1cm, o2cm):
+ if a < 4:
+ fo.write(chars[a])
+ if b < 4:
+ fo.write(chars[b])
+ fo.write('\n')
+
+ if do_2d:
+ f2d = open("2d.in", "w")  # input for the external aligner, created in the current working directory
+ print >>f2d, len(o1)+len(o2)  # row count first, then the rows of o1 and o2 interleaved
+ for a, b in zip(o1, o2):
+ print >>f2d, " ".join(map(str, a))
+ print >>f2d, " ".join(map(str, b))
+ print >>f2d, len(o1c)+len(o2c)  # same layout for the complement outputs
+ for a, b in zip(o1c, o2c):
+ print >>f2d, " ".join(map(str, a))
+ print >>f2d, " ".join(map(str, b))
+ f2d.close()
+ os.system("/usr/lib/deepnano/align_2d <2d.in >2d.out")  # NOTE(review): the exit status of the aligner is not checked
+ f2do = open("2d.out")  # NOTE(review): f2do is not closed in the code shown here
+ call2d = f2do.next().strip()  # the first line of 2d.out is written out as the "simple" 2D call
+ print >>fo, ">%d_2d_rnn_simple" % i
+ print >>fo, call2d
+
+ start_temp_ours = None
+ end_temp_ours = None
+ start_comp_ours = None
+ end_comp_ours = None
+ events_2d = []
+ for l in f2do:  # each remaining line holds two integers: a template index and a complement index
+ temp_ind, comp_ind = map(int, l.strip().split())
+ e = []
+ if temp_ind == -1:  # -1 means no template event for this row
+ e += [0, 0, 0, 0, 0]
+ else:
+ e += [1] + list(data["temp_events2"][temp_ind])  # leading 1 marks the event as present
+ if not start_temp_ours:  # NOTE(review): also true when the stored index is 0, so a start of 0 is overwritten later; "is None" is probably intended
+ start_temp_ours = temp_ind
+ end_temp_ours = temp_ind
+ if comp_ind == -1:  # -1 means no complement event for this row
+ e += [0, 0, 0, 0, 0]
+ else:
+ e += [1] + list(data["comp_events2"][comp_ind])
+ if not end_comp_ours:  # NOTE(review): same 0-is-falsy caveat; here "end" appears to be the first index seen and "start" the latest one (mirror of the template case) - confirm
+ end_comp_ours = comp_ind
+ start_comp_ours = comp_ind
+ events_2d.append(e)
+ events_2d = np.array(events_2d, dtype=np.float32)
+ o1c, o2c = big_net.predict(events_2d)  # 2D call on the rows built from 2d.out (reuses the o1c/o2c names)
+ o1cm = (np.argmax(o1c, 1))
+ o2cm = (np.argmax(o2c, 1))
+ print >>fo, ">%d_2d_rnn2" % i
+ for a, b in zip(o1cm, o2cm):
+ if a < 4:
+ fo.write(chars[a])
+ if b < 4:
+ fo.write(chars[b])
+ fo.write('\n')
+ o1c, o2c = big_net.predict(data["2d_events"])  # 2D call on data["2d_events"] as prepared by load_read_data
+ o1cm = (np.argmax(o1c, 1))
+ o2cm = (np.argmax(o2c, 1))
+ print >>fo, ">%d_2d_rnn" % i
+ for a, b in zip(o1cm, o2cm):
+ if a < 4:
+ fo.write(chars[a])
+ if b < 4:
+ fo.write(chars[b])
+ fo.write('\n')
+
+ start_temp_th = None
+ end_temp_th = None
+ start_comp_th = None
+ end_comp_th = None
+ for a in data["al"]:  # same start/end bookkeeping, this time over the index pairs in data["al"]
+ if a[0] != -1:
+ if not start_temp_th:  # NOTE(review): same 0-is-falsy caveat as above
+ start_temp_th = a[0]
+ end_temp_th = a[0]
+ if a[1] != -1:
+ if not end_comp_th:  # NOTE(review): same 0-is-falsy caveat as above
+ end_comp_th = a[1]
+ start_comp_th = a[1]
+
+ print "Ours:",  # trailing comma keeps the following prints on the same output line (Python 2)
+ print start_temp_ours, end_temp_ours, start_comp_ours, end_comp_ours,
+ print 1. * len(events_2d) / (end_temp_ours - start_temp_ours + end_comp_ours - start_comp_ours)  # NOTE(review): TypeError if a bound is still None, ZeroDivisionError if both spans are 0
+ print "Their:",
+ print start_temp_th, end_temp_th, start_comp_th, end_comp_th,
+ print 1. * len(data["al"]) / (end_temp_th - start_temp_th + end_comp_th - start_comp_th)  # NOTE(review): same None / zero-span caveat
+ print
diff --git a/debian/deepnano/usr/share/deepnano/helpers.py b/debian/deepnano/usr/share/deepnano/helpers.py
new file mode 100644
index 0000000..6808562
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/helpers.py
@@ -0,0 +1,76 @@
+from rnn_fin import RnnPredictor
+import h5py
+import sys
+import numpy as np
+import theano as th
+import os
+import re
+import dateutil.parser
+import datetime
+import argparse
+
+chars = "ACGT"  # class index 0-3 -> base letter; predict_and_write emits nothing for indices >= 4
+mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}  # letter -> index, with "N" as 4; not used by the helpers shown in this file
+
+def preproc_event(mean, std, length):  # Turn one event's (mean, std, length) into the feature list [mean', mean'*mean', std', length].
+ mean = mean / 100.0 - 0.66  # rescale and offset the mean with fixed constants
+ std = std - 1  # offset the standard deviation by 1
+ return [mean, mean*mean, std, length]  # length is passed through unchanged
+
+def predict_and_write(events, ntwk, fo, read_name):  # Run ntwk.predict(events); if fo is truthy, also write the call to fo as a FASTA record named read_name. Returns the two raw outputs.
+ o1, o2 = ntwk.predict(events)
+ if fo:  # writing is optional: a falsy fo just returns the outputs
+ o1m = (np.argmax(o1, 1))  # most likely class per row of each output
+ o2m = (np.argmax(o2, 1))
+ print >>fo, ">%s" % read_name
+ for a, b in zip(o1m, o2m):  # interleave the two outputs row by row
+ if a < 4:  # indices 0-3 select a letter from chars; anything >= 4 emits nothing
+ fo.write(chars[a])
+ if b < 4:
+ fo.write(chars[b])
+ fo.write('\n')
+ return o1, o2
+
+def extract_timing(h5, ret):  # Best effort: parse timestamps from the Basecall_2D_000 log in h5 and store three durations in ret; silently gives up on any error.
+ try:
+ log = h5["Analyses/Basecall_2D_000/Log"][()]  # read the whole log entry
+ temp_time = dateutil.parser.parse(re.search(r"(.*) Basecalling template.*", log).groups()[0])  # each regex captures the text before a marker phrase on its line and parses it as a date
+ comp_time = dateutil.parser.parse(re.search(r"(.*) Basecalling complement.*", log).groups()[0])
+ comp_end_time = dateutil.parser.parse(re.search(r"(.*) Aligning hairpin.*", log).groups()[0])
+
+ start_2d_time = dateutil.parser.parse(re.search(r"(.*) Performing full 2D.*", log).groups()[0])
+ end_2d_time = dateutil.parser.parse(re.search(r"(.*) Workflow completed.*", log).groups()[0])
+
+ ret["temp_time"] = comp_time - temp_time  # template duration: from the template marker to the complement marker
+ ret["comp_time"] = comp_end_time - comp_time  # complement duration: from the complement marker to the hairpin-alignment marker
+ ret["2d_time"] = end_2d_time - start_2d_time  # 2D duration: from the full-2D marker to the workflow-completed marker
+ except:  # NOTE(review): bare except also hides unrelated errors (even KeyboardInterrupt); a missing log and a parsing bug look the same to the caller
+ pass
+
+def get_base_loc(h5):  # Return "Analyses/Basecall_2D_000" if its template Events entry can be read from h5, otherwise "Analyses/Basecall_1D_000".
+ base_loc = "Analyses/Basecall_2D_000"
+ try:
+ events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"]  # lookup is only a probe; the value itself is never used
+ except:  # NOTE(review): bare except treats any error, not just a missing entry, as "fall back to 1D"
+ base_loc = "Analyses/Basecall_1D_000"
+ return base_loc
+
+def extract_scaling(h5, read_type, base_loc):  # Read the scale, scale_sd, shift and drift attributes of <base_loc>/Summary/basecall_1d_<read_type> and return them in that order.
+ scale = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["scale"]  # the same entry is looked up once per attribute
+ scale_sd = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["scale_sd"]
+ shift = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["shift"]
+ drift = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["drift"]
+ return scale, scale_sd, shift, drift
+
+def extract_1d_event_data(h5, read_type, base_loc, scale, scale_sd, shift, drift):  # Build a float32 array with one preproc_event() row per event under <base_loc>/BaseCalled_<read_type>/Events.
+ events = h5[base_loc+"/BaseCalled_%s/Events" % read_type]
+ index = 0.0  # running sum of the lengths of the events processed so far
+ data = []
+ for e in events:
+ mean = (e["mean"] - shift - index * drift) / scale  # subtract shift and the drift accumulated up to this event, then divide by scale
+ stdv = e["stdv"] / scale_sd
+ length = e["length"]
+ data.append(preproc_event(mean, stdv, length))
+ index += e["length"]  # advance by this event's length for the next drift term
+ return np.array(data, dtype=np.float32)
+
diff --git a/debian/deepnano/usr/share/deepnano/rnn_fin.py b/debian/deepnano/usr/share/deepnano/rnn_fin.py
new file mode 100644
index 0000000..a1795e8
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/rnn_fin.py
@@ -0,0 +1,81 @@
+import theano as th
+import theano.tensor as T
+from theano.tensor.nnet import sigmoid
+import numpy as np
+import pickle
+
+def share(array, dtype=th.config.floatX, name=None):  # Wrap array as a Theano shared variable, converted to dtype (Theano's floatX by default) and optionally named.
+ return th.shared(value=np.asarray(array, dtype=dtype), name=name)
+
+class OutLayer:  # Softmax output layer: self.output = clip(softmax(input . w + b), eps, 1-eps); self.params = [w, b].
+ def __init__(self, input, in_size, n_classes):  # input: symbolic input; in_size/n_classes: shape of the weight matrix w
+ w = share(np.zeros((in_size, n_classes)))  # zero-initialised; RnnPredictor overwrites it with set_value
+ b = share(np.zeros(n_classes))
+ eps = 0.0000001  # clipping bound that keeps outputs strictly inside (0, 1)
+ self.output = T.clip(T.nnet.softmax(T.dot(input, w) + b), eps, 1-eps)
+ self.params = [w, b]  # order matters: RnnPredictor assigns values by position
+
+class SimpleLayer:  # Gated recurrent layer (update and reset gates) scanned over the rows of input; all parameters start as zeros.
+ def __init__(self, input, nin, nunits):  # nin: input feature size; nunits: size of the recurrent state
+ id = str(np.random.randint(0, 10000000))  # random suffix appended to the shared variables' names
+ wio = share(np.zeros((nin, nunits)), name="wio"+id) # input to candidate value
+ wir = share(np.zeros((nin, nunits)), name="wir"+id) # input to reset gate
+ wiu = share(np.zeros((nin, nunits)), name="wiu"+id) # input to update gate
+ woo = share(np.zeros((nunits, nunits)), name="woo"+id) # previous output to candidate value
+ wou = share(np.zeros((nunits, nunits)), name="wou"+id) # previous output to update gate
+ wor = share(np.zeros((nunits, nunits)), name="wor"+id) # previous output to reset gate
+ bo = share(np.zeros(nunits), name="bo"+id)  # bias of the candidate value
+ bu = share(np.zeros(nunits), name="bu"+id)  # bias of the update gate
+ br = share(np.zeros(nunits), name="br"+id)  # bias of the reset gate
+ h0 = share(np.zeros(nunits), name="h0"+id)  # initial state fed to scan
+
+ def step(in_t, out_tm1):  # one time step: in_t is the current input row, out_tm1 the previous output
+ update_gate = sigmoid(T.dot(out_tm1, wou) + T.dot(in_t, wiu) + bu)
+ reset_gate = sigmoid(T.dot(out_tm1, wor) + T.dot(in_t, wir) + br)
+ new_val = T.tanh(T.dot(in_t, wio) + reset_gate * T.dot(out_tm1, woo) + bo)  # the reset gate scales the recurrent term after the matrix product
+ return update_gate * out_tm1 + (1 - update_gate) * new_val  # blend previous output and candidate
+
+ self.output, _ = th.scan(
+ step, sequences=[input],
+ outputs_info=[h0])
+
+ self.params = [wio, woo, bo, wir, wiu, wor, wou, br, bu, h0]  # order matters: RnnPredictor assigns loaded arrays by position in this list
+
+class BiSimpleLayer():  # Bidirectional wrapper: one SimpleLayer over input, one over the reversed input, outputs concatenated along axis 1.
+ def __init__(self, input, nin, nunits):  # nin/nunits are passed unchanged to both SimpleLayers
+ fwd = SimpleLayer(input, nin, nunits)
+ bwd = SimpleLayer(input[::-1], nin, nunits)  # the backward layer sees the sequence reversed
+ self.params = fwd.params + bwd.params  # forward parameters first, then backward (10 + 10)
+ self.output = T.concatenate([fwd.output, bwd.output[::-1]], axis=1)  # bwd output is flipped back so both halves line up per step
+
+class RnnPredictor:  # Rebuilds a stack of BiSimpleLayers plus two 5-class OutLayers from an .npz file and exposes it as self.predict.
+ def __init__(self, filename):  # filename: archive readable by np.load with arrays named arr_0, arr_1, ...
+ package = np.load(filename)
+ assert(len(package.files) % 20 == 4)  # 20 arrays per BiSimpleLayer plus 2 x [w, b] for the two output layers
+ n_layers = len(package.files) / 20  # NOTE(review): relies on Python 2 integer division; under Python 3 this is a float and range() below would reject it
+
+ self.input = T.fmatrix()
+ last_output = self.input
+ last_size = package['arr_0'].shape[0]  # arr_0 is the first layer's wio, shaped (input size, hidden size)
+ hidden_size = package['arr_0'].shape[1]
+ par_index = 0  # index of the next arr_N to consume
+ for i in range(n_layers):
+ layer = BiSimpleLayer(last_output, last_size, hidden_size)
+ for i in range(20):  # reuses the outer loop variable i; the outer loop never reads i, so this does not affect the result
+ layer.params[i].set_value(package['arr_%d' % par_index])
+ par_index += 1
+
+ last_output = layer.output
+ last_size = 2*hidden_size  # the bidirectional output concatenates two hidden_size halves
+ out_layer1 = OutLayer(last_output, last_size, 5)  # first output head, 5 classes
+ for i in range(2):
+ out_layer1.params[i].set_value(package['arr_%d' % par_index])
+ par_index += 1
+ out_layer2 = OutLayer(last_output, last_size, 5)  # second output head on the same features
+ for i in range(2):
+ out_layer2.params[i].set_value(package['arr_%d' % par_index])
+ par_index += 1
+ output1 = out_layer1.output
+ output2 = out_layer2.output
+
+ self.predict = th.function(inputs=[self.input], outputs=[output1, output2])  # compiled function: input matrix -> [output1, output2]
diff --git a/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz b/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz
new file mode 100644
index 0000000..e9af2e1
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/copyright b/debian/deepnano/usr/share/doc/deepnano/copyright
new file mode 100644
index 0000000..573e566
--- /dev/null
+++ b/debian/deepnano/usr/share/doc/deepnano/copyright
@@ -0,0 +1,36 @@
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: DeepNano
+Source: https://bitbucket.org/vboza/deepnano
+Files-Excluded: training/realign
+
+Files: *
+Copyright: 2016, Vladimir Boza, Comenius University
+License: BSD-3-clause
+
+Files: debian/*
+Copyright: 2016 Andreas Tille <tille at debian.org>
+License: BSD-3-clause
+
+License: BSD-3-clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the Comenius University nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL COMENIUS UNIVERSITY BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz
new file mode 100644
index 0000000..d08f7f0
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz
new file mode 100644
index 0000000..18ade24
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz
new file mode 100644
index 0000000..9ec060f
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz
new file mode 100644
index 0000000..3767dcb
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz
new file mode 100644
index 0000000..3593302
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz
new file mode 100644
index 0000000..aa6558f
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz
new file mode 100644
index 0000000..07ca3cc
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz
new file mode 100644
index 0000000..98b4293
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz
new file mode 100644
index 0000000..8c472c0
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz
new file mode 100644
index 0000000..f6e0bd4
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz
new file mode 100644
index 0000000..12e5a7a
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz
new file mode 100644
index 0000000..44756f3
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz
new file mode 100644
index 0000000..8aa7850
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz
new file mode 100644
index 0000000..699f576
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz differ
diff --git a/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate b/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate
new file mode 100755
index 0000000..4563b9e
--- /dev/null
+++ b/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate
@@ -0,0 +1,7 @@
+#! /bin/sh
+set -e
+
+# Only the "rtupdate" action refreshes the byte-compiled files; anything else is a no-op.
+[ "$1" = rtupdate ] || exit 0
+pyclean -p deepnano /usr/share/deepnano
+pycompile -p deepnano /usr/share/deepnano
\ No newline at end of file
diff --git a/debian/source/include-binaries b/debian/source/include-binaries
new file mode 100644
index 0000000..1e5cce3
--- /dev/null
+++ b/debian/source/include-binaries
@@ -0,0 +1,3 @@
+debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz
+debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz
+debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz
diff --git a/debian/tests/control b/debian/tests/control
new file mode 100644
index 0000000..a4ece15
--- /dev/null
+++ b/debian/tests/control
@@ -0,0 +1,3 @@
+Tests: run-test.sh
+Depends: @, deepnano-data
+Restrictions: allow-stderr
diff --git a/debian/tests/run-test.sh b/debian/tests/run-test.sh
new file mode 100644
index 0000000..7b7aaf6
--- /dev/null
+++ b/debian/tests/run-test.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e  # abort the test on the first failing command
+
+pkg_name="deepnano"
+test_required_pkg="deepnano-data"  # package whose /usr/share directory provides the test inputs
+
+if [ "$AUTOPKGTEST_TMP" = "" ] ; then  # not run under autopkgtest: create and clean up our own scratch directory
+ AUTOPKGTEST_TMP=$(mktemp -d "/tmp/${pkg_name}-test.XXXXXX")  # was ${pkg}, which is never set
+ trap 'rm -rf -- "$AUTOPKGTEST_TMP"' 0 INT QUIT ABRT PIPE TERM  # single quotes: expand when the trap fires
+fi
+
+cp -a "/usr/share/${test_required_pkg}"/* "$AUTOPKGTEST_TMP"  # work on a private copy of the data
+
+cd "$AUTOPKGTEST_TMP"
+find . -name "*gz" -exec gunzip \{\} \;  # the data files are shipped gzip-compressed
+
+echo -e "\n#1 - deepnano_basecall"
+OMP_NUM_THREADS=$(nproc) deepnano_basecall test_data/*
+cat output.fasta  # show the basecalls in the test log
+
+echo -e "\n#2 - deepnano_basecall_no_metrichor"
+OMP_NUM_THREADS=$(nproc) deepnano_basecall_no_metrichor test_data/*
+
+echo "PASS"
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/deepnano.git
More information about the debian-med-commit
mailing list