[med-svn] [deepnano] 01/01: added autopkgtest

Çağrı ULAŞ cagriulas-guest at moszumanska.debian.org
Sat Dec 17 05:05:52 UTC 2016


This is an automated email from the git hooks/post-receive script.

cagriulas-guest pushed a commit to branch master
in repository deepnano.

commit 4e029beab17cf15380d395aa23436d229be7e477
Author: Çağrı Ulaş <cagriulas at gmail.com>
Date:   Sat Dec 17 07:47:20 2016 +0300

    added autopkgtest
---
 debian/control                                     |   9 +
 .../2016_3_4_3507_1_ch120_read521_strand.fast5.gz  | Bin 0 -> 861690 bytes
 .../2016_3_4_3507_1_ch13_read1130_strand.fast5.gz  | Bin 0 -> 1066806 bytes
 .../2016_3_4_3507_1_ch13_read1132_strand.fast5.gz  | Bin 0 -> 1320364 bytes
 debian/deepnano-data.install                       |   2 +
 debian/deepnano/DEBIAN/control                     |  15 +
 debian/deepnano/DEBIAN/md5sums                     |  25 ++
 debian/deepnano/DEBIAN/postinst                    |   9 +
 debian/deepnano/DEBIAN/prerm                       |  14 +
 debian/deepnano/usr/bin/deepnano_basecall          |   5 +
 .../usr/bin/deepnano_basecall_no_metrichor         |   1 +
 debian/deepnano/usr/lib/deepnano/align_2d          | Bin 0 -> 43096 bytes
 debian/deepnano/usr/lib/deepnano/realign           | Bin 0 -> 39000 bytes
 debian/deepnano/usr/share/deepnano/basecall.py     | 185 ++++++++++
 .../usr/share/deepnano/basecall_no_metrichor.py    | 277 +++++++++++++++
 .../share/deepnano/basecall_no_metrichor_devel.py  | 371 +++++++++++++++++++++
 debian/deepnano/usr/share/deepnano/helpers.py      |  76 +++++
 debian/deepnano/usr/share/deepnano/rnn_fin.py      |  81 +++++
 .../usr/share/doc/deepnano/changelog.Debian.gz     | Bin 0 -> 271 bytes
 debian/deepnano/usr/share/doc/deepnano/copyright   |  36 ++
 .../doc/deepnano/examples/nets_data/map5-2d.npz.gz | Bin 0 -> 5082272 bytes
 .../deepnano/examples/nets_data/map5comp.npz.gz    | Bin 0 -> 1592095 bytes
 .../deepnano/examples/nets_data/map5temp.npz.gz    | Bin 0 -> 1592084 bytes
 .../deepnano/examples/nets_data/map6-2d-big.npz.gz | Bin 0 -> 14015984 bytes
 .../examples/nets_data/map6-2d-no-metr.npz.gz      | Bin 0 -> 14015890 bytes
 .../examples/nets_data/map6-2d-no-metr10.npz.gz    | Bin 0 -> 14016340 bytes
 .../examples/nets_data/map6-2d-no-metr20.npz.gz    | Bin 0 -> 14015359 bytes
 .../examples/nets_data/map6-2d-no-metr23.npz.gz    | Bin 0 -> 14016230 bytes
 .../doc/deepnano/examples/nets_data/map6-2d.npz.gz | Bin 0 -> 5081800 bytes
 .../deepnano/examples/nets_data/map6comp.npz.gz    | Bin 0 -> 1592557 bytes
 .../deepnano/examples/nets_data/map6temp.npz.gz    | Bin 0 -> 1592875 bytes
 .../2016_3_4_3507_1_ch120_read521_strand.fast5.gz  | Bin 0 -> 861647 bytes
 .../2016_3_4_3507_1_ch13_read1130_strand.fast5.gz  | Bin 0 -> 1066763 bytes
 .../2016_3_4_3507_1_ch13_read1132_strand.fast5.gz  | Bin 0 -> 1320321 bytes
 .../usr/share/python/runtime.d/deepnano.rtupdate   |   7 +
 debian/source/include-binaries                     |   3 +
 debian/tests/control                               |   3 +
 debian/tests/run-test.sh                           |  24 ++
 38 files changed, 1143 insertions(+)

diff --git a/debian/control b/debian/control
index 77bd8cc..8a20128 100644
--- a/debian/control
+++ b/debian/control
@@ -27,3 +27,12 @@ Description: alternative basecaller for MinION reads of genomic sequences
  .
  Currently it works with SQK-MAP-006 and SQK-MAP-005 chemistry and as a
  postprocessor for Metrichor.
+
+Package: deepnano-data
+Architecture: any
+Depends: deepnano
+Description: alternative basecaller for MinION reads of genomic sequences
+ DeepNano is alternative basecaller for Oxford Nanopore MinION reads
+ based on deep recurrent neural networks.
+ .
+ This package contains deepnanos test data.
diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz
new file mode 100644
index 0000000..89e17d1
Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz differ
diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz
new file mode 100644
index 0000000..37234d1
Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz differ
diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz
new file mode 100644
index 0000000..2c60372
Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz differ
diff --git a/debian/deepnano-data.install b/debian/deepnano-data.install
new file mode 100644
index 0000000..3f05815
--- /dev/null
+++ b/debian/deepnano-data.install
@@ -0,0 +1,2 @@
+nets_data/ usr/share/deepnano-data/
+debian/deepnano-data-files/test_data/ usr/share/deepnano-data/
diff --git a/debian/deepnano/DEBIAN/control b/debian/deepnano/DEBIAN/control
new file mode 100644
index 0000000..40bb851
--- /dev/null
+++ b/debian/deepnano/DEBIAN/control
@@ -0,0 +1,15 @@
+Package: deepnano
+Version: 0.0+20110617-1
+Architecture: amd64
+Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
+Installed-Size: 87902
+Depends: python:any (>= 2.7.5-5~), libc6 (>= 2.2.5), libgcc1 (>= 1:3.0), libstdc++6 (>= 5.2), python-h5py, python-numpy, python-dateutil, python-theano
+Section: science
+Priority: optional
+Homepage: https://bitbucket.org/vboza/deepnano
+Description: alternative basecaller for MinION reads of genomic sequences
+ DeepNano is alternative basecaller for Oxford Nanopore MinION reads
+ based on deep recurrent neural networks.
+ .
+ Currently it works with SQK-MAP-006 and SQK-MAP-005 chemistry and as a
+ postprocessor for Metrichor.
diff --git a/debian/deepnano/DEBIAN/md5sums b/debian/deepnano/DEBIAN/md5sums
new file mode 100644
index 0000000..64127b6
--- /dev/null
+++ b/debian/deepnano/DEBIAN/md5sums
@@ -0,0 +1,25 @@
+cba2f62f9fc586043fc00938b0e932b6  usr/bin/deepnano_basecall
+2b88df4d884e7afa2f22870458c97757  usr/lib/deepnano/align_2d
+bdb5eb7d2d0b3d70145310b7131c8d02  usr/lib/deepnano/realign
+bce23353ab354f2528a5de9661a5230c  usr/share/deepnano/basecall.py
+5e1fe3018daa7b36e249c2157411812a  usr/share/deepnano/basecall_no_metrichor.py
+3a4ae91d811983676c1f6237c8fec97e  usr/share/deepnano/basecall_no_metrichor_devel.py
+115ccfa267eb418b79d57a4aad9b039e  usr/share/deepnano/helpers.py
+e9bb97314500d839bb0ec8315a7a4ef9  usr/share/deepnano/rnn_fin.py
+cdf6a037be6f655d9c83430fbcc6f9d4  usr/share/doc/deepnano/changelog.Debian.gz
+35b0edea4c50091a781a9385b8c7705f  usr/share/doc/deepnano/copyright
+702509a2bdf2369f5ea14062d5ae7762  usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz
+e6b1b2969b7448accf054142b846ab62  usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz
+fe10cb4e2efb306594eea797ceba70e4  usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz
+fb3755161d24834453c9d9d2f7db9353  usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz
+818c6b69c501943804cf2aca1b5203c3  usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz
+d93a44348cc5b454b15338dccec70b0f  usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz
+7872e4100faa2dd13e21549174b0f171  usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz
+a672d7cba84ba1f8aacb36f998dc6866  usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz
+273653b4f06a1529a2448c53a8dcc94c  usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz
+af5b1570fe91051b69e013d63bc5d446  usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz
+3e5342e80bad5a6e7193db9956c6380a  usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz
+c9a6911fe747ab12be4721e4f543a609  usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz
+2f64706324cd5e8f10666f6b19fac14c  usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz
+3113c8f6d453c1619ea606e7f768e10d  usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz
+788eec3c08bb9ed41061cccd5f6d9d05  usr/share/python/runtime.d/deepnano.rtupdate
diff --git a/debian/deepnano/DEBIAN/postinst b/debian/deepnano/DEBIAN/postinst
new file mode 100755
index 0000000..5aac91b
--- /dev/null
+++ b/debian/deepnano/DEBIAN/postinst
@@ -0,0 +1,9 @@
+#!/bin/sh
+set -e
+
+# Automatically added by dh_python2:
+if which pycompile >/dev/null 2>&1; then
+	pycompile -p deepnano /usr/share/deepnano
+fi
+
+# End automatically added section
diff --git a/debian/deepnano/DEBIAN/prerm b/debian/deepnano/DEBIAN/prerm
new file mode 100755
index 0000000..a4c1086
--- /dev/null
+++ b/debian/deepnano/DEBIAN/prerm
@@ -0,0 +1,14 @@
+#!/bin/sh
+set -e
+
+# Automatically added by dh_python2:
+if which pyclean >/dev/null 2>&1; then
+	pyclean -p deepnano 
+else
+	dpkg -L deepnano | grep \.py$ | while read file
+	do
+		rm -f "${file}"[co] >/dev/null
+  	done
+fi
+
+# End automatically added section
diff --git a/debian/deepnano/usr/bin/deepnano_basecall b/debian/deepnano/usr/bin/deepnano_basecall
new file mode 100755
index 0000000..1d79c0a
--- /dev/null
+++ b/debian/deepnano/usr/bin/deepnano_basecall
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+SCRIPT=`basename $0 | sed 's/^deepnano_//'`
+
+/usr/share/deepnano/${SCRIPT}.py $@
diff --git a/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor b/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor
new file mode 120000
index 0000000..2041646
--- /dev/null
+++ b/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor
@@ -0,0 +1 @@
+deepnano_basecall
\ No newline at end of file
diff --git a/debian/deepnano/usr/lib/deepnano/align_2d b/debian/deepnano/usr/lib/deepnano/align_2d
new file mode 100755
index 0000000..6ce2cda
Binary files /dev/null and b/debian/deepnano/usr/lib/deepnano/align_2d differ
diff --git a/debian/deepnano/usr/lib/deepnano/realign b/debian/deepnano/usr/lib/deepnano/realign
new file mode 100755
index 0000000..47dbc8d
Binary files /dev/null and b/debian/deepnano/usr/lib/deepnano/realign differ
diff --git a/debian/deepnano/usr/share/deepnano/basecall.py b/debian/deepnano/usr/share/deepnano/basecall.py
new file mode 100755
index 0000000..aa81f75
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/basecall.py
@@ -0,0 +1,185 @@
+#!/usr/bin/python
+import argparse
+from rnn_fin import RnnPredictor
+import h5py
+import sys
+import numpy as np
+import theano as th
+import os
+import re
+import dateutil.parser
+import datetime
+from helpers import *
+
+def load_read_data(read_file):
+  h5 = h5py.File(read_file, "r")
+  ret = {}
+
+  extract_timing(h5, ret)
+
+  base_loc = get_base_loc(h5)
+
+  try:
+    ret["called_template"] = h5[base_loc+"/BaseCalled_template/Fastq"][()].split('\n')[1]
+    ret["called_complement"] = h5[base_loc+"/BaseCalled_complement/Fastq"][()].split('\n')[1]
+    ret["called_2d"] = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Fastq"][()].split('\n')[1]
+  except Exception as e:
+    pass
+  try:
+    events = h5[base_loc+"/BaseCalled_template/Events"]
+    tscale, tscale_sd, tshift, tdrift = extract_scaling(h5, "template", base_loc)
+    ret["temp_events"] = extract_1d_event_data(
+        h5, "template", base_loc, tscale, tscale_sd, tshift, tdrift)
+  except:
+    pass
+
+  try:
+    cscale, cscale_sd, cshift, cdrift = extract_scaling(h5, "complement", base_loc)
+    ret["comp_events"] = extract_1d_event_data(
+        h5, "complement", base_loc, cscale, cscale_sd, cshift, cdrift)
+  except Exception as e:
+    pass
+
+  try:
+    al = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Alignment"]
+    temp_events = h5[base_loc+"/BaseCalled_template/Events"]
+    comp_events = h5[base_loc+"/BaseCalled_complement/Events"]
+    ret["2d_events"] = []
+    for a in al:
+      ev = []
+      if a[0] == -1:
+        ev += [0, 0, 0, 0, 0]
+      else:
+        e = temp_events[a[0]]
+        mean = (e["mean"] - tshift) / cscale
+        stdv = e["stdv"] / tscale_sd
+        length = e["length"]
+        ev += [1] + preproc_event(mean, stdv, length)
+      if a[1] == -1:
+        ev += [0, 0, 0, 0, 0]
+      else:
+        e = comp_events[a[1]]
+        mean = (e["mean"] - cshift) / cscale
+        stdv = e["stdv"] / cscale_sd
+        length = e["length"]
+        ev += [1] + preproc_event(mean, stdv, length)
+      ret["2d_events"].append(ev) 
+    ret["2d_events"] = np.array(ret["2d_events"], dtype=np.float32)
+  except Exception as e:
+    print e
+    pass
+
+  h5.close()
+  return ret
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz")
+parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz")
+parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-big.npz")
+parser.add_argument('reads', type=str, nargs='*')
+parser.add_argument('--timing', action='store_true', default=False)
+parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement")
+parser.add_argument('--output', type=str, default="output.fasta")
+parser.add_argument('--output_orig', action='store_true', default=False)
+parser.add_argument('--directory', type=str, default='', help="Directory where read files are stored")
+
+args = parser.parse_args()
+types = args.type.split(',')
+do_template = False
+do_complement = False
+do_2d = False
+
+if "all" in types or "template" in types:
+  do_template = True
+if "all" in types or "complement" in types:
+  do_complement = True
+if "all" in types or "2d" in types:
+  do_2d = True
+
+assert do_template or do_complement or do_2d, "Nothing to do"
+assert len(args.reads) != 0 or len(args.directory) != 0, "Nothing to basecall"
+
+if do_template:
+  print "loading template net"
+  temp_net = RnnPredictor(args.template_net)
+  print "done"
+if do_complement:
+  print "loading complement net"
+  comp_net = RnnPredictor(args.complement_net)
+  print "done"
+if do_2d:
+  print "loading 2D net"
+  big_net = RnnPredictor(args.big_net)
+  print "done"
+
+chars = "ACGT"
+mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}
+
+fo = open(args.output, "w")
+
+total_bases = [0, 0, 0]
+
+files = args.reads
+if len(args.directory):
+  files += [os.path.join(args.directory, x) for x in os.listdir(args.directory)]  
+
+for i, read in enumerate(files):
+  basename = os.path.basename(read)
+  try:
+    data = load_read_data(read)
+  except Exception as e:
+    print "error at file", read
+    print e
+    continue
+  if not data:  
+    continue
+  print "\rcalling read %d/%d %s" % (i, len(files), read),
+  sys.stdout.flush()
+  if args.output_orig:
+    try:
+      if "called_template" in data:
+        print >>fo, ">%s_template" % basename
+        print >>fo, data["called_template"]
+      if "called_complement" in data:
+        print >>fo, ">%s_complement" % basename
+        print >>fo, data["called_complement"]
+      if "called_2d" in data:
+        print >>fo, ">%s_2d" % basename
+        print >>fo, data["called_2d"]
+    except:
+      pass
+
+  temp_start = datetime.datetime.now()
+  if do_template and "temp_events" in data:
+    predict_and_write(data["temp_events"], temp_net, fo, "%s_template_rnn" % basename)
+  temp_time = datetime.datetime.now() - temp_start
+
+  comp_start = datetime.datetime.now()
+  if do_complement and "comp_events" in data:
+    predict_and_write(data["comp_events"], comp_net, fo, "%s_complement_rnn" % basename)
+  comp_time = datetime.datetime.now() - comp_start
+
+  start_2d = datetime.datetime.now()
+  if do_2d and "2d_events" in data:
+    predict_and_write(data["2d_events"], big_net, fo, "%s_2d_rnn" % basename) 
+  time_2d = datetime.datetime.now() - start_2d
+
+  if args.timing:
+    try:
+      print "Events: %d/%d" % (len(data["temp_events"]), len(data["comp_events"]))
+      print "Our times: %f/%f/%f" % (temp_time.total_seconds(), comp_time.total_seconds(),
+         time_2d.total_seconds())
+      print "Our times per base: %f/%f/%f" % (
+        temp_time.total_seconds() / len(data["temp_events"]),
+        comp_time.total_seconds() / len(data["comp_events"]),
+        time_2d.total_seconds() / (len(data["comp_events"]) + len(data["temp_events"])))
+      print "Their times: %f/%f/%f" % (data["temp_time"].total_seconds(), data["comp_time"].total_seconds(), data["2d_time"].total_seconds())
+      print "Their times per base: %f/%f/%f" % (
+        data["temp_time"].total_seconds() / len(data["temp_events"]),
+        data["comp_time"].total_seconds() / len(data["comp_events"]),
+        data["2d_time"].total_seconds() / (len(data["comp_events"]) + len(data["temp_events"])))
+    except:
+      # Don't let timing throw us out
+      pass
+  fo.flush()
+fo.close()
diff --git a/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py
new file mode 100755
index 0000000..50b8dbc
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py
@@ -0,0 +1,277 @@
+#!/usr/bin/python
+import argparse
+from rnn_fin import RnnPredictor
+import h5py
+import sys
+import numpy as np
+import theano as th
+import os
+import re
+import dateutil.parser
+import datetime
+from helpers import *
+import subprocess
+import time
+
+def get_scaling_template(events, has_std):
+  down = 48.4631279889
+  up = 65.7312554591
+  our_down = np.percentile(events["mean"], 10)
+  our_up = np.percentile(events["mean"], 90)
+  scale = (our_up - our_down) / (up - down)
+  shift = (our_up / scale - up) * scale
+
+  sd = 0.807981325017
+  if has_std:
+    return scale, np.percentile(events["stdv"], 50) / sd, shift
+  else:
+    return scale, np.sqrt(np.percentile(events["variance"], 50)) / sd, shift
+    
+
+def get_scaling_complement(events, has_std):
+  down = 49.2638926877
+  up = 69.0192568072
+  our_down = np.percentile(events["mean"], 10)
+  our_up = np.percentile(events["mean"], 90)
+  scale = (our_up - our_down) / (up - down)
+  shift = (our_up / scale - up) * scale
+
+  sd = 1.04324844612
+  if has_std:
+    return scale, np.percentile(events["stdv"], 50) / sd, shift
+  else:
+    return scale, np.sqrt(np.percentile(events["variance"], 50)) / sd, shift
+
+def template_complement_loc(events):
+  abasic_level = np.percentile(events["mean"], 99) + 5
+  abasic_locs = (events["mean"] > abasic_level).nonzero()[0]
+  last = -47
+  run_len = 1
+  runs = []
+  for x in abasic_locs:
+    if x - last == 1:
+      run_len += 1
+    else:
+      if run_len >= 5:
+        if len(runs) and last - runs[-1][0] < 50:
+          run_len = last - runs[-1][0]
+          run_len += runs[-1][1]
+          runs[-1] = (last, run_len)
+        else:
+          runs.append((last, run_len))
+      run_len = 1
+    last = x
+  to_sort = []
+  mid = len(events) / 2
+  low_third = len(events) / 3
+  high_third = len(events) / 3 * 2
+  for r in runs:
+    if r[0] < low_third:
+      continue
+    if r[0] > high_third:
+      continue
+    to_sort.append((abs(r[0] - mid), r[0] - r[1], r[0]))
+  to_sort.sort()
+  if len(to_sort) == 0:
+    return None
+  trim_size = 10
+  return {"temp": (trim_size, to_sort[0][1] - trim_size),
+          "comp": (to_sort[0][2] + trim_size, len(events) - trim_size)}
+
+def load_read_data(read_file):
+  h5 = h5py.File(read_file, "r")
+  ret = {}
+
+  read_key = h5["Analyses/EventDetection_000/Reads"].keys()[0]
+  base_events = h5["Analyses/EventDetection_000/Reads"][read_key]["Events"]
+  temp_comp_loc = template_complement_loc(base_events)
+  sampling_rate = h5["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
+
+  if temp_comp_loc:
+    events = base_events[temp_comp_loc["temp"][0]:temp_comp_loc["temp"][1]]
+  else:
+    events = base_events    
+  has_std = True
+  try:
+    std = events[0]["stdv"]
+  except:
+    has_std = False
+  tscale2, tscale_sd2, tshift2 = get_scaling_template(events, has_std)
+
+  index = 0.0
+  ret["temp_events2"] = []
+  for e in events:
+    mean = (e["mean"] - tshift2) / tscale2
+    if has_std:
+      stdv = e["stdv"] / tscale_sd2
+    else:
+      stdv = np.sqrt(e["variance"]) / tscale_sd2
+    length = e["length"] / sampling_rate
+    ret["temp_events2"].append(preproc_event(mean, stdv, length))
+
+  ret["temp_events2"] = np.array(ret["temp_events2"], dtype=np.float32)
+
+  if not temp_comp_loc:
+    return ret
+  
+  events = base_events[temp_comp_loc["comp"][0]:temp_comp_loc["comp"][1]]
+  cscale2, cscale_sd2, cshift2 = get_scaling_complement(events, has_std)
+
+  index = 0.0
+  ret["comp_events2"] = []
+  for e in events:
+    mean = (e["mean"] - cshift2) / cscale2
+    if has_std:
+      stdv = e["stdv"] / cscale_sd2
+    else:
+      stdv = np.sqrt(e["variance"]) / cscale_sd2
+    length = e["length"] / sampling_rate
+    ret["comp_events2"].append(preproc_event(mean, stdv, length))
+
+  ret["comp_events2"] = np.array(ret["comp_events2"], dtype=np.float32)
+
+  return ret
+
+def basecall(read_file_name, fo):
+  basename = os.path.basename(read_file_name)
+  try:
+    data = load_read_data(read_file_name)
+  except Exception as e:
+    print e
+    print "error at file", read_file_name
+    return
+
+  if do_template or do_2d:
+    o1, o2 = predict_and_write(
+        data["temp_events2"], temp_net, 
+        fo if do_template else None,
+        "%s_template_rnn" % basename)
+
+  if (do_complement or do_2d) and "comp_events2" in data:
+    o1c, o2c = predict_and_write(
+        data["comp_events2"], comp_net, 
+        fo if do_complement else None,
+        "%s_complement_rnn" % basename)
+
+  if do_2d and "comp_events2" in data and\
+     len(data["comp_events2"]) <= args.max_2d_length and\
+     len(data["temp_events2"]) <= args.max_2d_length:
+    p = subprocess.Popen("/usr/lib/deepnano/align_2d", stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    f2d = p.stdin
+    print >>f2d, len(o1)+len(o2)
+    for a, b in zip(o1, o2):
+      print >>f2d, " ".join(map(str, a))
+      print >>f2d, " ".join(map(str, b))
+    print >>f2d, len(o1c)+len(o2c)
+    for a, b in zip(o1c, o2c):
+      print >>f2d, " ".join(map(str, a))
+      print >>f2d, " ".join(map(str, b))
+    f2do, f2de = p.communicate()
+    if p.returncode != 0:
+      return
+    lines = f2do.strip().split('\n')
+    print >>fo, ">%s_2d_rnn_simple" % basename
+    print >>fo, lines[0].strip()
+    events_2d = []
+    for l in lines[1:]:
+      temp_ind, comp_ind = map(int, l.strip().split())
+      e = []
+      if temp_ind == -1:
+        e += [0, 0, 0, 0, 0]
+      else: 
+        e += [1] + list(data["temp_events2"][temp_ind])
+      if comp_ind == -1:
+        e += [0, 0, 0, 0, 0]
+      else:
+        e += [1] + list(data["comp_events2"][comp_ind])
+      events_2d.append(e)
+    events_2d = np.array(events_2d, dtype=np.float32)
+    predict_and_write(events_2d, big_net, fo, "%s_2d_rnn" % basename)
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz")
+parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz")
+parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-no-metr23.npz")
+parser.add_argument('--max_2d_length', type=int, default=10000, help='Max length for 2d basecall')
+parser.add_argument('reads', type=str, nargs='*')
+parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement")
+parser.add_argument('--output', type=str, default="output.fasta")
+parser.add_argument('--directory', type=str, default='', help="Directory where read files are stored")
+parser.add_argument('--watch', type=str, default='', help='Watched directory')
+
+
+args = parser.parse_args()
+types = args.type.split(',')
+do_template = False
+do_complement = False
+do_2d = False
+
+if "all" in types or "template" in types:
+  do_template = True
+if "all" in types or "complement" in types:
+  do_complement = True
+if "all" in types or "2d" in types:
+  do_2d = True
+
+assert do_template or do_complement or do_2d, "Nothing to do"
+assert len(args.reads) != 0 or len(args.directory) != 0 or len(args.watch) != 0, "Nothing to basecall"
+
+if do_template or do_2d:
+  print "loading template net"
+  temp_net = RnnPredictor(args.template_net)
+  print "done"
+if do_complement or do_2d:
+  print "loading complement net"
+  comp_net = RnnPredictor(args.complement_net)
+  print "done"
+if do_2d:
+  print "loading 2D net"
+  big_net = RnnPredictor(args.big_net)
+  print "done"
+
+chars = "ACGT"
+mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}
+
+if len(args.reads) or len(args.directory) != 0:
+  fo = open(args.output, "w")
+
+  files = args.reads
+  if len(args.directory):
+    files += [os.path.join(args.directory, x) for x in os.listdir(args.directory)]  
+
+  for i, read in enumerate(files):
+    basecall(read, fo)
+
+  fo.close()
+
+if len(args.watch) != 0:
+  try:
+    from watchdog.observers import Observer
+    from watchdog.events import PatternMatchingEventHandler
+  except:
+    print "Please install watchdog to watch directories"
+    sys.exit()
+
+  class Fast5Handler(PatternMatchingEventHandler):
+    """Class for handling creation fo fast5-files"""
+    patterns = ["*.fast5"]
+    def on_created(self, event):
+      print "Calling", event
+      file_name = str(os.path.basename(event.src_path))
+      fasta_file_name = os.path.splitext(event.src_path)[0] + '.fasta'
+      with open(fasta_file_name, "w") as fo:
+        basecall(event.src_path, fo)
+  print('Watch dir: ' + args.watch)
+  observer = Observer()
+  print('Starting Observerer')
+  # start watching directory for fast5-files
+  observer.start()
+  observer.schedule(Fast5Handler(), path=args.watch)
+  try:
+    while True:
+      time.sleep(1)
+  # quit script using ctrl+c
+  except KeyboardInterrupt:
+    observer.stop()
+
+  observer.join()
diff --git a/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py
new file mode 100644
index 0000000..488fee3
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py
@@ -0,0 +1,371 @@
+import argparse
+from rnn_fin import RnnPredictor
+import h5py
+import sys
+import numpy as np
+import theano as th
+import os
+import re
+import dateutil.parser
+import datetime
+
+def preproc_event(mean, std, length):  # turn one raw event into the 4-feature vector fed to the RNN
+  mean = mean / 100.0 - 0.66  # roughly centre and scale the current mean
+  std = std - 1  # shift stdv so typical values sit near zero
+  return [mean, mean*mean, std, length]
+
+def get_scaling_template(events):
+  down = 48.4631279889  # reference 10th-percentile mean level for template reads
+  up = 65.7312554591  # reference 90th-percentile mean level
+  our_down = np.percentile(events["mean"], 10)
+  our_up = np.percentile(events["mean"], 90)
+  scale = (our_up - our_down) / (up - down)  # map this read's spread onto the reference spread
+  shift = (our_up / scale - up) * scale
+
+  sd = 0.807981325017  # reference median stdv
+  return scale, np.percentile(events["stdv"], 50) / sd, shift  # -> (scale, scale_sd, shift)
+
+def get_scaling_complement(events):
+  down = 49.2638926877  # reference 10th-percentile mean level for complement reads
+  up = 69.0192568072  # reference 90th-percentile mean level
+  our_down = np.percentile(events["mean"], 10)
+  our_up = np.percentile(events["mean"], 90)
+  scale = (our_up - our_down) / (up - down)  # map this read's spread onto the reference spread
+  shift = (our_up / scale - up) * scale
+
+  sd = 1.04324844612  # reference median stdv
+  return scale, np.percentile(events["stdv"], 50) / sd, shift  # -> (scale, scale_sd, shift)
+
+def template_complement_loc(events):  # locate the hairpin to split the read into template/complement halves
+  abasic_level = np.percentile(events["mean"], 99) + 5  # threshold for abasic (hairpin) current spikes
+  abasic_locs = (events["mean"] > abasic_level).nonzero()[0]
+  last = -47
+  run_len = 1
+  runs = []
+  for x in abasic_locs:  # collapse consecutive spike indices into (end, length) runs
+    if x - last == 1:
+      run_len += 1
+    else:
+      if run_len >= 5:
+        if len(runs) and last - runs[-1][0] < 50:  # merge with a previous run less than 50 events away
+          run_len = last - runs[-1][0]
+          run_len += runs[-1][1]
+          runs[-1] = (last, run_len)
+        else:
+          runs.append((last, run_len))
+      run_len = 1
+    last = x
+  to_sort = []
+  mid = len(events) / 2
+  low_third = len(events) / 3
+  high_third = len(events) / 3 * 2
+  for r in runs:  # only runs in the middle third of the read are hairpin candidates
+    if r[0] < low_third:
+      continue
+    if r[0] > high_third:
+      continue
+    to_sort.append((abs(r[0] - mid), r[0] - r[1], r[0]))  # prefer the run closest to the middle
+  to_sort.sort()
+  if len(to_sort) == 0:
+    return None
+  trim_size = 10
+  return {"temp": (trim_size, to_sort[0][1] - trim_size),
+          "comp": (to_sort[0][2] + trim_size, len(events) - trim_size)}
+
+def load_read_data(read_file):
+  h5 = h5py.File(read_file, "r")
+  ret = {}
+
+  read_key = h5["Analyses/EventDetection_000/Reads"].keys()[0]
+  base_events = h5["Analyses/EventDetection_000/Reads"][read_key]["Events"]
+  temp_comp_loc = template_complement_loc(base_events)
+  if not temp_comp_loc:
+    return None
+
+#  print "temp_comp_loc", temp_comp_loc["temp"], temp_comp_loc["comp"]
+#  print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["start_index_temp"],
+#  print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["end_index_temp"],
+#  print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["start_index_comp"],
+#  print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["end_index_comp"]
+
+  sampling_rate = h5["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
+
+  try:
+    ret["called_template"] = h5["Analyses/Basecall_2D_000/BaseCalled_template/Fastq"][()].split('\n')[1]
+    ret["called_complement"] = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Fastq"][()].split('\n')[1]
+    ret["called_2d"] = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Fastq"][()].split('\n')[1]
+  except Exception as e:
+    print "wat", e 
+    return None
+  events = base_events[temp_comp_loc["temp"][0]:temp_comp_loc["temp"][1]]
+  tscale2, tscale_sd2, tshift2 = get_scaling_template(events)
+
+  index = 0.0
+  ret["temp_events2"] = []
+  for e in events:
+    mean = (e["mean"] - tshift2) / tscale2
+    stdv = e["stdv"] / tscale_sd2
+    length = e["length"] / sampling_rate
+    ret["temp_events2"].append(preproc_event(mean, stdv, length))
+  events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"]
+  tscale = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["scale"]
+  tscale_sd = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["scale_sd"]
+  tshift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["shift"]
+  tdrift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["drift"]
+  index = 0.0
+  ret["temp_events"] = []
+  for e in events:
+    mean = (e["mean"] - tshift - index * tdrift) / tscale
+    stdv = e["stdv"] / tscale_sd
+    length = e["length"]
+    ret["temp_events"].append(preproc_event(mean, stdv, length))
+    index += e["length"]
+
+  events = base_events[temp_comp_loc["comp"][0]:temp_comp_loc["comp"][1]]
+  cscale2, cscale_sd2, cshift2 = get_scaling_complement(events)
+
+  index = 0.0
+  ret["comp_events2"] = []
+  for e in events:
+    mean = (e["mean"] - cshift2) / cscale2
+    stdv = e["stdv"] / cscale_sd2
+    length = e["length"] / sampling_rate
+    ret["comp_events2"].append(preproc_event(mean, stdv, length))
+
+  events = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Events"]
+  cscale = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["scale"]
+  cscale_sd = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["scale_sd"]
+  cshift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["shift"]
+  cdrift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["drift"]
+  index = 0.0
+  ret["comp_events"] = []
+  for e in events:
+    mean = (e["mean"] - cshift - index * cdrift) / cscale
+    stdv = e["stdv"] / cscale_sd
+    length = e["length"]
+    ret["comp_events"].append(preproc_event(mean, stdv, length))
+    index += e["length"]
+
+  ret["temp_events2"] = np.array(ret["temp_events2"], dtype=np.float32)
+  ret["comp_events2"] = np.array(ret["comp_events2"], dtype=np.float32)
+  ret["temp_events"] = np.array(ret["temp_events"], dtype=np.float32)
+  ret["comp_events"] = np.array(ret["comp_events"], dtype=np.float32)
+
+  al = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Alignment"]
+  ret["al"] = al
+  temp_events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"]
+  comp_events = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Events"]
+  ret["2d_events"] = []
+  for a in al:
+    ev = []
+    if a[0] == -1:
+      ev += [0, 0, 0, 0, 0]
+    else:
+      e = temp_events[a[0]]
+      mean = (e["mean"] - tshift - index * tdrift) / cscale
+      stdv = e["stdv"] / tscale_sd
+      length = e["length"]
+      ev += [1] + preproc_event(mean, stdv, length)
+    if a[1] == -1:
+      ev += [0, 0, 0, 0, 0]
+    else:
+      e = comp_events[a[1]]
+      mean = (e["mean"] - cshift - index * cdrift) / cscale
+      stdv = e["stdv"] / cscale_sd
+      length = e["length"]
+      ev += [1] + preproc_event(mean, stdv, length)
+    ret["2d_events"].append(ev) 
+  ret["2d_events"] = np.array(ret["2d_events"], dtype=np.float32)
+  return ret
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz")
+parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz")
+parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-big.npz")
+parser.add_argument('reads', type=str, nargs='+')
+parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement")
+parser.add_argument('--output', type=str, default="output.fasta")
+parser.add_argument('--output_orig', action='store_true', default=True)  # NOTE(review): store_true with default=True can never be switched off — confirm intent
+
+args = parser.parse_args()
+types = args.type.split(',')
+do_template = False
+do_complement = False
+do_2d = False
+
+if "all" in types or "template" in types:
+  do_template = True
+if "all" in types or "complement" in types:
+  do_complement = True
+if "all" in types or "2d" in types:
+  do_2d = True
+
+assert do_template or do_complement or do_2d, "Nothing to do"
+
+if do_template or do_2d:
+  print "loading template net"
+  temp_net = RnnPredictor(args.template_net)
+  print "done"
+if do_complement or do_2d:
+  print "loading complement net"
+  comp_net = RnnPredictor(args.complement_net)
+  print "done"
+if do_2d:
+  print "loading 2D net"
+  big_net = RnnPredictor(args.big_net)
+  big_net_orig = RnnPredictor("nets_data/map6-2d-big.npz")  # NOTE(review): loaded but never used below in this view — confirm
+  print "done"
+
+chars = "ACGT"  # RNN class index -> base; class 4 means "emit nothing"
+mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}
+
+fo = open(args.output, "w")
+
+total_bases = [0, 0, 0]  # NOTE(review): never updated below in this view — appears unused
+
+for i, read in enumerate(args.reads):  # one group of FASTA records per input read
+  if True:  # devel script: the exception handling below is intentionally disabled so failures surface
+    data = load_read_data(read)
+#  except Exception as e:
+#    print e
+#    print "error at file", read
+#    continue
+  if not data:  
+    continue
+  if args.output_orig:
+    print >>fo, ">%d_template" % i
+    print >>fo, data["called_template"]
+    print >>fo, ">%d_complement" % i
+    print >>fo, data["called_complement"]
+    print >>fo, ">%d_2d" % i
+    print >>fo, data["called_2d"]
+
+  if do_template or do_2d:
+    o1, o2 = temp_net.predict(data["temp_events"]) 
+    o1m = (np.argmax(o1, 1))
+    o2m = (np.argmax(o2, 1))
+    print >>fo, ">%d_temp_rnn" % i
+    for a, b in zip(o1m, o2m):
+      if a < 4:
+        fo.write(chars[a])
+      if b < 4:
+        fo.write(chars[b])
+    fo.write('\n')
+    o1, o2 = temp_net.predict(data["temp_events2"]) 
+    o1m = (np.argmax(o1, 1))
+    o2m = (np.argmax(o2, 1))
+    if do_template:
+      print >>fo, ">%d_temp_rnn2" % i
+      for a, b in zip(o1m, o2m):
+        if a < 4:
+          fo.write(chars[a])
+        if b < 4:
+          fo.write(chars[b])
+      fo.write('\n')
+
+  if do_complement or do_2d:
+    o1c, o2c = comp_net.predict(data["comp_events"]) 
+    o1cm = (np.argmax(o1c, 1))
+    o2cm = (np.argmax(o2c, 1))
+    print >>fo, ">%d_comp_rnn" % i
+    for a, b in zip(o1cm, o2cm):
+      if a < 4:
+        fo.write(chars[a])
+      if b < 4:
+        fo.write(chars[b])
+    fo.write('\n')
+    o1c, o2c = comp_net.predict(data["comp_events2"]) 
+    o1cm = (np.argmax(o1c, 1))
+    o2cm = (np.argmax(o2c, 1))
+    if do_complement:
+      print >>fo, ">%d_comp_rnn2" % i
+      for a, b in zip(o1cm, o2cm):
+        if a < 4:
+          fo.write(chars[a])
+        if b < 4:
+          fo.write(chars[b])
+      fo.write('\n')
+
+  if do_2d:
+    f2d = open("2d.in", "w")  # hand both strands' class posteriors to the external 2D aligner
+    print >>f2d, len(o1)+len(o2)
+    for a, b in zip(o1, o2):
+      print >>f2d, " ".join(map(str, a))
+      print >>f2d, " ".join(map(str, b))
+    print >>f2d, len(o1c)+len(o2c)
+    for a, b in zip(o1c, o2c):
+      print >>f2d, " ".join(map(str, a))
+      print >>f2d, " ".join(map(str, b))
+    f2d.close()
+    os.system("/usr/lib/deepnano/align_2d <2d.in >2d.out")
+    f2do = open("2d.out")
+    call2d = f2do.next().strip()  # Python 2 iterator API; would be next(f2do) on Python 3
+    print >>fo, ">%d_2d_rnn_simple" % i
+    print >>fo, call2d
+
+    start_temp_ours = None
+    end_temp_ours = None
+    start_comp_ours = None
+    end_comp_ours = None
+    events_2d = []
+    for l in f2do:  # remaining lines: one (template_index, complement_index) pair per 2D event
+      temp_ind, comp_ind = map(int, l.strip().split())
+      e = []
+      if temp_ind == -1:
+        e += [0, 0, 0, 0, 0]
+      else: 
+        e += [1] + list(data["temp_events2"][temp_ind])
+        if not start_temp_ours:  # NOTE(review): also true when temp_ind == 0 — confirm index 0 cannot occur here
+          start_temp_ours = temp_ind
+        end_temp_ours = temp_ind
+      if comp_ind == -1:
+        e += [0, 0, 0, 0, 0]
+      else:
+        e += [1] + list(data["comp_events2"][comp_ind])
+        if not end_comp_ours:
+          end_comp_ours = comp_ind
+        start_comp_ours = comp_ind
+      events_2d.append(e)
+    events_2d = np.array(events_2d, dtype=np.float32)
+    o1c, o2c = big_net.predict(events_2d) 
+    o1cm = (np.argmax(o1c, 1))
+    o2cm = (np.argmax(o2c, 1))
+    print >>fo, ">%d_2d_rnn2" % i
+    for a, b in zip(o1cm, o2cm):
+      if a < 4:
+        fo.write(chars[a])
+      if b < 4:
+        fo.write(chars[b])
+    fo.write('\n')
+    o1c, o2c = big_net.predict(data["2d_events"]) 
+    o1cm = (np.argmax(o1c, 1))
+    o2cm = (np.argmax(o2c, 1))
+    print >>fo, ">%d_2d_rnn" % i
+    for a, b in zip(o1cm, o2cm):
+      if a < 4:
+        fo.write(chars[a])
+      if b < 4:
+        fo.write(chars[b])
+    fo.write('\n')
+
+    start_temp_th = None
+    end_temp_th = None
+    start_comp_th = None
+    end_comp_th = None
+    for a in data["al"]: 
+      if a[0] != -1:
+        if not start_temp_th:
+          start_temp_th = a[0]
+        end_temp_th = a[0]
+      if a[1] != -1:
+        if not end_comp_th:
+          end_comp_th = a[1]
+        start_comp_th = a[1]
+
+    print "Ours:",
+    print start_temp_ours, end_temp_ours, start_comp_ours, end_comp_ours,
+    print 1. * len(events_2d) / (end_temp_ours - start_temp_ours + end_comp_ours - start_comp_ours) 
+    print "Their:",
+    print start_temp_th, end_temp_th, start_comp_th, end_comp_th,
+    print 1. * len(data["al"]) / (end_temp_th - start_temp_th + end_comp_th - start_comp_th) 
+    print
diff --git a/debian/deepnano/usr/share/deepnano/helpers.py b/debian/deepnano/usr/share/deepnano/helpers.py
new file mode 100644
index 0000000..6808562
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/helpers.py
@@ -0,0 +1,76 @@
+from rnn_fin import RnnPredictor
+import h5py
+import sys
+import numpy as np
+import theano as th
+import os
+import re
+import dateutil.parser
+import datetime
+import argparse
+
+chars = "ACGT"  # RNN class index -> base for classes 0-3; class 4 emits nothing (see predict_and_write)
+mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}  # base -> class index
+
+def preproc_event(mean, std, length):  # turn one raw event into the 4-feature vector fed to the RNN
+  mean = mean / 100.0 - 0.66  # roughly centre and scale the current mean
+  std = std - 1  # shift stdv so typical values sit near zero
+  return [mean, mean*mean, std, length]
+
+def predict_and_write(events, ntwk, fo, read_name):  # run the net on events; optionally write a FASTA record
+  o1, o2 = ntwk.predict(events) 
+  if fo:  # fo may be falsy when only the raw outputs are wanted
+    o1m = (np.argmax(o1, 1))  # most likely class per event position
+    o2m = (np.argmax(o2, 1))
+    print >>fo, ">%s" % read_name
+    for a, b in zip(o1m, o2m):
+      if a < 4:  # class 4 = "no base called"
+        fo.write(chars[a])
+      if b < 4:
+        fo.write(chars[b])
+    fo.write('\n')
+  return o1, o2
+
+def extract_timing(h5, ret):  # best-effort: parse per-stage durations out of the Metrichor log into ret
+  try:
+    log = h5["Analyses/Basecall_2D_000/Log"][()]
+    temp_time = dateutil.parser.parse(re.search(r"(.*) Basecalling template.*", log).groups()[0])
+    comp_time = dateutil.parser.parse(re.search(r"(.*) Basecalling complement.*", log).groups()[0])
+    comp_end_time = dateutil.parser.parse(re.search(r"(.*) Aligning hairpin.*", log).groups()[0])
+
+    start_2d_time = dateutil.parser.parse(re.search(r"(.*) Performing full 2D.*", log).groups()[0])
+    end_2d_time = dateutil.parser.parse(re.search(r"(.*) Workflow completed.*", log).groups()[0])
+
+    ret["temp_time"] = comp_time - temp_time
+    ret["comp_time"] = comp_end_time - comp_time
+    ret["2d_time"] = end_2d_time - start_2d_time
+  except:  # deliberately best-effort: a missing log or field leaves ret untouched
+    pass
+
+def get_base_loc(h5):
+  base_loc = "Analyses/Basecall_2D_000"
+  try:
+    events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"]
+  except:
+    base_loc = "Analyses/Basecall_1D_000"
+  return base_loc
+
+def extract_scaling(h5, read_type, base_loc):
+  scale = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["scale"]
+  scale_sd = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["scale_sd"]
+  shift = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["shift"]
+  drift = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["drift"]
+  return scale, scale_sd, shift, drift
+
+def extract_1d_event_data(h5, read_type, base_loc, scale, scale_sd, shift, drift):  # normalised event features for one strand
+  events = h5[base_loc+"/BaseCalled_%s/Events" % read_type]
+  index = 0.0  # cumulative event length, used for drift correction
+  data = []
+  for e in events:
+    mean = (e["mean"] - shift - index * drift) / scale  # undo per-read shift/drift/scale
+    stdv = e["stdv"] / scale_sd
+    length = e["length"]
+    data.append(preproc_event(mean, stdv, length))
+    index += e["length"]
+  return np.array(data, dtype=np.float32)
+
diff --git a/debian/deepnano/usr/share/deepnano/rnn_fin.py b/debian/deepnano/usr/share/deepnano/rnn_fin.py
new file mode 100644
index 0000000..a1795e8
--- /dev/null
+++ b/debian/deepnano/usr/share/deepnano/rnn_fin.py
@@ -0,0 +1,81 @@
+import theano as th
+import theano.tensor as T
+from theano.tensor.nnet import sigmoid
+import numpy as np
+import pickle
+
+def share(array, dtype=th.config.floatX, name=None):  # wrap numpy data as a theano shared variable
+  return th.shared(value=np.asarray(array, dtype=dtype), name=name)
+
+class OutLayer:  # softmax output layer; weights are filled in later via set_value (see RnnPredictor)
+  def __init__(self, input, in_size, n_classes):
+    w = share(np.zeros((in_size, n_classes)))
+    b = share(np.zeros(n_classes))
+    eps = 0.0000001
+    self.output = T.clip(T.nnet.softmax(T.dot(input, w) + b), eps, 1-eps)  # keep probabilities strictly inside (0, 1)
+    self.params = [w, b]
+
+class SimpleLayer:  # recurrent layer with update/reset gates (GRU-style)
+  def __init__(self, input, nin, nunits):
+    id = str(np.random.randint(0, 10000000))  # random suffix keeps shared-variable names distinct per layer
+    wio = share(np.zeros((nin, nunits)), name="wio"+id)  # input to candidate output
+    wir = share(np.zeros((nin, nunits)), name="wir"+id)  # input to reset gate
+    wiu = share(np.zeros((nin, nunits)), name="wiu"+id)  # input to update gate
+    woo = share(np.zeros((nunits, nunits)), name="woo"+id)  # recurrent to candidate output
+    wou = share(np.zeros((nunits, nunits)), name="wou"+id)  # recurrent to update gate
+    wor = share(np.zeros((nunits, nunits)), name="wor"+id)  # recurrent to reset gate
+    bo = share(np.zeros(nunits), name="bo"+id)  # candidate bias
+    bu = share(np.zeros(nunits), name="bu"+id)  # update-gate bias
+    br = share(np.zeros(nunits), name="br"+id)  # reset-gate bias
+    h0 = share(np.zeros(nunits), name="h0"+id)  # initial hidden state
+
+    def step(in_t, out_tm1):  # one timestep: blend previous output with a gated candidate
+      update_gate = sigmoid(T.dot(out_tm1, wou) + T.dot(in_t, wiu) + bu)
+      reset_gate = sigmoid(T.dot(out_tm1, wor) + T.dot(in_t, wir) + br)
+      new_val = T.tanh(T.dot(in_t, wio) + reset_gate * T.dot(out_tm1, woo) + bo)
+      return update_gate * out_tm1 + (1 - update_gate) * new_val
+    
+    self.output, _ = th.scan(
+      step, sequences=[input],
+      outputs_info=[h0])
+
+    self.params = [wio, woo, bo, wir, wiu, wor, wou, br, bu, h0]  # order matters: RnnPredictor loads saved arrays by position
+
+class BiSimpleLayer():  # bidirectional wrapper around two SimpleLayers
+  def __init__(self, input, nin, nunits):
+    fwd = SimpleLayer(input, nin, nunits)  # forward pass over the sequence
+    bwd = SimpleLayer(input[::-1], nin, nunits)  # same layer type on the reversed sequence
+    self.params = fwd.params + bwd.params
+    self.output = T.concatenate([fwd.output, bwd.output[::-1]], axis=1)  # re-reverse so both halves align per timestep
+
+class RnnPredictor:
+  def __init__(self, filename):
+    package = np.load(filename)
+    assert(len(package.files) % 20 == 4)
+    n_layers = len(package.files) / 20
+
+    self.input = T.fmatrix()
+    last_output = self.input
+    last_size = package['arr_0'].shape[0]
+    hidden_size = package['arr_0'].shape[1]
+    par_index = 0
+    for i in range(n_layers):
+      layer = BiSimpleLayer(last_output, last_size, hidden_size)
+      for i in range(20):
+        layer.params[i].set_value(package['arr_%d' % par_index])
+        par_index += 1
+
+      last_output = layer.output
+      last_size = 2*hidden_size
+    out_layer1 = OutLayer(last_output, last_size, 5)
+    for i in range(2):
+      out_layer1.params[i].set_value(package['arr_%d' % par_index])
+      par_index += 1
+    out_layer2 = OutLayer(last_output, last_size, 5)
+    for i in range(2):
+      out_layer2.params[i].set_value(package['arr_%d' % par_index])
+      par_index += 1
+    output1 = out_layer1.output
+    output2 = out_layer2.output
+
+    self.predict = th.function(inputs=[self.input], outputs=[output1, output2])
diff --git a/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz b/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz
new file mode 100644
index 0000000..e9af2e1
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/copyright b/debian/deepnano/usr/share/doc/deepnano/copyright
new file mode 100644
index 0000000..573e566
--- /dev/null
+++ b/debian/deepnano/usr/share/doc/deepnano/copyright
@@ -0,0 +1,36 @@
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: DeepNano
+Source: https://bitbucket.org/vboza/deepnano
+Files-Excluded: training/realign
+
+Files: *
+Copyright: 2016, Vladimir Boza, Comenius University
+License: BSD-3-clause
+
+Files: debian/*
+Copyright: 2016 Andreas Tille <tille at debian.org>
+License: BSD-3-clause
+
+License: BSD-3-clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Comenius University nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL COMENIUS UNIVERSITY BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz
new file mode 100644
index 0000000..d08f7f0
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz
new file mode 100644
index 0000000..18ade24
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz
new file mode 100644
index 0000000..9ec060f
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz
new file mode 100644
index 0000000..3767dcb
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz
new file mode 100644
index 0000000..3593302
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz
new file mode 100644
index 0000000..aa6558f
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz
new file mode 100644
index 0000000..07ca3cc
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz
new file mode 100644
index 0000000..98b4293
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz
new file mode 100644
index 0000000..8c472c0
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz
new file mode 100644
index 0000000..f6e0bd4
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz
new file mode 100644
index 0000000..12e5a7a
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz
new file mode 100644
index 0000000..44756f3
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz
new file mode 100644
index 0000000..8aa7850
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz differ
diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz
new file mode 100644
index 0000000..699f576
Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz differ
diff --git a/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate b/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate
new file mode 100755
index 0000000..4563b9e
--- /dev/null
+++ b/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate
@@ -0,0 +1,7 @@
+#! /bin/sh
+set -e
+
+if [ "$1" = rtupdate ]; then
+	pyclean -p deepnano /usr/share/deepnano	# drop byte-code compiled for the old default python
+	pycompile -p deepnano  /usr/share/deepnano	# rebuild byte-code with the new default python
+fi
\ No newline at end of file
diff --git a/debian/source/include-binaries b/debian/source/include-binaries
new file mode 100644
index 0000000..1e5cce3
--- /dev/null
+++ b/debian/source/include-binaries
@@ -0,0 +1,3 @@
+debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz
+debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz
+debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz
diff --git a/debian/tests/control b/debian/tests/control
new file mode 100644
index 0000000..a4ece15
--- /dev/null
+++ b/debian/tests/control
@@ -0,0 +1,3 @@
+Tests: run-test.sh
+Depends: @, deepnano-data
+Restrictions: allow-stderr
diff --git a/debian/tests/run-test.sh b/debian/tests/run-test.sh
new file mode 100644
index 0000000..7b7aaf6
--- /dev/null
+++ b/debian/tests/run-test.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# autopkgtest: basecall the bundled example reads with both entry points.
+set -e
+
+pkg_name="deepnano"
+test_required_pkg="deepnano-data"
+
+if [ "$AUTOPKGTEST_TMP" = "" ] ; then
+  AUTOPKGTEST_TMP=$(mktemp -d "/tmp/${pkg_name}-test.XXXXXX")  # fixed: was ${pkg}, which is undefined
+  trap "rm -rf $AUTOPKGTEST_TMP" 0 INT QUIT ABRT PIPE TERM
+fi
+
+cp -a /usr/share/${test_required_pkg}/* "$AUTOPKGTEST_TMP"
+
+cd "$AUTOPKGTEST_TMP"
+find . -name "*gz" -exec gunzip \{\} \;
+
+echo -e "\n#1 - deepnano_basecall"
+OMP_NUM_THREADS=$(nproc) deepnano_basecall test_data/*
+cat output.fasta
+
+echo -e "\n#2 - deepnano_basecall_no_metrichor"
+OMP_NUM_THREADS=$(nproc) deepnano_basecall_no_metrichor test_data/*
+
+echo "PASS"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/deepnano.git



More information about the debian-med-commit mailing list