[med-svn] [iva] 01/03: New upstream version 1.0.8+ds
Sascha Steinbiss
satta at debian.org
Thu Jan 12 15:17:05 UTC 2017
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository iva.
commit 83186eb50d4c413acc5deea4b1afff2c27fd1de1
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date: Thu Jan 12 16:09:48 2017 +0100
New upstream version 1.0.8+ds
---
iva/common.py | 2 +-
iva/gage/getMummerStats.sh | 2 ++
iva/kraken.py | 16 +++++++++++
iva/tests/data/kraken_count_cds_from_embl.0 | 27 ++++++++++++++++++
iva/tests/data/kraken_count_cds_from_embl.1 | 34 ++++++++++++++++++++++
iva/tests/data/kraken_count_cds_from_embl.2 | 44 +++++++++++++++++++++++++++++
iva/tests/kraken_test.py | 9 +++++-
scripts/iva_qc_make_db | 2 +-
setup.py | 2 +-
9 files changed, 134 insertions(+), 4 deletions(-)
diff --git a/iva/common.py b/iva/common.py
index 648425a..25aa16f 100644
--- a/iva/common.py
+++ b/iva/common.py
@@ -16,7 +16,7 @@ import argparse
import os
import sys
import subprocess
-version = '1.0.7'
+version = '1.0.8'
class abspathAction(argparse.Action):
def __call__(self, parser, namespace, value, option_string):
diff --git a/iva/gage/getMummerStats.sh b/iva/gage/getMummerStats.sh
index de73d67..1dda2c3 100755
--- a/iva/gage/getMummerStats.sh
+++ b/iva/gage/getMummerStats.sh
@@ -1,3 +1,5 @@
+#!/bin/sh
+
FILENAME=$1
SCRIPT_PATH=$2
JAVA_PATH=$2:.
diff --git a/iva/kraken.py b/iva/kraken.py
index 9d96859..b01ead4 100644
--- a/iva/kraken.py
+++ b/iva/kraken.py
@@ -69,6 +69,18 @@ class Database:
self.done_files = {x:os.path.join(self.rootdir, 'progress.' + x + '.done') for x in self.tasks}
+ @classmethod
+ def count_cds_from_embl(cls, infile):
+ count = 0
+
+ with open(infile) as f:
+ for line in f:
+ if line.startswith('FT CDS '):
+ count += 1
+
+ return count
+
+
def _mkdir(self, d, rmtree=False):
if rmtree and os.path.exists(d):
shutil.rmtree(d)
@@ -253,6 +265,10 @@ class Database:
self._replace_fasta_header(fa_file, 'gi|' + str(new_gi) + '|x')
embl_file = os.path.join(embl_dir, gi + '.embl')
self._genbank2embl(gb_file, embl_file)
+ number_of_cds = Database.count_cds_from_embl(embl_file)
+ print('GI', gi, ' CDS:', number_of_cds)
+ if number_of_cds < 1:
+ raise Error('No CDS found for GI ' + gi + '. Can only use references that have at least one CDS. Cannot continue.')
self._get_parent_taxons(real_taxon_ids)
diff --git a/iva/tests/data/kraken_count_cds_from_embl.0 b/iva/tests/data/kraken_count_cds_from_embl.0
new file mode 100644
index 0000000..45a1192
--- /dev/null
+++ b/iva/tests/data/kraken_count_cds_from_embl.0
@@ -0,0 +1,27 @@
+ID NC_123456; SV 1; linear; unassigned DNA; STD; VRL; 100 BP.
+XX
+AC NC_123456;
+XX
+DT 25-MAY-2001
+XX
+DE Hitchhiker virus 42, complete genome.
+XX
+KW DASeq
+XX
+DR BioProject; PRJNA12345.
+XX
+FH Key Location/Qualifiers
+FH
+FT source 1..100
+FT /mol_type="alien RNA"
+FT /db_xref="taxon:424242"
+FT /organism="Unknown"
+FT gene 42..52
+FT /locus_tag="Vogon1"
+FT /db_xref="GeneID:42"
+FT /gene="poetic"
+XX
+SQ Sequence 100 BP; 42 A; 42 C; 42 G; 42 T; 0 other;
+ gctatgctga caggtacgta cgcgcgcgtc gcagtcagcg tcgatgtccct cagtctgcga 60
+ cgatcgtagc cagttgcggc ccccctctct tatatataaa 100
+//
diff --git a/iva/tests/data/kraken_count_cds_from_embl.1 b/iva/tests/data/kraken_count_cds_from_embl.1
new file mode 100644
index 0000000..a83967f
--- /dev/null
+++ b/iva/tests/data/kraken_count_cds_from_embl.1
@@ -0,0 +1,34 @@
+ID NC_123456; SV 1; linear; unassigned DNA; STD; VRL; 100 BP.
+XX
+AC NC_123456;
+XX
+DT 25-MAY-2001
+XX
+DE Hitchhiker virus 42, complete genome.
+XX
+KW DASeq
+XX
+DR BioProject; PRJNA12345.
+XX
+FH Key Location/Qualifiers
+FH
+FT source 1..100
+FT /mol_type="alien RNA"
+FT /db_xref="taxon:424242"
+FT /organism="Unknown"
+FT gene 42..52
+FT /locus_tag="Vogon1"
+FT /db_xref="GeneID:42"
+FT /gene="poetic"
+FT CDS join(42..44,47..52)
+FT /locus_tag="Vogon1"
+FT /protein_id="43"
+FT /gene="poetic"
+FT /note="do not listen to this gene"
+FT /codon_start=1
+FT /product="poem"
+XX
+SQ Sequence 100 BP; 42 A; 42 C; 42 G; 42 T; 0 other;
+ gctatgctga caggtacgta cgcgcgcgtc gcagtcagcg tcgatgtccct cagtctgcga 60
+ cgatcgtagc cagttgcggc ccccctctct tatatataaa 100
+//
diff --git a/iva/tests/data/kraken_count_cds_from_embl.2 b/iva/tests/data/kraken_count_cds_from_embl.2
new file mode 100644
index 0000000..72fe8e1
--- /dev/null
+++ b/iva/tests/data/kraken_count_cds_from_embl.2
@@ -0,0 +1,44 @@
+ID NC_123456; SV 1; linear; unassigned DNA; STD; VRL; 100 BP.
+XX
+AC NC_123456;
+XX
+DT 25-MAY-2001
+XX
+DE Hitchhiker virus 42, complete genome.
+XX
+KW DASeq
+XX
+DR BioProject; PRJNA12345.
+XX
+FH Key Location/Qualifiers
+FH
+FT source 1..100
+FT /mol_type="alien RNA"
+FT /db_xref="taxon:424242"
+FT /organism="Unknown"
+FT gene 11..13
+FT /locus_tag="ShortyMcShortGene"
+FT /db_xref="GeneID:1"
+FT /gene="tiny"
+FT CDS join(11..13)
+FT /locus_tag="short_tag"
+FT /protein_id="11"
+FT /gene="small"
+FT /codon_start=1
+FT /product="not a lot"
+FT gene 42..52
+FT /locus_tag="Vogon1"
+FT /db_xref="GeneID:42"
+FT /gene="poetic"
+FT CDS join(42..44,47..52)
+FT /locus_tag="Vogon1"
+FT /protein_id="43"
+FT /gene="poetic"
+FT /note="do not listen to this gene"
+FT /codon_start=1
+FT /product="poem"
+XX
+SQ Sequence 100 BP; 42 A; 42 C; 42 G; 42 T; 0 other;
+ gctatgctga caggtacgta cgcgcgcgtc gcagtcagcg tcgatgtccct cagtctgcga 60
+ cgatcgtagc cagttgcggc ccccctctct tatatataaa 100
+//
diff --git a/iva/tests/kraken_test.py b/iva/tests/kraken_test.py
index 01ed131..27530d8 100644
--- a/iva/tests/kraken_test.py
+++ b/iva/tests/kraken_test.py
@@ -27,6 +27,13 @@ class TestKraken(unittest.TestCase):
self.db = kraken.Database(os.path.join(data_dir, 'kraken_test.db'))
+ def test_count_cds_from_embl(self):
+ '''test count_cds_from_embl'''
+ for i in range(3):
+ infile = os.path.join(data_dir, 'kraken_count_cds_from_embl.' + str(i))
+ self.assertEqual(i, kraken.Database.count_cds_from_embl(infile))
+
+
def test_get_parent_taxons(self):
'''test _get_parent_taxons'''
taxons = set(['1', '9', '13'])
@@ -87,7 +94,7 @@ class TestKraken(unittest.TestCase):
self.db._append_to_file(tmp, '42')
self.assertTrue(filecmp.cmp(tmp, after))
os.unlink(tmp)
-
+
def test_species_to_dir(self):
'''test species_to_dir'''
diff --git a/scripts/iva_qc_make_db b/scripts/iva_qc_make_db
index 96d5669..d98d155 100755
--- a/scripts/iva_qc_make_db
+++ b/scripts/iva_qc_make_db
@@ -23,7 +23,7 @@ parser = argparse.ArgumentParser(
)
parser.add_argument('outdir', help='Name of output directory', metavar='Directory_name')
-parser.add_argument('--add_to_ref', action=iva.common.abspathAction, help='Filename of Genbank IDs or GI numbers to be added to database. Format is: whitespace separated list of GI numbers on each line. One line defines one genome (e.g. flu is 8 segements, so put 8 GI numbers on one line for one flu reference)', metavar='Filename')
+parser.add_argument('--add_to_ref', action=iva.common.abspathAction, help='Filename of Genbank IDs or GI numbers to be added to database. Format is: whitespace separated list of GI numbers on each line. One line defines one genome (e.g. flu is 8 segments, so put 8 GI numbers on one line for one flu reference)', metavar='Filename')
parser.add_argument('--skip_viruses', action='store_true', help='Do not run kraken-build --download-library viruses when building the database. If this option used, then --add_to_ref must also be used')
parser.add_argument('--threads', type=int, help='Number of threads to use [%(default)s]', metavar='INT', default=1)
parser.add_argument('--minimizer_len', type=int, help='Number to pass to kraken-build minimizer_len option [%(default)s]', default=13, metavar='INT')
diff --git a/setup.py b/setup.py
index 6c7a384..5269000 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@ if not found_all_progs:
setup(
name='iva',
- version='1.0.7',
+ version='1.0.8',
description='Iterative Virus Assembler',
packages = find_packages(),
package_data={'iva': ['gage/*', 'ratt/*', 'read_trim/*', 'test_run_data/*']},
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iva.git
More information about the debian-med-commit mailing list