[med-svn] [iva] 01/03: New upstream version 1.0.8+ds

Sascha Steinbiss satta at debian.org
Thu Jan 12 15:17:05 UTC 2017


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository iva.

commit 83186eb50d4c413acc5deea4b1afff2c27fd1de1
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date:   Thu Jan 12 16:09:48 2017 +0100

    New upstream version 1.0.8+ds
---
 iva/common.py                               |  2 +-
 iva/gage/getMummerStats.sh                  |  2 ++
 iva/kraken.py                               | 16 +++++++++++
 iva/tests/data/kraken_count_cds_from_embl.0 | 27 ++++++++++++++++++
 iva/tests/data/kraken_count_cds_from_embl.1 | 34 ++++++++++++++++++++++
 iva/tests/data/kraken_count_cds_from_embl.2 | 44 +++++++++++++++++++++++++++++
 iva/tests/kraken_test.py                    |  9 +++++-
 scripts/iva_qc_make_db                      |  2 +-
 setup.py                                    |  2 +-
 9 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/iva/common.py b/iva/common.py
index 648425a..25aa16f 100644
--- a/iva/common.py
+++ b/iva/common.py
@@ -16,7 +16,7 @@ import argparse
 import os
 import sys
 import subprocess
-version = '1.0.7'
+version = '1.0.8'
 
 class abspathAction(argparse.Action):
     def __call__(self, parser, namespace, value, option_string):
diff --git a/iva/gage/getMummerStats.sh b/iva/gage/getMummerStats.sh
index de73d67..1dda2c3 100755
--- a/iva/gage/getMummerStats.sh
+++ b/iva/gage/getMummerStats.sh
@@ -1,3 +1,5 @@
+#!/bin/sh
+
 FILENAME=$1
 SCRIPT_PATH=$2
 JAVA_PATH=$2:.
diff --git a/iva/kraken.py b/iva/kraken.py
index 9d96859..b01ead4 100644
--- a/iva/kraken.py
+++ b/iva/kraken.py
@@ -69,6 +69,18 @@ class Database:
         self.done_files = {x:os.path.join(self.rootdir, 'progress.' + x + '.done') for x in self.tasks}
 
 
+    @classmethod
+    def count_cds_from_embl(cls, infile):
+        count = 0
+
+        with open(infile) as f:
+            for line in f:
+                if line.startswith('FT   CDS '):
+                    count += 1
+
+        return count
+
+
     def _mkdir(self, d, rmtree=False):
         if rmtree and os.path.exists(d):
             shutil.rmtree(d)
@@ -253,6 +265,10 @@ class Database:
                 self._replace_fasta_header(fa_file, 'gi|' + str(new_gi) + '|x')
                 embl_file = os.path.join(embl_dir, gi + '.embl')
                 self._genbank2embl(gb_file, embl_file)
+                number_of_cds = Database.count_cds_from_embl(embl_file)
+                print('GI', gi, '  CDS:', number_of_cds)
+                if number_of_cds < 1:
+                    raise Error('No CDS found for GI ' + gi + '. Can only use references that have at least one CDS. Cannot continue.')
 
         self._get_parent_taxons(real_taxon_ids)
 
diff --git a/iva/tests/data/kraken_count_cds_from_embl.0 b/iva/tests/data/kraken_count_cds_from_embl.0
new file mode 100644
index 0000000..45a1192
--- /dev/null
+++ b/iva/tests/data/kraken_count_cds_from_embl.0
@@ -0,0 +1,27 @@
+ID   NC_123456; SV 1; linear; unassigned DNA; STD; VRL; 100 BP.
+XX
+AC   NC_123456;
+XX
+DT   25-MAY-2001
+XX
+DE   Hitchhiker virus 42, complete genome.
+XX
+KW   DASeq
+XX
+DR   BioProject; PRJNA12345.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..100
+FT                   /mol_type="alien RNA"
+FT                   /db_xref="taxon:424242"
+FT                   /organism="Unknown"
+FT   gene            42..52
+FT                   /locus_tag="Vogon1"
+FT                   /db_xref="GeneID:42"
+FT                   /gene="poetic"
+XX
+SQ   Sequence 100 BP; 42 A; 42 C; 42 G; 42 T; 0 other;
+     gctatgctga caggtacgta cgcgcgcgtc gcagtcagcg tcgatgtccct cagtctgcga       60
+     cgatcgtagc cagttgcggc ccccctctct tatatataaa                             100
+//
diff --git a/iva/tests/data/kraken_count_cds_from_embl.1 b/iva/tests/data/kraken_count_cds_from_embl.1
new file mode 100644
index 0000000..a83967f
--- /dev/null
+++ b/iva/tests/data/kraken_count_cds_from_embl.1
@@ -0,0 +1,34 @@
+ID   NC_123456; SV 1; linear; unassigned DNA; STD; VRL; 100 BP.
+XX
+AC   NC_123456;
+XX
+DT   25-MAY-2001
+XX
+DE   Hitchhiker virus 42, complete genome.
+XX
+KW   DASeq
+XX
+DR   BioProject; PRJNA12345.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..100
+FT                   /mol_type="alien RNA"
+FT                   /db_xref="taxon:424242"
+FT                   /organism="Unknown"
+FT   gene            42..52
+FT                   /locus_tag="Vogon1"
+FT                   /db_xref="GeneID:42"
+FT                   /gene="poetic"
+FT   CDS             join(42..44,47..52)
+FT                   /locus_tag="Vogon1"
+FT                   /protein_id="43"
+FT                   /gene="poetic"
+FT                   /note="do not listen to this gene"
+FT                   /codon_start=1
+FT                   /product="poem"
+XX
+SQ   Sequence 100 BP; 42 A; 42 C; 42 G; 42 T; 0 other;
+     gctatgctga caggtacgta cgcgcgcgtc gcagtcagcg tcgatgtccct cagtctgcga       60
+     cgatcgtagc cagttgcggc ccccctctct tatatataaa                             100
+//
diff --git a/iva/tests/data/kraken_count_cds_from_embl.2 b/iva/tests/data/kraken_count_cds_from_embl.2
new file mode 100644
index 0000000..72fe8e1
--- /dev/null
+++ b/iva/tests/data/kraken_count_cds_from_embl.2
@@ -0,0 +1,44 @@
+ID   NC_123456; SV 1; linear; unassigned DNA; STD; VRL; 100 BP.
+XX
+AC   NC_123456;
+XX
+DT   25-MAY-2001
+XX
+DE   Hitchhiker virus 42, complete genome.
+XX
+KW   DASeq
+XX
+DR   BioProject; PRJNA12345.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..100
+FT                   /mol_type="alien RNA"
+FT                   /db_xref="taxon:424242"
+FT                   /organism="Unknown"
+FT   gene            11..13
+FT                   /locus_tag="ShortyMcShortGene"
+FT                   /db_xref="GeneID:1"
+FT                   /gene="tiny"
+FT   CDS             join(11..13)
+FT                   /locus_tag="short_tag"
+FT                   /protein_id="11"
+FT                   /gene="small"
+FT                   /codon_start=1
+FT                   /product="not a lot"
+FT   gene            42..52
+FT                   /locus_tag="Vogon1"
+FT                   /db_xref="GeneID:42"
+FT                   /gene="poetic"
+FT   CDS             join(42..44,47..52)
+FT                   /locus_tag="Vogon1"
+FT                   /protein_id="43"
+FT                   /gene="poetic"
+FT                   /note="do not listen to this gene"
+FT                   /codon_start=1
+FT                   /product="poem"
+XX
+SQ   Sequence 100 BP; 42 A; 42 C; 42 G; 42 T; 0 other;
+     gctatgctga caggtacgta cgcgcgcgtc gcagtcagcg tcgatgtccct cagtctgcga       60
+     cgatcgtagc cagttgcggc ccccctctct tatatataaa                             100
+//
diff --git a/iva/tests/kraken_test.py b/iva/tests/kraken_test.py
index 01ed131..27530d8 100644
--- a/iva/tests/kraken_test.py
+++ b/iva/tests/kraken_test.py
@@ -27,6 +27,13 @@ class TestKraken(unittest.TestCase):
         self.db = kraken.Database(os.path.join(data_dir, 'kraken_test.db'))
 
 
+    def test_count_cds_from_embl(self):
+        '''test count_cds_from_embl'''
+        for i in range(3):
+            infile = os.path.join(data_dir, 'kraken_count_cds_from_embl.' + str(i))
+            self.assertEqual(i, kraken.Database.count_cds_from_embl(infile))
+
+
     def test_get_parent_taxons(self):
         '''test _get_parent_taxons'''
         taxons = set(['1', '9', '13'])
@@ -87,7 +94,7 @@ class TestKraken(unittest.TestCase):
         self.db._append_to_file(tmp, '42')
         self.assertTrue(filecmp.cmp(tmp, after))
         os.unlink(tmp)
-    
+
 
     def test_species_to_dir(self):
         '''test species_to_dir'''
diff --git a/scripts/iva_qc_make_db b/scripts/iva_qc_make_db
index 96d5669..d98d155 100755
--- a/scripts/iva_qc_make_db
+++ b/scripts/iva_qc_make_db
@@ -23,7 +23,7 @@ parser = argparse.ArgumentParser(
 )
 
 parser.add_argument('outdir', help='Name of output directory', metavar='Directory_name')
-parser.add_argument('--add_to_ref', action=iva.common.abspathAction, help='Filename of Genbank IDs or GI numbers to be added to database. Format is: whitespace separated list of GI numbers on each line. One line defines one genome (e.g. flu is 8 segements, so put 8 GI numbers on one line for one flu reference)', metavar='Filename')
+parser.add_argument('--add_to_ref', action=iva.common.abspathAction, help='Filename of Genbank IDs or GI numbers to be added to database. Format is: whitespace separated list of GI numbers on each line. One line defines one genome (e.g. flu is 8 segments, so put 8 GI numbers on one line for one flu reference)', metavar='Filename')
 parser.add_argument('--skip_viruses', action='store_true', help='Do not run kraken-build --download-library viruses when building the database. If this option used, then --add_to_ref must also be used')
 parser.add_argument('--threads', type=int, help='Number of threads to use [%(default)s]', metavar='INT', default=1)
 parser.add_argument('--minimizer_len', type=int, help='Number to pass to kraken-build minimizer_len option [%(default)s]', default=13, metavar='INT')
diff --git a/setup.py b/setup.py
index 6c7a384..5269000 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@ if not found_all_progs:
 
 setup(
     name='iva',
-    version='1.0.7',
+    version='1.0.8',
     description='Iterative Virus Assembler',
     packages = find_packages(),
     package_data={'iva': ['gage/*', 'ratt/*', 'read_trim/*', 'test_run_data/*']},

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iva.git



More information about the debian-med-commit mailing list