[med-svn] [ariba] 01/03: New upstream version 2.5.1+ds

Sascha Steinbiss satta at debian.org
Mon Dec 5 10:06:15 UTC 2016


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository ariba.

commit 713f261d25cc84c5020ad5ed7ff100bfb8d18225
Author: Sascha Steinbiss <satta at debian.org>
Date:   Mon Dec 5 09:58:15 2016 +0000

    New upstream version 2.5.1+ds
---
 ariba/ref_seq_chooser.py                           | 19 ++++++++++++-------
 ariba/tests/cluster_test.py                        |  4 ++--
 .../data/ref_seq_chooser_test_flanking.all_refs.fa | 20 ++++++++++++++++++++
 .../ref_seq_chooser_test_flanking.cluster_refs.fa  | 10 ++++++++++
 .../data/ref_seq_chooser_test_flanking.contigs.fa  | 22 ++++++++++++++++++++++
 ...f_seq_chooser_test_flanking.expected_contigs.fa | 11 +++++++++++
 ariba/tests/ref_seq_chooser_test.py                | 14 ++++++++++++++
 setup.py                                           |  2 +-
 8 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/ariba/ref_seq_chooser.py b/ariba/ref_seq_chooser.py
index 3565c9b..5c9eb69 100644
--- a/ariba/ref_seq_chooser.py
+++ b/ariba/ref_seq_chooser.py
@@ -93,7 +93,7 @@ class RefSeqChooser:
 
 
     @classmethod
-    def _best_of_two_hits(cls, hit1, hit2, use_qry_length=False):
+    def _best_of_two_hits(cls, hit1, hit2, use_qry_length=False, check_flanking=False):
         if use_qry_length:
             qry_length_percent1 = hit1.hit_length_qry / hit1.qry_length
             qry_length_percent2 = hit2.hit_length_qry / hit2.qry_length
@@ -107,6 +107,11 @@ class RefSeqChooser:
         elif hit1.percent_identity != hit2.percent_identity:
             return hit1 if hit1.percent_identity > hit2.percent_identity else hit2
         else:
+            if check_flanking:
+                flank1 = min(min(hit1.qry_start, hit1.qry_end), hit1.qry_length - 1 - max(hit1.qry_start, hit1.qry_end))
+                flank2 = min(min(hit2.qry_start, hit2.qry_end), hit2.qry_length - 1 - max(hit2.qry_start, hit2.qry_end))
+                if flank1 != flank2:
+                    return hit1 if flank1 > flank2 else hit2
             l1, c1 = RefSeqChooser._l_and_c_from_contig_name(hit1.qry_name)
             l2, c2 = RefSeqChooser._l_and_c_from_contig_name(hit2.qry_name)
             if l1 != l2:
@@ -116,20 +121,20 @@ class RefSeqChooser:
 
 
     @classmethod
-    def _choose_best_nucmer_match(cls, matches, use_qry_length=False):
+    def _choose_best_nucmer_match(cls, matches, use_qry_length=False, check_flanking=False):
         best_match = None
         for ref_name in matches:
             for hit in matches[ref_name]:
                 if best_match is None:
                     best_match = hit
                 else:
-                    best_match = RefSeqChooser._best_of_two_hits(best_match, hit, use_qry_length=use_qry_length)
+                    best_match = RefSeqChooser._best_of_two_hits(best_match, hit, use_qry_length=use_qry_length, check_flanking=check_flanking)
 
         return best_match
 
 
     @classmethod
-    def _closest_nucmer_match_between_fastas(cls, ref_fasta, qry_fasta, log_fh, min_id, min_length, breaklen, use_qry_length):
+    def _closest_nucmer_match_between_fastas(cls, ref_fasta, qry_fasta, log_fh, min_id, min_length, breaklen, use_qry_length, check_flanking):
         tmpdir = tempfile.mkdtemp(prefix='tmp.closest_nucmer_match.', dir=os.getcwd())
         coords_file = os.path.join(tmpdir, 'nucmer_vs_cluster_refs.coords')
         pymummer.nucmer.Runner(
@@ -147,13 +152,13 @@ class RefSeqChooser:
         if len(nucmer_matches) == 0:
             return None, {}
         else:
-            best_hit = RefSeqChooser._choose_best_nucmer_match(nucmer_matches, use_qry_length=use_qry_length)
+            best_hit = RefSeqChooser._choose_best_nucmer_match(nucmer_matches, use_qry_length=use_qry_length, check_flanking=check_flanking)
             return best_hit, nucmer_matches
 
 
     def run(self):
         print('Looking for closest match from sequences within cluster', file=self.log_fh)
-        best_hit_from_cluster, nucmer_matches = RefSeqChooser._closest_nucmer_match_between_fastas(self.cluster_fasta, self.assembly_fasta_in, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, False)
+        best_hit_from_cluster, nucmer_matches = RefSeqChooser._closest_nucmer_match_between_fastas(self.cluster_fasta, self.assembly_fasta_in, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, False, True)
         if best_hit_from_cluster is None:
             return
 
@@ -166,7 +171,7 @@ class RefSeqChooser:
         RefSeqChooser._make_matching_contig_pieces_fasta(self.assembly_fasta_in, pieces_coords, pieces_fasta_file)
 
         print('Checking for a better match to a ref sequence outside the cluster', file=self.log_fh)
-        best_hit_from_all_seqs, not_needed = RefSeqChooser._closest_nucmer_match_between_fastas(self.all_refs_fasta, pieces_fasta_file, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, True)
+        best_hit_from_all_seqs, not_needed = RefSeqChooser._closest_nucmer_match_between_fastas(self.all_refs_fasta, pieces_fasta_file, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, True, False)
         shutil.rmtree(tmpdir)
         self.closest_ref_from_all_refs = best_hit_from_all_seqs.ref_name
         if self.closest_ref_from_all_refs is None:
diff --git a/ariba/tests/cluster_test.py b/ariba/tests/cluster_test.py
index 3e18ee4..754c7a5 100644
--- a/ariba/tests/cluster_test.py
+++ b/ariba/tests/cluster_test.py
@@ -277,7 +277,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=112, total_reads_bases=1080)
         c.run()
         expected = [
-            'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t364\t27.0\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
+            'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.l6.c30.ctg.1\t362\t27.8\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -490,7 +490,7 @@ class TestCluster(unittest.TestCase):
         c.run()
 
         expected = [
-            'presence_absence1\tpresence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.l15.c30.ctg.1\t807\t22.8\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1'
+            'presence_absence1\tpresence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.l15.c17.ctg.1\t949\t20.5\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa
new file mode 100644
index 0000000..7a29467
--- /dev/null
+++ b/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa
@@ -0,0 +1,20 @@
+>ref1
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGCAGCTA
+>ref2
+CACGCGTCGTGGCCAACCACGCGTTCGTTGGCAGATGCCTTTACGATCACTACCCAAAAT
+AAAGAGCAGTGTGTGTATGTTACCTAACTACGTAGTAAGCGCTAGAGTAGGCAGTGGCCT
+AAGTGACACCTGTTCCGTGTTGCCCTGGCAGCAGCACACCGCATTCTAAGGACCGTCGCG
+TCGTATTCTTCCAGCTAAATCACCCTAAGTGCTATAATTTGGAGGAGTGAAGAGTTTGAT
+GCCAAGCTGACGTCAGGCGGGGATTGCCATTGATCTTGGCTCTCAGCCAGAGAAAGTACA
+TAACAGGAAAATTCAGCCCTTGGGTCTGTGCTCAACGATGGTTTGGAGACTCCTAGAATA
+ATAGCACCTCAGGGACCTTTTCCTAGGAACTGTCCACGGTCGCCACGACTGGAGCTGAAA
+TTTAGTACACAGAGCACCGCCTGTAGATTGCTCCTCGGTCCGGCTGTCTATAGACCGTCA
+CAGAATTCTAGAGCAACCGT
diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa
new file mode 100644
index 0000000..1e81f7c
--- /dev/null
+++ b/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa
@@ -0,0 +1,10 @@
+>ref1
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGCAGCTA
diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa
new file mode 100644
index 0000000..814f1cd
--- /dev/null
+++ b/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa
@@ -0,0 +1,22 @@
+>cluster.l15.c17.ctg.1
+ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGTACCTA
+>cluster.l6.c4.ctg.1
+ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGTACCTACTGACGTATCATCTGCGTACTGCGTCGTATGCATGAAAAC
diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa
new file mode 100644
index 0000000..36413a4
--- /dev/null
+++ b/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa
@@ -0,0 +1,11 @@
+>cluster.l6.c4.ctg.1
+ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGTACCTACTGACGTATCATCTGCGTACTGCGTCGTATGCATGAAAAC
diff --git a/ariba/tests/ref_seq_chooser_test.py b/ariba/tests/ref_seq_chooser_test.py
index 9cb39b3..00e6fbe 100644
--- a/ariba/tests/ref_seq_chooser_test.py
+++ b/ariba/tests/ref_seq_chooser_test.py
@@ -96,3 +96,17 @@ class TestRefSeqChooser(unittest.TestCase):
         self.assertTrue(os.path.exists(tmp_out))
         os.unlink(tmp_out)
 
+
+    def test_run_flanking_different(self):
+        '''Test full run where amount of flanking seq varies'''
+        all_ref_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.all_refs.fa')
+        cluster_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.cluster_refs.fa')
+        contig_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.contigs.fa')
+        expected_fa = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.expected_contigs.fa')
+        tmp_out = 'tmp.ref_seq_chooser_test_flanking.fa'
+        refchooser = ref_seq_chooser.RefSeqChooser(cluster_fasta, all_ref_fasta, contig_fasta, tmp_out, sys.stdout)
+        refchooser.run()
+        self.assertEqual('ref1', refchooser.closest_ref_from_all_refs)
+        self.assertTrue(refchooser.closest_ref_is_in_cluster)
+        self.assertTrue(filecmp.cmp(expected_fa, tmp_out, shallow=False))
+        os.unlink(tmp_out)
diff --git a/setup.py b/setup.py
index 25c8331..a3e6fa1 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ vcfcall_mod = Extension(
 setup(
     ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod],
     name='ariba',
-    version='2.5.0',
+    version='2.5.1',
     description='ARIBA: Antibiotic Resistance Identification By Assembly',
     packages = find_packages(),
     package_data={'ariba': ['test_run_data/*']},

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ariba.git



More information about the debian-med-commit mailing list