[med-svn] [ariba] 01/03: New upstream version 2.5.1+ds
Sascha Steinbiss
satta at debian.org
Mon Dec 5 10:06:15 UTC 2016
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository ariba.
commit 713f261d25cc84c5020ad5ed7ff100bfb8d18225
Author: Sascha Steinbiss <satta at debian.org>
Date: Mon Dec 5 09:58:15 2016 +0000
New upstream version 2.5.1+ds
---
ariba/ref_seq_chooser.py | 19 ++++++++++++-------
ariba/tests/cluster_test.py | 4 ++--
.../data/ref_seq_chooser_test_flanking.all_refs.fa | 20 ++++++++++++++++++++
.../ref_seq_chooser_test_flanking.cluster_refs.fa | 10 ++++++++++
.../data/ref_seq_chooser_test_flanking.contigs.fa | 22 ++++++++++++++++++++++
...f_seq_chooser_test_flanking.expected_contigs.fa | 11 +++++++++++
ariba/tests/ref_seq_chooser_test.py | 14 ++++++++++++++
setup.py | 2 +-
8 files changed, 92 insertions(+), 10 deletions(-)
diff --git a/ariba/ref_seq_chooser.py b/ariba/ref_seq_chooser.py
index 3565c9b..5c9eb69 100644
--- a/ariba/ref_seq_chooser.py
+++ b/ariba/ref_seq_chooser.py
@@ -93,7 +93,7 @@ class RefSeqChooser:
@classmethod
- def _best_of_two_hits(cls, hit1, hit2, use_qry_length=False):
+ def _best_of_two_hits(cls, hit1, hit2, use_qry_length=False, check_flanking=False):
if use_qry_length:
qry_length_percent1 = hit1.hit_length_qry / hit1.qry_length
qry_length_percent2 = hit2.hit_length_qry / hit2.qry_length
@@ -107,6 +107,11 @@ class RefSeqChooser:
elif hit1.percent_identity != hit2.percent_identity:
return hit1 if hit1.percent_identity > hit2.percent_identity else hit2
else:
+ if check_flanking:
+ flank1 = min(min(hit1.qry_start, hit1.qry_end), hit1.qry_length - 1 - max(hit1.qry_start, hit1.qry_end))
+ flank2 = min(min(hit2.qry_start, hit2.qry_end), hit2.qry_length - 1 - max(hit2.qry_start, hit2.qry_end))
+ if flank1 != flank2:
+ return hit1 if flank1 > flank2 else hit2
l1, c1 = RefSeqChooser._l_and_c_from_contig_name(hit1.qry_name)
l2, c2 = RefSeqChooser._l_and_c_from_contig_name(hit2.qry_name)
if l1 != l2:
@@ -116,20 +121,20 @@ class RefSeqChooser:
@classmethod
- def _choose_best_nucmer_match(cls, matches, use_qry_length=False):
+ def _choose_best_nucmer_match(cls, matches, use_qry_length=False, check_flanking=False):
best_match = None
for ref_name in matches:
for hit in matches[ref_name]:
if best_match is None:
best_match = hit
else:
- best_match = RefSeqChooser._best_of_two_hits(best_match, hit, use_qry_length=use_qry_length)
+ best_match = RefSeqChooser._best_of_two_hits(best_match, hit, use_qry_length=use_qry_length, check_flanking=check_flanking)
return best_match
@classmethod
- def _closest_nucmer_match_between_fastas(cls, ref_fasta, qry_fasta, log_fh, min_id, min_length, breaklen, use_qry_length):
+ def _closest_nucmer_match_between_fastas(cls, ref_fasta, qry_fasta, log_fh, min_id, min_length, breaklen, use_qry_length, check_flanking):
tmpdir = tempfile.mkdtemp(prefix='tmp.closest_nucmer_match.', dir=os.getcwd())
coords_file = os.path.join(tmpdir, 'nucmer_vs_cluster_refs.coords')
pymummer.nucmer.Runner(
@@ -147,13 +152,13 @@ class RefSeqChooser:
if len(nucmer_matches) == 0:
return None, {}
else:
- best_hit = RefSeqChooser._choose_best_nucmer_match(nucmer_matches, use_qry_length=use_qry_length)
+ best_hit = RefSeqChooser._choose_best_nucmer_match(nucmer_matches, use_qry_length=use_qry_length, check_flanking=check_flanking)
return best_hit, nucmer_matches
def run(self):
print('Looking for closest match from sequences within cluster', file=self.log_fh)
- best_hit_from_cluster, nucmer_matches = RefSeqChooser._closest_nucmer_match_between_fastas(self.cluster_fasta, self.assembly_fasta_in, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, False)
+ best_hit_from_cluster, nucmer_matches = RefSeqChooser._closest_nucmer_match_between_fastas(self.cluster_fasta, self.assembly_fasta_in, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, False, True)
if best_hit_from_cluster is None:
return
@@ -166,7 +171,7 @@ class RefSeqChooser:
RefSeqChooser._make_matching_contig_pieces_fasta(self.assembly_fasta_in, pieces_coords, pieces_fasta_file)
print('Checking for a better match to a ref sequence outside the cluster', file=self.log_fh)
- best_hit_from_all_seqs, not_needed = RefSeqChooser._closest_nucmer_match_between_fastas(self.all_refs_fasta, pieces_fasta_file, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, True)
+ best_hit_from_all_seqs, not_needed = RefSeqChooser._closest_nucmer_match_between_fastas(self.all_refs_fasta, pieces_fasta_file, self.log_fh, self.nucmer_min_id, self.nucmer_min_len, self.nucmer_breaklen, True, False)
shutil.rmtree(tmpdir)
self.closest_ref_from_all_refs = best_hit_from_all_seqs.ref_name
if self.closest_ref_from_all_refs is None:
diff --git a/ariba/tests/cluster_test.py b/ariba/tests/cluster_test.py
index 3e18ee4..754c7a5 100644
--- a/ariba/tests/cluster_test.py
+++ b/ariba/tests/cluster_test.py
@@ -277,7 +277,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=112, total_reads_bases=1080)
c.run()
expected = [
- 'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.l15.c30.ctg.1\t364\t27.0\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
+ 'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.l6.c30.ctg.1\t362\t27.8\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -490,7 +490,7 @@ class TestCluster(unittest.TestCase):
c.run()
expected = [
- 'presence_absence1\tpresence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.l15.c30.ctg.1\t807\t22.8\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1'
+ 'presence_absence1\tpresence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.l15.c17.ctg.1\t949\t20.5\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa
new file mode 100644
index 0000000..7a29467
--- /dev/null
+++ b/ariba/tests/data/ref_seq_chooser_test_flanking.all_refs.fa
@@ -0,0 +1,20 @@
+>ref1
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGCAGCTA
+>ref2
+CACGCGTCGTGGCCAACCACGCGTTCGTTGGCAGATGCCTTTACGATCACTACCCAAAAT
+AAAGAGCAGTGTGTGTATGTTACCTAACTACGTAGTAAGCGCTAGAGTAGGCAGTGGCCT
+AAGTGACACCTGTTCCGTGTTGCCCTGGCAGCAGCACACCGCATTCTAAGGACCGTCGCG
+TCGTATTCTTCCAGCTAAATCACCCTAAGTGCTATAATTTGGAGGAGTGAAGAGTTTGAT
+GCCAAGCTGACGTCAGGCGGGGATTGCCATTGATCTTGGCTCTCAGCCAGAGAAAGTACA
+TAACAGGAAAATTCAGCCCTTGGGTCTGTGCTCAACGATGGTTTGGAGACTCCTAGAATA
+ATAGCACCTCAGGGACCTTTTCCTAGGAACTGTCCACGGTCGCCACGACTGGAGCTGAAA
+TTTAGTACACAGAGCACCGCCTGTAGATTGCTCCTCGGTCCGGCTGTCTATAGACCGTCA
+CAGAATTCTAGAGCAACCGT
diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa
new file mode 100644
index 0000000..1e81f7c
--- /dev/null
+++ b/ariba/tests/data/ref_seq_chooser_test_flanking.cluster_refs.fa
@@ -0,0 +1,10 @@
+>ref1
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGCAGCTA
diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa
new file mode 100644
index 0000000..814f1cd
--- /dev/null
+++ b/ariba/tests/data/ref_seq_chooser_test_flanking.contigs.fa
@@ -0,0 +1,22 @@
+>cluster.l15.c17.ctg.1
+ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGTACCTA
+>cluster.l6.c4.ctg.1
+ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGTACCTACTGACGTATCATCTGCGTACTGCGTCGTATGCATGAAAAC
diff --git a/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa b/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa
new file mode 100644
index 0000000..36413a4
--- /dev/null
+++ b/ariba/tests/data/ref_seq_chooser_test_flanking.expected_contigs.fa
@@ -0,0 +1,11 @@
+>cluster.l6.c4.ctg.1
+ATCATCATCTGACTGATCGTACGTACGTGTCGTCAGTCAGCTAGCTGTCAGTAAGAAAAC
+GTCGCTCCTATGCGCTGGCACGTTCACACCTTTACGACAACCAGTAAGGATGCTTGGGCG
+AATCCCCTTCCCCCTTCTGGTAGTTTTCATTATGCTCAGCGTAACTGAGTCTACCAGGAG
+ACCTTGGACGGACGGTGAATCCGCATAGCGCACCCATAAGTAGGAGATAAGGTTACTGGA
+TTGTTCGCTGAAGAAGACAATCAAGGGGAGGTCTATTTGTTTATAGTGACACTACAAGGG
+AGGTGATGTTGGCCTGCTGGAAGGTTTTGAAAGAAGCGGGTGCTAGCCTGGCGACTCTTC
+ATCCATTTCAATGATTTCGGGGCTCCACTTATTTCCGAATCGGCTCCTGGGGTAGCCCTA
+ACTCCATGATCCACCTCGAATCGAACCGCCAGCAATTTCAGAGTATAGAACTAGACAGGA
+TTTACTGCCAGAGCTATCGAATATCATCGGAACGGGGACTTCGCGCACCATTGGACAAGC
+CAGATCTCAATCTGTACCTACTGACGTATCATCTGCGTACTGCGTCGTATGCATGAAAAC
diff --git a/ariba/tests/ref_seq_chooser_test.py b/ariba/tests/ref_seq_chooser_test.py
index 9cb39b3..00e6fbe 100644
--- a/ariba/tests/ref_seq_chooser_test.py
+++ b/ariba/tests/ref_seq_chooser_test.py
@@ -96,3 +96,17 @@ class TestRefSeqChooser(unittest.TestCase):
self.assertTrue(os.path.exists(tmp_out))
os.unlink(tmp_out)
+
+ def test_run_flanking_different(self):
+ '''Test full run where amount of flanking seq varies'''
+ all_ref_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.all_refs.fa')
+ cluster_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.cluster_refs.fa')
+ contig_fasta = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.contigs.fa')
+ expected_fa = os.path.join(data_dir, 'ref_seq_chooser_test_flanking.expected_contigs.fa')
+ tmp_out = 'tmp.ref_seq_chooser_test_flanking.fa'
+ refchooser = ref_seq_chooser.RefSeqChooser(cluster_fasta, all_ref_fasta, contig_fasta, tmp_out, sys.stdout)
+ refchooser.run()
+ self.assertEqual('ref1', refchooser.closest_ref_from_all_refs)
+ self.assertTrue(refchooser.closest_ref_is_in_cluster)
+ self.assertTrue(filecmp.cmp(expected_fa, tmp_out, shallow=False))
+ os.unlink(tmp_out)
diff --git a/setup.py b/setup.py
index 25c8331..a3e6fa1 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ vcfcall_mod = Extension(
setup(
ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod],
name='ariba',
- version='2.5.0',
+ version='2.5.1',
description='ARIBA: Antibiotic Resistance Identification By Assembly',
packages = find_packages(),
package_data={'ariba': ['test_run_data/*']},
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ariba.git
More information about the debian-med-commit mailing list