[med-svn] [ariba] 02/03: New upstream version 2.7.0+ds
Sascha Steinbiss
satta at debian.org
Tue Jan 17 11:56:37 UTC 2017
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository ariba.
commit 5ee95ba0d5349953264de62fe5af86ebebfecf9c
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date: Tue Jan 17 12:47:35 2017 +0100
New upstream version 2.7.0+ds
---
ariba/__init__.py | 2 +
ariba/common.py | 25 +++++
ariba/megares_data_finder.py | 68 ++++++++++++
ariba/megares_zip_parser.py | 119 +++++++++++++++++++++
ariba/ref_genes_getter.py | 38 +++----
.../data/megares_zip_parse_extract_files_ok.zip | Bin 0 -> 1328 bytes
...megares_zip_parse_extract_files_one_missing.zip | Bin 0 -> 1116 bytes
.../data/megares_zip_parser_load_annotations.csv | 3 +
.../megares_zip_parser_load_header_mappings.tsv | 3 +
.../data/megares_zip_parser_write_files.expect.fa | 6 ++
.../data/megares_zip_parser_write_files.expect.tsv | 3 +
.../megares_annotations_v1.01.csv | 4 +
.../megares_database_v1.01.fasta | 6 ++
.../megares_to_external_header_mappings_v1.01.tsv | 4 +
ariba/tests/megares_data_finder_test.py | 36 +++++++
ariba/tests/megares_zip_parser_test.py | 84 +++++++++++++++
scripts/ariba | 2 +-
setup.py | 3 +-
18 files changed, 385 insertions(+), 21 deletions(-)
diff --git a/ariba/__init__.py b/ariba/__init__.py
index 5f40fec..1ca0437 100644
--- a/ariba/__init__.py
+++ b/ariba/__init__.py
@@ -23,6 +23,8 @@ __all__ = [
'histogram',
'link',
'mapping',
+ 'megares_data_finder',
+ 'megares_zip_parser',
'mlst_profile',
'mlst_reporter',
'pubmlst_getter',
diff --git a/ariba/common.py b/ariba/common.py
index e4b5977..0410334 100644
--- a/ariba/common.py
+++ b/ariba/common.py
@@ -1,8 +1,14 @@
import os
+import time
import sys
import subprocess
+import urllib.request
import pyfastaq
+
+class Error (Exception): pass
+
+
def syscall(cmd, allow_fail=False, verbose=False, verbose_filehandle=sys.stdout, print_errors=True):
if verbose:
print('syscall:', cmd, flush=True, file=verbose_filehandle)
@@ -44,3 +50,22 @@ def cat_files(infiles, outfile):
pyfastaq.utils.close(f_in)
pyfastaq.utils.close(f_out)
+
+
+def download_file(url, outfile, max_attempts=3, sleep_time=2, verbose=False):
+ if verbose:
+ print('Downloading "', url, '" and saving as "', outfile, '" ...', end='', sep='', flush=True)
+
+ for i in range(max_attempts):
+ time.sleep(sleep_time)
+ try:
+ urllib.request.urlretrieve(url, filename=outfile)
+ except:
+ continue
+ break
+ else:
+ raise Error('Error downloading: ' + url)
+
+ if verbose:
+ print(' done', flush=True)
+
diff --git a/ariba/megares_data_finder.py b/ariba/megares_data_finder.py
new file mode 100644
index 0000000..c97d60e
--- /dev/null
+++ b/ariba/megares_data_finder.py
@@ -0,0 +1,68 @@
+import urllib.request
+from bs4 import BeautifulSoup
+from distutils.version import LooseVersion
+
+
+class Error (Exception): pass
+
+
+class MegaresDataFinder:
+ def __init__(self, version=None):
+ self.url_root = 'https://megares.meglab.org/download/'
+ self.index_url = self.url_root + 'index.php'
+ self.version = version
+
+
+ def _get_available_zips(self):
+ try:
+ response = urllib.request.urlopen(self.index_url)
+ html_text = response.read()
+ except:
+ raise Error('Error getting megares download page ' + self.index_url)
+
+ return html_text
+
+
+ @classmethod
+ def _zips_from_index_page_string(cls, html_text):
+ try:
+ soup = BeautifulSoup(html_text, 'html.parser')
+ except:
+ raise Error('Error parsing contents of megares download page. Cannot continue')
+
+ prefix = 'megares_v'
+ suffix = '.zip'
+ zips = {}
+
+ for link in soup.find_all('a'):
+ href = link.get('href')
+ if href.startswith(prefix) and href.endswith(suffix):
+ version = href[len(prefix):-len(suffix)]
+ zips[version] = href
+
+ return zips
+
+
+ @classmethod
+ def _get_url_for_version(cls, zips, version=None):
+ if version is None:
+ versions = list(zips.keys())
+ versions.sort(key=LooseVersion)
+ return zips[versions[-1]]
+ else:
+ try:
+ return zips[version]
+ except:
+ versions = ', '.join(list(zips.keys()))
+ raise Error('Error! version ' + version + ' of megares not found. Available versions: ' + versions)
+
+
+ def run(self):
+ print('Finding available megares versions from', self.index_url)
+ html_text = self._get_available_zips()
+ zips = MegaresDataFinder._zips_from_index_page_string(html_text)
+ print('Found versions: ', ', '.join(list(zips.keys())))
+ url = MegaresDataFinder._get_url_for_version(zips, version=self.version)
+ return self.url_root + url
+
+
diff --git a/ariba/megares_zip_parser.py b/ariba/megares_zip_parser.py
new file mode 100644
index 0000000..4c4d70c
--- /dev/null
+++ b/ariba/megares_zip_parser.py
@@ -0,0 +1,119 @@
+import os
+import sys
+import csv
+import zipfile
+import shutil
+import pyfastaq
+from ariba import common
+
+class Error (Exception): pass
+
+
+class MegaresZipParser:
+ def __init__(self, zip_url, outprefix):
+ self.zip_url = zip_url
+ self.outprefix = outprefix
+ self.zip_file = self.outprefix + '.downloaded.zip'
+
+
+ @classmethod
+ def _extract_files(cls, zip_file, outdir):
+ original_files = {'annotations': None, 'fasta': None, 'header_mappings': None}
+
+ try:
+ os.mkdir(outdir)
+ except:
+ raise Error('Error making directory ' + outdir)
+
+ zfile = zipfile.ZipFile(zip_file)
+ for member in zfile.namelist():
+ if '_annotations_' in member:
+ original_files['annotations'] = member
+ elif '_database_' in member and member.endswith('.fasta'):
+ original_files['fasta'] = member
+ elif '_header_mappings_' in member:
+ original_files['header_mappings'] = member
+ else:
+ continue
+
+ zfile.extract(member, path=outdir)
+
+ if None in original_files.values():
+ shutil.rmtree(outdir)
+ raise Error('Error. Not all expected files found in downloaded megares zipfile. ' + str(original_files))
+
+ return original_files
+
+
+ @classmethod
+ def _csv_to_dict(cls, infile, delimiter, expected_columns, key_column):
+ data = {}
+ non_key_columns = expected_columns - {key_column}
+
+ with open(infile) as f:
+ reader = csv.DictReader(f, delimiter=delimiter)
+ if set(expected_columns) != set(reader.fieldnames):
+ raise Error('Unexpected header in annotations file. Expected columns: ' + ','.join(expected_columns) + ' but got: ' + ','.join(reader.fieldnames))
+
+ for row in reader:
+ data[row[key_column]] = {x: row[x] for x in non_key_columns}
+
+ return data
+
+
+ @classmethod
+ def _load_annotations_file(cls, infile):
+ return MegaresZipParser._csv_to_dict(infile, ',', {'header', 'class', 'mechanism', 'group'}, 'header')
+
+
+ @classmethod
+ def _load_header_mappings_file(cls, infile):
+ return MegaresZipParser._csv_to_dict(infile, '\t', {'Source_Database', 'MEGARes_Header', 'Source_Headers(space_separated)'}, 'MEGARes_Header')
+
+
+ @classmethod
+ def _write_files(cls, outprefix, sequences, annotations, header_mappings):
+ fasta = outprefix + '.fa'
+ tsv = outprefix + '.tsv'
+ fh_fasta = pyfastaq.utils.open_file_write(fasta)
+ fh_tsv = pyfastaq.utils.open_file_write(tsv)
+
+ for seq in sorted(sequences):
+ final_column = []
+
+ if seq in annotations:
+ group = annotations[seq]['group']
+ final_column.append('class:' + annotations[seq]['class'] + '; mechanism:' + annotations[seq]['mechanism'] + '; group:' + group)
+ else:
+ group = 'unknown'
+ print('WARNING: sequence "', seq, '" has no record in annotations file', sep='', file=sys.stderr)
+
+ if seq in header_mappings:
+ final_column.append('Source_Database:' + header_mappings[seq]['Source_Database'] + '; Source_Headers:' + header_mappings[seq]['Source_Headers(space_separated)'])
+ else:
+ print('WARNING: sequence "', seq, '" has no record in header mappings file', sep='', file=sys.stderr)
+
+ if len(final_column) > 0:
+ print(group + '.' + seq, '1', '0', '.', '.', '; '.join(final_column), sep='\t', file=fh_tsv)
+ else:
+ print(group + '.' + seq, '1', '0', '.', '.', '.', sep='\t', file=fh_tsv)
+
+ sequences[seq].id = group + '.' + sequences[seq].id
+ print(sequences[seq], file=fh_fasta)
+
+ fh_fasta.close()
+ fh_tsv.close()
+
+
+ def run(self):
+ common.download_file(self.zip_url, self.zip_file, verbose=True)
+ tmpdir = self.zip_file + '.tmp.extract'
+ original_files = MegaresZipParser._extract_files(self.zip_file, tmpdir)
+ annotation_data = MegaresZipParser._load_annotations_file(os.path.join(tmpdir, original_files['annotations']))
+ header_data = MegaresZipParser._load_header_mappings_file(os.path.join(tmpdir, original_files['header_mappings']))
+ sequences = {}
+ pyfastaq.tasks.file_to_dict(os.path.join(tmpdir, original_files['fasta']), sequences)
+ MegaresZipParser._write_files(self.outprefix, sequences, annotation_data, header_data)
+ shutil.rmtree(tmpdir)
+ os.unlink(self.zip_file)
+
diff --git a/ariba/ref_genes_getter.py b/ariba/ref_genes_getter.py
index 34b4d8c..a6d82e9 100644
--- a/ariba/ref_genes_getter.py
+++ b/ariba/ref_genes_getter.py
@@ -5,15 +5,15 @@ import re
import shutil
import tarfile
import pyfastaq
-import urllib.request
import time
import json
-from ariba import common, card_record, vfdb_parser
+from ariba import common, card_record, vfdb_parser, megares_data_finder, megares_zip_parser
allowed_ref_dbs = {
'argannot',
'card',
+ 'megares',
'plasmidfinder',
'resfinder',
'srst2_argannot',
@@ -37,23 +37,9 @@ class RefGenesGetter:
pyfastaq.sequences.genetic_code = self.genetic_code
- def _download_file(self, url, outfile):
- print('Downloading "', url, '" and saving as "', outfile, '" ...', end='', sep='', flush=True)
- for i in range(self.max_download_attempts):
- time.sleep(self.sleep_time)
- try:
- urllib.request.urlretrieve(url, filename=outfile)
- except:
- continue
- break
- else:
- raise Error('Error downloading: ' + url)
- print(' done', flush=True)
-
-
def _get_card_versions(self, tmp_file):
print('Getting available CARD versions')
- self._download_file('https://card.mcmaster.ca/download', tmp_file)
+ common.download_file('https://card.mcmaster.ca/download', tmp_file, max_attempts=self.max_download_attempts, sleep_time=self.sleep_time, verbose=True)
p = re.compile(r'''href="(/download/.*?broad.*?v([0-9]+\.[0-9]+\.[0-9]+)\.tar\.gz)"''')
versions = {}
@@ -269,7 +255,7 @@ class RefGenesGetter:
raise Error('Error mkdir/chdir ' + tmpdir)
zipfile = 'arg-annot-database_doc.zip'
- self._download_file('http://www.mediterranee-infection.com/arkotheque/client/ihumed/_depot_arko/articles/304/arg-annot-database_doc.zip', zipfile)
+ common.download_file('http://www.mediterranee-infection.com/arkotheque/client/ihumed/_depot_arko/articles/304/arg-annot-database_doc.zip', zipfile, max_attempts=self.max_download_attempts, sleep_time=self.sleep_time, verbose=True)
common.syscall('unzip ' + zipfile)
os.chdir(current_dir)
print('Extracted files.')
@@ -301,6 +287,20 @@ class RefGenesGetter:
print(argannot_ref)
+ def _get_from_megares(self, outprefix):
+ data_finder = megares_data_finder.MegaresDataFinder(version=self.version)
+ download_url = data_finder.run()
+ zip_parser = megares_zip_parser.MegaresZipParser(download_url, outprefix)
+ zip_parser.run()
+ final_fasta = outprefix + '.fa'
+ final_tsv = outprefix + '.tsv'
+ print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
+ print('You can use them with ARIBA like this:')
+ print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
+ print('If you use this downloaded data, please cite:')
+ print('"MEGARes: an antimicrobial database for high throughput sequencing", Lakin et al 2016, PMID: PMC5210519\n')
+
+
def _get_from_plasmidfinder(self, outprefix):
outprefix = os.path.abspath(outprefix)
final_fasta = outprefix + '.fa'
@@ -408,7 +408,7 @@ class RefGenesGetter:
raise Error('Error mkdir ' + tmpdir)
zipfile = os.path.join(tmpdir, filename)
- self._download_file('http://www.mgc.ac.cn/VFs/Down/' + filename, zipfile)
+ common.download_file('http://www.mgc.ac.cn/VFs/Down/' + filename, zipfile, max_attempts=self.max_download_attempts, sleep_time=self.sleep_time, verbose=True)
print('Extracting files ... ', end='', flush=True)
vparser = vfdb_parser.VfdbParser(zipfile, outprefix)
vparser.run()
diff --git a/ariba/tests/data/megares_zip_parse_extract_files_ok.zip b/ariba/tests/data/megares_zip_parse_extract_files_ok.zip
new file mode 100644
index 0000000..8335a52
Binary files /dev/null and b/ariba/tests/data/megares_zip_parse_extract_files_ok.zip differ
diff --git a/ariba/tests/data/megares_zip_parse_extract_files_one_missing.zip b/ariba/tests/data/megares_zip_parse_extract_files_one_missing.zip
new file mode 100644
index 0000000..35c60de
Binary files /dev/null and b/ariba/tests/data/megares_zip_parse_extract_files_one_missing.zip differ
diff --git a/ariba/tests/data/megares_zip_parser_load_annotations.csv b/ariba/tests/data/megares_zip_parser_load_annotations.csv
new file mode 100644
index 0000000..f1b3a1a
--- /dev/null
+++ b/ariba/tests/data/megares_zip_parser_load_annotations.csv
@@ -0,0 +1,3 @@
+header,class,mechanism,group
+Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA,betalactams,Class A betalactamases,OXA
+Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar,Class,foobar,Bar
diff --git a/ariba/tests/data/megares_zip_parser_load_header_mappings.tsv b/ariba/tests/data/megares_zip_parser_load_header_mappings.tsv
new file mode 100644
index 0000000..d906699
--- /dev/null
+++ b/ariba/tests/data/megares_zip_parser_load_header_mappings.tsv
@@ -0,0 +1,3 @@
+Source_Database MEGARes_Header Source_Headers(space_separated)
+SOURCE1 Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA source header 1
+SOURCE2 Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar source header 2
diff --git a/ariba/tests/data/megares_zip_parser_write_files.expect.fa b/ariba/tests/data/megares_zip_parser_write_files.expect.fa
new file mode 100644
index 0000000..806c147
--- /dev/null
+++ b/ariba/tests/data/megares_zip_parser_write_files.expect.fa
@@ -0,0 +1,6 @@
+>OXA.Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA
+ATGACCGAAAGCAGCGAACGCGCGTGCACCTGA
+>group1.Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar
+ATGTGCGCGCGCTGCGCGAGCAGCCGCGTGCTGGAATGA
+>unknown.Only_in_fasta_file
+ATGTGA
diff --git a/ariba/tests/data/megares_zip_parser_write_files.expect.tsv b/ariba/tests/data/megares_zip_parser_write_files.expect.tsv
new file mode 100644
index 0000000..7377284
--- /dev/null
+++ b/ariba/tests/data/megares_zip_parser_write_files.expect.tsv
@@ -0,0 +1,3 @@
+OXA.Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA 1 0 . . class:betalactams; mechanism:Class A betalactamases; group:OXA; Source_Database:SOURCE1; Source_Headers:source header 1
+group1.Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar 1 0 . . class:Class foobar; mechanism:Bar; group:group1; Source_Database:SOURCE2; Source_Headers:source header 2
+unknown.Only_in_fasta_file 1 0 . . .
diff --git a/ariba/tests/data/megares_zip_parser_write_files/megares_annotations_v1.01.csv b/ariba/tests/data/megares_zip_parser_write_files/megares_annotations_v1.01.csv
new file mode 100644
index 0000000..c6bff24
--- /dev/null
+++ b/ariba/tests/data/megares_zip_parser_write_files/megares_annotations_v1.01.csv
@@ -0,0 +1,4 @@
+header,class,mechanism,group
+Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA,betalactams,Class A betalactamases,OXA
+Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar,Class foobar,Bar,group1
+only in annotations file,foo,bar,baz
diff --git a/ariba/tests/data/megares_zip_parser_write_files/megares_database_v1.01.fasta b/ariba/tests/data/megares_zip_parser_write_files/megares_database_v1.01.fasta
new file mode 100644
index 0000000..146de2e
--- /dev/null
+++ b/ariba/tests/data/megares_zip_parser_write_files/megares_database_v1.01.fasta
@@ -0,0 +1,6 @@
+>Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA
+ATGACCGAAAGCAGCGAACGCGCGTGCACCTGA
+>Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar
+ATGTGCGCGCGCTGCGCGAGCAGCCGCGTGCTGGAATGA
+>Only_in_fasta_file
+ATGTGA
diff --git a/ariba/tests/data/megares_zip_parser_write_files/megares_to_external_header_mappings_v1.01.tsv b/ariba/tests/data/megares_zip_parser_write_files/megares_to_external_header_mappings_v1.01.tsv
new file mode 100644
index 0000000..3bccd51
--- /dev/null
+++ b/ariba/tests/data/megares_zip_parser_write_files/megares_to_external_header_mappings_v1.01.tsv
@@ -0,0 +1,4 @@
+Source_Database MEGARes_Header Source_Headers(space_separated)
+SOURCE1 Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA source header 1
+SOURCE2 Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar source header 2
+sourceX only in header mapping file source header X
diff --git a/ariba/tests/megares_data_finder_test.py b/ariba/tests/megares_data_finder_test.py
new file mode 100644
index 0000000..669a663
--- /dev/null
+++ b/ariba/tests/megares_data_finder_test.py
@@ -0,0 +1,36 @@
+import unittest
+import os
+from ariba import megares_data_finder
+
+modules_dir = os.path.dirname(os.path.abspath(megares_data_finder.__file__))
+data_dir = os.path.join(modules_dir, 'tests', 'data')
+
+
+class TestMegaresDataFinder(unittest.TestCase):
+ def test_zips_from_index_page_string(self):
+ '''test _zips_from_index_page_string'''
+ html_string = r''''<!doctype html>
+<html>
+ <head>
+
+ </head>
+
+<ul>
+ <li><a href="megares_v1.01.zip">All Files</a></li>
+ <li><a href="foo.zip">All Files</a></li>
+ <li><a href="megares_v1.00.zip">All Files</a></li>
+</html>'''
+
+ expected = {'1.00': 'megares_v1.00.zip', '1.01': 'megares_v1.01.zip'}
+ got = megares_data_finder.MegaresDataFinder._zips_from_index_page_string(html_string)
+ self.assertEqual(expected, got)
+
+
+ def test_get_url_for_version(self):
+ '''test _get_url_for_version'''
+ zips = {'1.00': 'megares_v1.00.zip', '1.01': 'megares_v1.01.zip'}
+ self.assertEqual('megares_v1.01.zip', megares_data_finder.MegaresDataFinder._get_url_for_version(zips))
+ self.assertEqual('megares_v1.00.zip', megares_data_finder.MegaresDataFinder._get_url_for_version(zips, version='1.00'))
+ with self.assertRaises(megares_data_finder.Error):
+ self.assertEqual('megares_v1.00.zip', megares_data_finder.MegaresDataFinder._get_url_for_version(zips, version='0.42'))
+
diff --git a/ariba/tests/megares_zip_parser_test.py b/ariba/tests/megares_zip_parser_test.py
new file mode 100644
index 0000000..5afd951
--- /dev/null
+++ b/ariba/tests/megares_zip_parser_test.py
@@ -0,0 +1,84 @@
+import unittest
+import copy
+import shutil
+import filecmp
+import os
+import pyfastaq
+from ariba import megares_zip_parser
+
+modules_dir = os.path.dirname(os.path.abspath(megares_zip_parser.__file__))
+data_dir = os.path.join(modules_dir, 'tests', 'data')
+
+
+class TestMegaresZipParser(unittest.TestCase):
+ def test_extract_files_ok(self):
+ '''test _extract_files when all ok'''
+ zip_file = os.path.join(data_dir, 'megares_zip_parse_extract_files_ok.zip')
+ tmp_dir = 'tmp.test_megares_extract_files_ok'
+ got = megares_zip_parser.MegaresZipParser._extract_files(zip_file, tmp_dir)
+ common_dir = os.path.join('megares_zip_parse_extract_files_ok', 'megares_v1.01')
+ expected = {
+ 'annotations': os.path.join(common_dir, 'megares_annotations_v1.01.csv'),
+ 'fasta': os.path.join(common_dir, 'megares_database_v1.01.fasta'),
+ 'header_mappings': os.path.join(common_dir, 'megares_to_external_header_mappings_v1.01.tsv')
+ }
+
+ self.assertEqual(expected, got)
+
+ for filename in expected.values():
+ self.assertTrue(os.path.exists(os.path.join(tmp_dir, filename)))
+
+ shutil.rmtree(tmp_dir)
+
+
+ def test_extract_files_one_missing(self):
+ '''test _extract_files when one missing'''
+ zip_file = os.path.join(data_dir, 'megares_zip_parse_extract_files_one_missing.zip')
+ tmp_dir = 'tmp.test_megares_extract_files_one_missing'
+ with self.assertRaises(megares_zip_parser.Error):
+ got = megares_zip_parser.MegaresZipParser._extract_files(zip_file, tmp_dir)
+
+
+ def test_load_annotations_file(self):
+ '''test _load_annotations_file'''
+ infile = os.path.join(data_dir, 'megares_zip_parser_load_annotations.csv')
+ expected = {
+ 'Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA': {'class': 'betalactams', 'mechanism': 'Class A betalactamases', 'group': 'OXA'},
+ 'Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar': {'class': 'Class', 'mechanism': 'foobar', 'group': 'Bar'}
+ }
+ got = megares_zip_parser.MegaresZipParser._load_annotations_file(infile)
+ self.maxDiff = None
+ self.assertEqual(expected, got)
+
+
+ def test_load_header_mappings_file(self):
+ '''test _load_header_mappings_file'''
+ infile = os.path.join(data_dir, 'megares_zip_parser_load_header_mappings.tsv')
+ expected = {
+ 'Bla|OXA-1|JN123456|42-141|100|betalactams|Class_A_betalactamases|OXA': {'Source_Database': 'SOURCE1', 'Source_Headers(space_separated)': 'source header 1'},
+ 'Foo|Bar-1|JN42|1-11|10|foobar|Class_foobar|Bar': {'Source_Database': 'SOURCE2', 'Source_Headers(space_separated)': 'source header 2'},
+ }
+ got = megares_zip_parser.MegaresZipParser._load_header_mappings_file(infile)
+ self.maxDiff = None
+ self.assertEqual(expected, got)
+
+
+ def test_write_files(self):
+ '''test _write_files'''
+ fasta_file = os.path.join(data_dir, 'megares_zip_parser_write_files', 'megares_database_v1.01.fasta')
+ annotations_file = os.path.join(data_dir, 'megares_zip_parser_write_files', 'megares_annotations_v1.01.csv')
+ mappings_file = os.path.join(data_dir, 'megares_zip_parser_write_files', 'megares_to_external_header_mappings_v1.01.tsv')
+ sequences = {}
+ pyfastaq.tasks.file_to_dict(fasta_file, sequences)
+ annotation_data = megares_zip_parser.MegaresZipParser._load_annotations_file(annotations_file)
+ mappings_data = megares_zip_parser.MegaresZipParser._load_header_mappings_file(mappings_file)
+
+ tmp_prefix = 'tmp.test_megares_zip_parser_write_files'
+ megares_zip_parser.MegaresZipParser._write_files(tmp_prefix, sequences, annotation_data, mappings_data)
+
+ expected_fasta = os.path.join(data_dir, 'megares_zip_parser_write_files.expect.fa')
+ expected_tsv = os.path.join(data_dir, 'megares_zip_parser_write_files.expect.tsv')
+ self.assertTrue(filecmp.cmp(expected_fasta, tmp_prefix + '.fa', shallow=False))
+ self.assertTrue(filecmp.cmp(expected_tsv, tmp_prefix + '.tsv', shallow=False))
+ os.unlink(tmp_prefix + '.fa')
+ os.unlink(tmp_prefix + '.tsv')
diff --git a/scripts/ariba b/scripts/ariba
index 0718de6..2ba73d8 100755
--- a/scripts/ariba
+++ b/scripts/ariba
@@ -50,7 +50,7 @@ subparser_getref = subparsers.add_parser(
description='Download reference data from one of a few supported public resources',
)
subparser_getref.add_argument('--debug', action='store_true', help='Do not delete temporary downloaded files')
-subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card')
+subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card and megares')
subparser_getref.add_argument('db', help='Database to download. Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs, metavar="DB name")
subparser_getref.add_argument('outprefix', help='Prefix of output filenames')
subparser_getref.set_defaults(func=ariba.tasks.getref.run)
diff --git a/setup.py b/setup.py
index d8af3cc..54be6c4 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ vcfcall_mod = Extension(
setup(
ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod],
name='ariba',
- version='2.6.1',
+ version='2.7.0',
description='ARIBA: Antibiotic Resistance Identification By Assembly',
packages = find_packages(),
package_data={'ariba': ['test_run_data/*']},
@@ -66,6 +66,7 @@ setup(
test_suite='nose.collector',
tests_require=['nose >= 1.3'],
install_requires=[
+ 'BeautifulSoup4 >= 4.1.0',
'dendropy >= 4.1.0',
'pyfastaq >= 3.12.0',
'pysam >= 0.9.1',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ariba.git
More information about the debian-med-commit
mailing list