[med-svn] [Git][med-team/ariba][master] 6 commits: Merge branch 'upstream' of salsa.debian.org:med-team/ariba into upstream
Sascha Steinbiss
gitlab at salsa.debian.org
Wed May 16 09:37:22 BST 2018
Sascha Steinbiss pushed to branch master at Debian Med / ariba
Commits:
313bcec6 by Sascha Steinbiss at 2018-03-05T09:09:54+01:00
Merge branch 'upstream' of salsa.debian.org:med-team/ariba into upstream
- - - - -
0c1eaaa8 by Sascha Steinbiss at 2018-05-15T17:03:58+02:00
New upstream version 2.12.0+ds
- - - - -
d37c5d5a by Sascha Steinbiss at 2018-05-15T17:03:59+02:00
Update upstream source from tag 'upstream/2.12.0+ds'
Update to upstream version '2.12.0+ds'
with Debian dir c77f7f2df2033418d069034a8886b6080189287e
- - - - -
e2703a25 by Sascha Steinbiss at 2018-05-15T17:34:35+02:00
new upstream release
- - - - -
3761d382 by Sascha Steinbiss at 2018-05-15T17:34:49+02:00
remove obsolete Python version hint
- - - - -
5d1142d9 by Sascha Steinbiss at 2018-05-15T17:35:08+02:00
use debhelper 11
- - - - -
19 changed files:
- README.md
- ariba/assembly.py
- ariba/cdhit.py
- ariba/cluster.py
- ariba/external_progs.py
- ariba/read_filter.py
- ariba/ref_genes_getter.py
- ariba/tasks/prepareref.py
- ariba/tasks/run.py
- ariba/tests/assembly_test.py
- + ariba/tests/data/ref_genes_getter.fix_virulencefinder_fasta_file.in.fa
- + ariba/tests/data/ref_genes_getter.fix_virulencefinder_fasta_file.out.fa
- + ariba/tests/ref_genes_getter_test.py
- ariba/versions.py
- debian/changelog
- debian/compat
- debian/control
- scripts/ariba
- setup.py
Changes:
=====================================
README.md
=====================================
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Antimicrobial Resistance Identification By Assembly
For how to use ARIBA, please see the [ARIBA wiki page][ARIBA wiki].
[![Build Status](https://travis-ci.org/sanger-pathogens/ariba.svg?branch=master)](https://travis-ci.org/sanger-pathogens/ariba)
-[![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-brightgreen.svg)](https://github.com/ssjunnebo/ariba/blob/master/LICENSE)
+[![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-brightgreen.svg)](https://github.com/sanger-pathogens/ariba/blob/master/LICENSE)
[![status](https://img.shields.io/badge/MGEN-10.1099%2Fmgen.0.000131-brightgreen.svg)](http://mgen.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000131)
## Contents
=====================================
ariba/assembly.py
=====================================
--- a/ariba/assembly.py
+++ b/ariba/assembly.py
@@ -59,7 +59,7 @@ class Assembly:
self.threads = threads
if extern_progs is None:
- self.extern_progs = external_progs.ExternalProgs()
+ self.extern_progs = external_progs.ExternalProgs(using_spades=self.assembler == 'spades')
else:
self.extern_progs = extern_progs
=====================================
ariba/cdhit.py
=====================================
--- a/ariba/cdhit.py
+++ b/ariba/cdhit.py
@@ -27,7 +27,7 @@ class Runner:
self.length_diff_cutoff = length_diff_cutoff
self.verbose = verbose
self.min_cluster_number = min_cluster_number
- extern_progs = external_progs.ExternalProgs(fail_on_error=True)
+ extern_progs = external_progs.ExternalProgs(fail_on_error=True, using_spades=False)
self.cd_hit_est = extern_progs.exe('cdhit')
=====================================
ariba/cluster.py
=====================================
--- a/ariba/cluster.py
+++ b/ariba/cluster.py
@@ -130,7 +130,7 @@ class Cluster:
self.log_fh = None
if extern_progs is None:
- self.extern_progs = external_progs.ExternalProgs()
+ self.extern_progs = external_progs.ExternalProgs(using_spades=self.assembler == 'spades')
else:
self.extern_progs = extern_progs
=====================================
ariba/external_progs.py
=====================================
--- a/ariba/external_progs.py
+++ b/ariba/external_progs.py
@@ -20,10 +20,19 @@ prog_to_default = {
prog_to_env_var = {x: 'ARIBA_' + x.upper() for x in prog_to_default if x not in {'nucmer'}}
+# Nucmer 3.1 'nucmer --version' outputs this:
+# nucmer
+# NUCmer (NUCleotide MUMmer) version 3.1
+#
+# Nucmer 4 'nucmer --version' outputs this:
+# 4.0.0beta2
+#
+# ... make the regex permissive and hope things
+# still work for later versions
prog_to_version_cmd = {
'bowtie2': ('--version', re.compile('.*bowtie2.*version (.*)$')),
'cdhit': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
- 'nucmer': ('--version', re.compile('^NUCmer \(NUCleotide MUMmer\) version ([0-9\.]+)')),
+ 'nucmer': ('--version', re.compile('([0-9]+\.[0-9\.]+.*$)')),
'spades': ('--version', re.compile('SPAdes\s+v([0-9\.]+)'))
}
@@ -40,11 +49,12 @@ prog_optional = set([
])
class ExternalProgs:
- def __init__(self, verbose=False, fail_on_error=True):
+ def __init__(self, verbose=False, fail_on_error=True, using_spades=False):
self.progs = {}
self.version_report = []
self.all_deps_ok = True
self.versions = {}
+ self.using_spades = using_spades
if verbose:
print('{:_^79}'.format(' Checking dependencies and their versions '))
@@ -53,6 +63,9 @@ class ExternalProgs:
warnings = []
for prog in sorted(prog_to_default):
+ if prog == 'spades' and not self.using_spades:
+ continue
+
msg_sink = errors
if prog in prog_optional:
msg_sink = warnings
=====================================
ariba/read_filter.py
=====================================
--- a/ariba/read_filter.py
+++ b/ariba/read_filter.py
@@ -20,7 +20,7 @@ class ReadFilter:
self.log_fh = log_fh
if extern_progs is None:
- self.extern_progs = external_progs.ExternalProgs()
+ self.extern_progs = external_progs.ExternalProgs(using_spades=False)
else:
self.extern_progs = extern_progs
=====================================
ariba/ref_genes_getter.py
=====================================
--- a/ariba/ref_genes_getter.py
+++ b/ariba/ref_genes_getter.py
@@ -7,6 +7,8 @@ import tarfile
import pyfastaq
import time
import json
+import subprocess
+import sys
from ariba import common, card_record, vfdb_parser, megares_data_finder, megares_zip_parser
@@ -186,6 +188,19 @@ class RefGenesGetter:
print('and in your methods say that version', self.version, 'of the database was used')
+ @classmethod
+ def _get_genetic_epi_database_from_bitbucket(cls, db_name, outdir, git_commit=None):
+ assert db_name in {'plasmidfinder', 'resfinder', 'virulence_finder'}
+ cmd = 'git clone ' + 'https://bitbucket.org/genomicepidemiology/' + db_name + '_db.git ' + outdir
+ common.syscall(cmd)
+
+ if git_commit is not None:
+ common.syscall('cd ' + outdir + ' && git checkout ' + git_commit)
+
+ print('Using this git commit for ' + db_name + ' database:')
+ subprocess.check_call('cd ' + outdir + ' && git log -n 1', shell=True)
+
+
def _get_from_resfinder(self, outprefix):
outprefix = os.path.abspath(outprefix)
final_fasta = outprefix + '.fa'
@@ -193,17 +208,22 @@ class RefGenesGetter:
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()
- try:
- os.mkdir(tmpdir)
+ if self.version =='old':
+ try:
+ os.mkdir(tmpdir)
+ os.chdir(tmpdir)
+ except:
+ raise Error('Error mkdir/chdir ' + tmpdir)
+
+ zipfile = 'resfinder.zip'
+ cmd = 'curl -X POST --data "folder=resfinder&filename=resfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
+ print('Downloading data with:', cmd, sep='\n')
+ common.syscall(cmd)
+ common.syscall('unzip ' + zipfile)
+ else:
+ RefGenesGetter._get_genetic_epi_database_from_bitbucket('resfinder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
- except:
- raise Error('Error mkdir/chdir ' + tmpdir)
- zipfile = 'resfinder.zip'
- cmd = 'curl -X POST --data "folder=resfinder&filename=resfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
- print('Downloading data with:', cmd, sep='\n')
- common.syscall(cmd)
- common.syscall('unzip ' + zipfile)
print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
@@ -222,7 +242,7 @@ class RefGenesGetter:
except:
description = '.'
- # names are not unique across the files
+ # names are not unique across the files
if seq.id in used_names:
used_names[seq.id] += 1
seq.id += '_' + str(used_names[seq.id])
@@ -310,17 +330,21 @@ class RefGenesGetter:
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()
- try:
- os.mkdir(tmpdir)
+ if self.version == 'old':
+ try:
+ os.mkdir(tmpdir)
+ os.chdir(tmpdir)
+ except:
+ raise Error('Error mkdir/chdir ' + tmpdir)
+
+ zipfile = 'plasmidfinder.zip'
+ cmd = 'curl -X POST --data "folder=plasmidfinder&filename=plasmidfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
+ print('Downloading data with:', cmd, sep='\n')
+ common.syscall(cmd)
+ common.syscall('unzip ' + zipfile)
+ else:
+ RefGenesGetter._get_genetic_epi_database_from_bitbucket('plasmidfinder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
- except:
- raise Error('Error mkdir/chdir ' + tmpdir)
-
- zipfile = 'plasmidfinder.zip'
- cmd = 'curl -X POST --data "folder=plasmidfinder&filename=plasmidfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
- print('Downloading data with:', cmd, sep='\n')
- common.syscall(cmd)
- common.syscall('unzip ' + zipfile)
print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
@@ -357,8 +381,13 @@ class RefGenesGetter:
def _get_from_srst2_argannot(self, outprefix):
- srst2_version = '0.2.0'
- srst2_url = 'https://github.com/katholt/srst2/raw/v' + srst2_version + '/data/ARGannot.r1.fasta'
+ if self.version is None:
+ self.version = 'r2'
+ if self.version not in {'r1', 'r2'}:
+ raise Error('srst2_argannot version must be r1 or r2. Got this: ' + self.version)
+
+ version_string = '.r1' if self.version == 'r1' else '_r2'
+ srst2_url = 'https://raw.githubusercontent.com/katholt/srst2/master/data/ARGannot' + version_string + '.fasta'
srst2_fa = outprefix + '.original.fa'
command = 'wget -O ' + srst2_fa + ' ' + srst2_url
common.syscall(command, verbose=True)
@@ -389,7 +418,9 @@ class RefGenesGetter:
print('If you use this downloaded data, please cite:')
print('"SRST2: Rapid genomic surveillance for public health and hospital microbiology labs",\nInouye et al 2014, Genome Medicine, PMID: 25422674\n')
print(argannot_ref)
- print('and in your methods say that the ARG-ANNOT sequences were used from version', srst2_version, 'of SRST2.')
+ # Used to also output the version of SRST2 here, but the r2 version of their
+ # fasta file was made after SRST2 release 0.2.0. At the time of writing this,
+ # 0.2.0 is the latest release, ie r2 isn't in an SRST2 release.
def _get_from_vfdb_core(self, outprefix):
@@ -427,6 +458,31 @@ class RefGenesGetter:
print('"VFDB 2016: hierarchical and refined dataset for big data analysis-10 years on",\nChen LH et al 2016, Nucleic Acids Res. 44(Database issue):D694-D697. PMID: 26578559\n')
+ @classmethod
+ def _fix_virulencefinder_fasta_file(cls, infile, outfile):
+ '''Some line breaks are missing in the FASTA files from
+ virulence finder. Which means there are lines like this:
+ AAGATCCAATAACTGAAGATGTTGAACAAACAATTCATAATATTTATGGTCAATATGCTATTTTCGTTGA
+ AGGTGTTGCGCATTTACCTGGACATCTCTCTCCATTATTAAAAAAATTACTACTTAAATCTTTATAA>coa:1:BA000018.3
+ ATGAAAAAGCAAATAATTTCGCTAGGCGCATTAGCAGTTGCATCTAGCTTATTTACATGGGATAACAAAG
+ and therefore the sequences are messed up when we parse them. Also
+ one has a > at the end, then the seq name on the next line.
+ This function fixes the file by adding line breaks'''
+ with open(infile) as f_in, open(outfile, 'w') as f_out:
+ for line in f_in:
+ if line.startswith('>') or '>' not in line:
+ print(line, end='', file=f_out)
+ elif line.endswith('>\n'):
+ print('WARNING: found line with ">" at the end! Fixing. Line:' + line.rstrip() + ' in file ' + infile, file=sys.stderr)
+ print(line.rstrip('>\n'), file=f_out)
+ print('>', end='', file=f_out)
+ else:
+ print('WARNING: found line with ">" not at the start! Fixing. Line:' + line.rstrip() + ' in file ' + infile, file=sys.stderr)
+ line1, line2 = line.split('>')
+ print(line1, file=f_out)
+ print('>', line2, sep='', end='', file=f_out)
+
+
def _get_from_virulencefinder(self, outprefix):
outprefix = os.path.abspath(outprefix)
final_fasta = outprefix + '.fa'
@@ -434,17 +490,21 @@ class RefGenesGetter:
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()
- try:
- os.mkdir(tmpdir)
+ if self.version == 'old':
+ try:
+ os.mkdir(tmpdir)
+ os.chdir(tmpdir)
+ except:
+ raise Error('Error mkdir/chdir ' + tmpdir)
+
+ zipfile = 'plasmidfinder.zip'
+ cmd = 'curl -X POST --data "folder=virulencefinder&filename=virulencefinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
+ print('Downloading data with:', cmd, sep='\n')
+ common.syscall(cmd)
+ common.syscall('unzip ' + zipfile)
+ else:
+ RefGenesGetter._get_genetic_epi_database_from_bitbucket('plasmidfinder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
- except:
- raise Error('Error mkdir/chdir ' + tmpdir)
-
- zipfile = 'plasmidfinder.zip'
- cmd = 'curl -X POST --data "folder=virulencefinder&filename=virulencefinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
- print('Downloading data with:', cmd, sep='\n')
- common.syscall(cmd)
- common.syscall('unzip ' + zipfile)
print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
@@ -454,7 +514,9 @@ class RefGenesGetter:
for filename in os.listdir(tmpdir):
if filename.endswith('.fsa'):
print(' ', filename)
- file_reader = pyfastaq.sequences.file_reader(os.path.join(tmpdir, filename))
+ fix_file = os.path.join(tmpdir, filename + '.fix.fsa')
+ RefGenesGetter._fix_virulencefinder_fasta_file(os.path.join(tmpdir, filename), fix_file)
+ file_reader = pyfastaq.sequences.file_reader(fix_file)
for seq in file_reader:
original_id = seq.id
seq.id = seq.id.replace('_', '.', 1)
=====================================
ariba/tasks/prepareref.py
=====================================
--- a/ariba/tasks/prepareref.py
+++ b/ariba/tasks/prepareref.py
@@ -6,7 +6,7 @@ def run(options):
if options.no_cdhit and options.cdhit_clusters is not None:
sys.exit('Cannot use both --no_cdhit and --cdhit_clusters. Neither or exactly one of those options must be used')
- extern_progs, version_report_lines = versions.get_all_versions()
+ extern_progs, version_report_lines = versions.get_all_versions(using_spades=False)
if options.verbose:
print(*version_report_lines, sep='\n')
=====================================
ariba/tasks/run.py
=====================================
--- a/ariba/tasks/run.py
+++ b/ariba/tasks/run.py
@@ -35,7 +35,7 @@ def run(options):
print('Output directory already exists. ARIBA makes the output directory. Cannot continue.', file=sys.stderr)
sys.exit(1)
- extern_progs, version_report_lines = ariba.versions.get_all_versions()
+ extern_progs, version_report_lines = ariba.versions.get_all_versions(using_spades=options.assembler == 'spades')
if options.verbose:
print(*version_report_lines, sep='\n')
=====================================
ariba/tests/assembly_test.py
=====================================
--- a/ariba/tests/assembly_test.py
+++ b/ariba/tests/assembly_test.py
@@ -9,7 +9,7 @@ from ariba import external_progs
modules_dir = os.path.dirname(os.path.abspath(assembly.__file__))
data_dir = os.path.join(modules_dir, 'tests', 'data')
-extern_progs = external_progs.ExternalProgs()
+extern_progs = external_progs.ExternalProgs(using_spades=True)
class TestAssembly(unittest.TestCase):
def test_run_fermilite(self):
=====================================
ariba/tests/data/ref_genes_getter.fix_virulencefinder_fasta_file.in.fa
=====================================
--- /dev/null
+++ b/ariba/tests/data/ref_genes_getter.fix_virulencefinder_fasta_file.in.fa
@@ -0,0 +1,11 @@
+>seq1
+ACGT
+A
+>seq2
+AGT
+AC>seq3
+ACGT
+>seq4
+AACGT>
+seq5
+AAC
=====================================
ariba/tests/data/ref_genes_getter.fix_virulencefinder_fasta_file.out.fa
=====================================
--- /dev/null
+++ b/ariba/tests/data/ref_genes_getter.fix_virulencefinder_fasta_file.out.fa
@@ -0,0 +1,12 @@
+>seq1
+ACGT
+A
+>seq2
+AGT
+AC
+>seq3
+ACGT
+>seq4
+AACGT
+>seq5
+AAC
=====================================
ariba/tests/ref_genes_getter_test.py
=====================================
--- /dev/null
+++ b/ariba/tests/ref_genes_getter_test.py
@@ -0,0 +1,18 @@
+import unittest
+import os
+import filecmp
+from ariba import ref_genes_getter
+
+modules_dir = os.path.dirname(os.path.abspath(ref_genes_getter.__file__))
+data_dir = os.path.join(modules_dir, 'tests', 'data')
+
+
+class TestRefGenesGetter(unittest.TestCase):
+ def test_fix_virulencefinder_fasta_file(self):
+ '''test _fix_virulencefinder_fasta_file'''
+ infile = os.path.join(data_dir, 'ref_genes_getter.fix_virulencefinder_fasta_file.in.fa')
+ tmp_file = 'tmp.test.ref_genes_getter.fix_virulencefinder_fasta_file.out.fa'
+ expected_file = os.path.join(data_dir, 'ref_genes_getter.fix_virulencefinder_fasta_file.out.fa')
+ ref_genes_getter.RefGenesGetter._fix_virulencefinder_fasta_file(infile, tmp_file)
+ self.assertTrue(filecmp.cmp(expected_file, tmp_file, shallow=False))
+ os.unlink(tmp_file)
=====================================
ariba/versions.py
=====================================
--- a/ariba/versions.py
+++ b/ariba/versions.py
@@ -17,8 +17,8 @@ package_max_versions = {
}
-def get_all_versions(raise_error=True):
- extern_progs = external_progs.ExternalProgs(fail_on_error=False)
+def get_all_versions(raise_error=True, using_spades=True):
+ extern_progs = external_progs.ExternalProgs(fail_on_error=False, using_spades=using_spades)
report_lines = [
'ARIBA version: ' + ariba_version,
=====================================
debian/changelog
=====================================
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,11 @@
+ariba (2.12.0+ds-1) unstable; urgency=medium
+
+ * New upstream release.
+ * Remove obsolete python version hint in d/control.
+ * Bump to debhelper 11.
+
+ -- Sascha Steinbiss <satta at debian.org> Tue, 15 May 2018 17:08:54 +0200
+
ariba (2.11.1+ds-3) unstable; urgency=medium
[ Steffen Möller ]
=====================================
debian/compat
=====================================
--- a/debian/compat
+++ b/debian/compat
@@ -1 +1 @@
-10
+11
=====================================
debian/control
=====================================
--- a/debian/control
+++ b/debian/control
@@ -3,7 +3,7 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.
Uploaders: Sascha Steinbiss <satta at debian.org>
Section: science
Priority: optional
-Build-Depends: debhelper (>= 10),
+Build-Depends: debhelper (>= 11),
python3,
python3-all,
python3-dev,
@@ -25,7 +25,6 @@ Build-Depends: debhelper (>= 10),
help2man,
asciidoctor
Standards-Version: 4.1.4
-X-Python3-Version: >= 3.2
Vcs-Browser: https://salsa.debian.org/med-team/ariba
Vcs-Git: https://salsa.debian.org/med-team/ariba.git
Homepage: https://github.com/sanger-pathogens/ariba
=====================================
scripts/ariba
=====================================
--- a/scripts/ariba
+++ b/scripts/ariba
@@ -62,7 +62,7 @@ subparser_getref = subparsers.add_parser(
description='Download reference data from one of a few supported public resources',
)
subparser_getref.add_argument('--debug', action='store_true', help='Do not delete temporary downloaded files')
-subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card and megares')
+subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Applies to: card, megares, plasmidfinder, resfinder, srst2_argannot, virulencefinder. For plasmid/res/virulencefinder: default is to get latest from bitbucket - supply git commit hash to get a specific version from bitbucket, or use "old " to get from old website. For srst2_argannot: default is latest version r2, use r1 to get the older version')
subparser_getref.add_argument('db', help='Database to download. Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs, metavar="DB name")
subparser_getref.add_argument('outprefix', help='Prefix of output filenames')
subparser_getref.set_defaults(func=ariba.tasks.getref.run)
=====================================
setup.py
=====================================
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ vcfcall_mod = Extension(
setup(
ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod],
name='ariba',
- version='2.11.1',
+ version='2.12.0',
description='ARIBA: Antibiotic Resistance Identification By Assembly',
packages = find_packages(),
package_data={'ariba': ['test_run_data/*']},
View it on GitLab: https://salsa.debian.org/med-team/ariba/compare/79c961396f25a029c77839958b7e32c794642fe7...5d1142d924d972f1f5de5ea5dca0f69ef3d75e99
---
View it on GitLab: https://salsa.debian.org/med-team/ariba/compare/79c961396f25a029c77839958b7e32c794642fe7...5d1142d924d972f1f5de5ea5dca0f69ef3d75e99
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180516/1deef334/attachment-0001.html>
More information about the debian-med-commit
mailing list