[med-svn] [fastaq] 01/05: New upstream version 3.16.0
Sascha Steinbiss
satta at debian.org
Mon Dec 4 23:50:10 UTC 2017
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository fastaq.
commit c14ae5c3e068f6aeedf4d0b41f9668851b438182
Author: Sascha Steinbiss <satta at debian.org>
Date: Mon Dec 4 17:53:09 2017 +0100
New upstream version 3.16.0
---
AUTHORS | 2 +-
LICENSE | 4 ++++
pyfastaq/tasks.py | 21 +++++++++++++++------
pyfastaq/tests/data/readnames_with_comments.fastq | 20 ++++++++++++++++++++
.../data/readnames_with_comments.fastq.filtered | 4 ++++
.../tests/data/readnames_with_comments.fastq.ids | 1 +
pyfastaq/tests/tasks_test.py | 9 +++++++++
setup.py | 4 ++--
8 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/AUTHORS b/AUTHORS
index 711ae85..39205dc 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1 +1 @@
-Martin Hunt (mh12 at sanger.ac.uk)
+Martin Hunt (path-help at sanger.ac.uk)
diff --git a/LICENSE b/LICENSE
index 1a0079f..8e19ad1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,3 +1,7 @@
+Copyright (c) 2013 - 2017 by Genome Research Ltd.
+
+This is free software, licensed under:
+
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py
index b788672..f5e1c52 100644
--- a/pyfastaq/tasks.py
+++ b/pyfastaq/tasks.py
@@ -306,13 +306,22 @@ def filter(
r = re.compile(regex)
- def passes(seq):
+ def passes(seq, name_regex):
+ # remove trailing comments from FASTQ readname lines
+ matches = name_regex.match(seq.id)
+ if matches is not None:
+ clean_seq_id = matches.group(1)
+ else:
+ clean_seq_id = seq.id
+
return minlength <= len(seq) <= maxlength \
- and (regex is None or r.search(seq.id) is not None) \
- and (ids_file is None or seq.id in ids_from_file)
-
+ and (regex is None or r.search(clean_seq_id) is not None) \
+ and (ids_file is None or clean_seq_id in ids_from_file)
+
+ name_regex = re.compile(r'^([^\s]+).*?$')
+
for seq in seq_reader:
- seq_passes = passes(seq)
+ seq_passes = passes(seq, name_regex)
if mate_in:
try:
seq_mate = next(seq_reader_mate)
@@ -320,7 +329,7 @@ def filter(
utils.close(f_out)
raise Error('Error getting mate for sequence', seq.id, ' ... cannot continue')
- mate_passes = passes(seq_mate)
+ mate_passes = passes(seq_mate, name_regex)
want_the_pair = (seq_passes and mate_passes) \
or (( seq_passes or mate_passes) and not both_mates_pass)
if want_the_pair != invert:
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq b/pyfastaq/tests/data/readnames_with_comments.fastq
new file mode 100644
index 0000000..8adbb17
--- /dev/null
+++ b/pyfastaq/tests/data/readnames_with_comments.fastq
@@ -0,0 +1,20 @@
+@A1234::15950:1663 stuff_to_remove
+TCGTAAGCCTGCTCGAGC
++
+>>3>>44@CFFFGG??EE
+@A1234::16080:1672 stuff_to_remove
+CCATCGTCTTCGCCCTGC
++
+111AA1AAAAF1EAEGAG
+@A1234::12967:1677 stuff_to_remove
+CTCCAGCATCGTGCAAAT
++
+3>>A?@CBDFAAACCBAF
+@A1234::16114:1681 stuff_to_remove
+TTGATATAGAGATACTTC
++
+3>A3A5D55DBFFDFGGG
+@A1234::16669:1683 stuff_to_remove
+CTGCGCGACTATACGCAG
++
+1>1>>>A1>D?FF10E0A
\ No newline at end of file
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq.filtered b/pyfastaq/tests/data/readnames_with_comments.fastq.filtered
new file mode 100644
index 0000000..f277d15
--- /dev/null
+++ b/pyfastaq/tests/data/readnames_with_comments.fastq.filtered
@@ -0,0 +1,4 @@
+@A1234::12967:1677 stuff_to_remove
+CTCCAGCATCGTGCAAAT
++
+3>>A?@CBDFAAACCBAF
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq.ids b/pyfastaq/tests/data/readnames_with_comments.fastq.ids
new file mode 100644
index 0000000..9343080
--- /dev/null
+++ b/pyfastaq/tests/data/readnames_with_comments.fastq.ids
@@ -0,0 +1 @@
+A1234::12967:1677
\ No newline at end of file
diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py
index 5db41d4..a8856bb 100644
--- a/pyfastaq/tests/tasks_test.py
+++ b/pyfastaq/tests/tasks_test.py
@@ -179,6 +179,14 @@ class TestFilter(unittest.TestCase):
tasks.filter(infile, outfile, ids_file=infile + '.ids')
self.assertTrue(filecmp.cmp(infile + '.filtered', outfile))
os.unlink(outfile)
+
+ def test_ids_with_comments_from_file_filter(self):
+ '''Test that can extract reads from a file of read names where the read names have extra data after space'''
+ infile = os.path.join(data_dir, 'readnames_with_comments.fastq')
+ outfile = 'tmp.ids_file_filter.fastq'
+ tasks.filter(infile, outfile, ids_file=infile + '.ids')
+ self.assertTrue(filecmp.cmp(infile + '.filtered', outfile))
+ os.unlink(outfile)
def test_invert_filter(self):
'''Test that inverting filtering works'''
@@ -724,3 +732,4 @@ class TestToFastaUnion(unittest.TestCase):
if __name__ == '__main__':
unittest.main()
+
diff --git a/setup.py b/setup.py
index 46f813f..7fb94e3 100644
--- a/setup.py
+++ b/setup.py
@@ -4,11 +4,11 @@ from setuptools import setup, find_packages
setup(
name='pyfastaq',
- version='3.15.0',
+ version='3.16.0',
description='Script to manipulate FASTA and FASTQ files, plus API for developers',
packages = find_packages(),
author='Martin Hunt',
- author_email='mh12 at sanger.ac.uk',
+ author_email='path-help at sanger.ac.uk',
url='https://github.com/sanger-pathogens/Fastaq',
scripts=glob.glob('scripts/*'),
test_suite='nose.collector',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git
More information about the debian-med-commit
mailing list