[med-svn] [fastaq] 01/05: New upstream version 3.16.0

Sascha Steinbiss satta at debian.org
Mon Dec 4 23:50:10 UTC 2017


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository fastaq.

commit c14ae5c3e068f6aeedf4d0b41f9668851b438182
Author: Sascha Steinbiss <satta at debian.org>
Date:   Mon Dec 4 17:53:09 2017 +0100

    New upstream version 3.16.0
---
 AUTHORS                                             |  2 +-
 LICENSE                                             |  4 ++++
 pyfastaq/tasks.py                                   | 21 +++++++++++++++------
 pyfastaq/tests/data/readnames_with_comments.fastq   | 20 ++++++++++++++++++++
 .../data/readnames_with_comments.fastq.filtered     |  4 ++++
 .../tests/data/readnames_with_comments.fastq.ids    |  1 +
 pyfastaq/tests/tasks_test.py                        |  9 +++++++++
 setup.py                                            |  4 ++--
 8 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index 711ae85..39205dc 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1 +1 @@
-Martin Hunt (mh12 at sanger.ac.uk)
+Martin Hunt (path-help at sanger.ac.uk)
diff --git a/LICENSE b/LICENSE
index 1a0079f..8e19ad1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,3 +1,7 @@
+Copyright (c) 2013 - 2017 by Genome Research Ltd.
+
+This is free software, licensed under:
+
 GNU GENERAL PUBLIC LICENSE
                        Version 3, 29 June 2007
 
diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py
index b788672..f5e1c52 100644
--- a/pyfastaq/tasks.py
+++ b/pyfastaq/tasks.py
@@ -306,13 +306,22 @@ def filter(
         r = re.compile(regex)
 
 
-    def passes(seq):
+    def passes(seq, name_regex):
+        # remove trailing comments from FASTQ readname lines
+        matches = name_regex.match(seq.id)
+        if matches is not None:
+            clean_seq_id = matches.group(1)
+        else:
+            clean_seq_id = seq.id
+        
         return minlength <= len(seq) <= maxlength \
-              and (regex is None or r.search(seq.id) is not None) \
-              and (ids_file is None or seq.id in ids_from_file)
-
+              and (regex is None or r.search(clean_seq_id) is not None) \
+              and (ids_file is None or clean_seq_id in ids_from_file)
+        
+    name_regex = re.compile(r'^([^\s]+).*?$')
+	
     for seq in seq_reader:
-        seq_passes = passes(seq)
+        seq_passes = passes(seq, name_regex)
         if mate_in:
             try:
                 seq_mate = next(seq_reader_mate)
@@ -320,7 +329,7 @@ def filter(
                 utils.close(f_out)
                 raise Error('Error getting mate for sequence', seq.id, ' ... cannot continue')
 
-            mate_passes = passes(seq_mate)
+            mate_passes = passes(seq_mate, name_regex)
             want_the_pair = (seq_passes and mate_passes) \
                             or (( seq_passes or mate_passes) and not both_mates_pass)
             if want_the_pair != invert:
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq b/pyfastaq/tests/data/readnames_with_comments.fastq
new file mode 100644
index 0000000..8adbb17
--- /dev/null
+++ b/pyfastaq/tests/data/readnames_with_comments.fastq
@@ -0,0 +1,20 @@
+@A1234::15950:1663 stuff_to_remove
+TCGTAAGCCTGCTCGAGC
++
+>>3>>44@CFFFGG??EE
+@A1234::16080:1672 stuff_to_remove
+CCATCGTCTTCGCCCTGC
++
+111AA1AAAAF1EAEGAG
+@A1234::12967:1677 stuff_to_remove
+CTCCAGCATCGTGCAAAT
++
+3>>A?@CBDFAAACCBAF
+@A1234::16114:1681 stuff_to_remove
+TTGATATAGAGATACTTC
++
+3>A3A5D55DBFFDFGGG
+@A1234::16669:1683 stuff_to_remove
+CTGCGCGACTATACGCAG
++
+1>1>>>A1>D?FF10E0A
\ No newline at end of file
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq.filtered b/pyfastaq/tests/data/readnames_with_comments.fastq.filtered
new file mode 100644
index 0000000..f277d15
--- /dev/null
+++ b/pyfastaq/tests/data/readnames_with_comments.fastq.filtered
@@ -0,0 +1,4 @@
+@A1234::12967:1677 stuff_to_remove
+CTCCAGCATCGTGCAAAT
++
+3>>A?@CBDFAAACCBAF
diff --git a/pyfastaq/tests/data/readnames_with_comments.fastq.ids b/pyfastaq/tests/data/readnames_with_comments.fastq.ids
new file mode 100644
index 0000000..9343080
--- /dev/null
+++ b/pyfastaq/tests/data/readnames_with_comments.fastq.ids
@@ -0,0 +1 @@
+A1234::12967:1677
\ No newline at end of file
diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py
index 5db41d4..a8856bb 100644
--- a/pyfastaq/tests/tasks_test.py
+++ b/pyfastaq/tests/tasks_test.py
@@ -179,6 +179,14 @@ class TestFilter(unittest.TestCase):
         tasks.filter(infile, outfile, ids_file=infile + '.ids')
         self.assertTrue(filecmp.cmp(infile + '.filtered', outfile))
         os.unlink(outfile)
+		
+    def test_ids_with_comments_from_file_filter(self):
+        '''Test that can extract reads from a file of read names where the read names have extra data after space'''
+        infile = os.path.join(data_dir, 'readnames_with_comments.fastq')
+        outfile = 'tmp.ids_file_filter.fastq'
+        tasks.filter(infile, outfile, ids_file=infile + '.ids')
+        self.assertTrue(filecmp.cmp(infile + '.filtered', outfile))
+        os.unlink(outfile)
 
     def test_invert_filter(self):
         '''Test that inverting filtering works'''
@@ -724,3 +732,4 @@ class TestToFastaUnion(unittest.TestCase):
 if __name__ == '__main__':
     unittest.main()
 
+
diff --git a/setup.py b/setup.py
index 46f813f..7fb94e3 100644
--- a/setup.py
+++ b/setup.py
@@ -4,11 +4,11 @@ from setuptools import setup, find_packages
 
 setup(
     name='pyfastaq',
-    version='3.15.0',
+    version='3.16.0',
     description='Script to manipulate FASTA and FASTQ files, plus API for developers',
     packages = find_packages(),
     author='Martin Hunt',
-    author_email='mh12 at sanger.ac.uk',
+    author_email='path-help at sanger.ac.uk',
     url='https://github.com/sanger-pathogens/Fastaq',
     scripts=glob.glob('scripts/*'),
     test_suite='nose.collector',

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git



More information about the debian-med-commit mailing list