[med-svn] [fastaq] 01/01: new upstream
Jorge Soares
jssoares-guest at moszumanska.debian.org
Tue Nov 18 16:17:28 UTC 2014
This is an automated email from the git hooks/post-receive script.
jssoares-guest pushed a commit to branch master
in repository fastaq.
commit 35b6b76c8a48c9e8702763abee5fef92c9dbc718
Author: Jorge Soares <j.s.soares at gmail.com>
Date: Tue Nov 18 16:16:56 2014 +0000
new upstream
---
fastaq/tasks.py | 31 +++++++++++-----------
...sequences_test_fastaq_to_quasr_primers.expected | 2 --
.../data/sequences_test_fastaq_to_quasr_primers.fa | 4 ---
fastaq/tests/data/tasks_test_sequence_trim_1.fa | 24 ++++++++++++-----
.../data/tasks_test_sequence_trim_1.trimmed.fa | 14 ++++++----
fastaq/tests/data/tasks_test_sequence_trim_2.fa | 24 ++++++++++++-----
.../data/tasks_test_sequence_trim_2.trimmed.fa | 14 ++++++----
fastaq/tests/data/tasks_test_sequences_to_trim.fa | 8 ++----
fastaq/tests/tasks_test.py | 11 +-------
scripts/fastaq_sequence_trim | 4 ++-
scripts/fastaq_to_quasr_primers_file | 12 ---------
setup.py | 2 +-
12 files changed, 76 insertions(+), 74 deletions(-)
diff --git a/fastaq/tasks.py b/fastaq/tasks.py
index 068a640..1a7d378 100644
--- a/fastaq/tasks.py
+++ b/fastaq/tasks.py
@@ -467,10 +467,16 @@ def search_for_seq(infile, outfile, search_string):
utils.close(fout)
-def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50):
- trim_seqs = {}
- file_to_dict(to_trim_file, trim_seqs)
- trim_seqs = [x.seq for x in trim_seqs.values()]
+def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50, check_revcomp=False):
+ to_trim_seqs = {}
+ file_to_dict(to_trim_file, to_trim_seqs)
+ trim_seqs = [x.seq for x in to_trim_seqs.values()]
+ if check_revcomp:
+ for seq in to_trim_seqs.values():
+ seq.revcomp()
+ trim_seqs_revcomp = [x.seq for x in to_trim_seqs.values()]
+ else:
+ trim_seqs_revcomp = []
seq_reader_1 = sequences.file_reader(infile_1)
seq_reader_2 = sequences.file_reader(infile_2)
@@ -490,6 +496,11 @@ def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_le
seq.trim(len(trim_seq),0)
break
+ for trim_seq in trim_seqs_revcomp:
+ if seq.seq.endswith(trim_seq):
+ seq.trim(0,len(trim_seq))
+ break
+
if len(seq_1) >= min_length and len(seq_2) >= min_length:
print(seq_1, file=f_out_1)
print(seq_2, file=f_out_2)
@@ -679,18 +690,6 @@ def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False
sequences.Fasta.line_length = original_line_length
-def to_quasr_primers(infile, outfile):
- seq_reader = sequences.file_reader(infile)
- f_out = utils.open_file_write(outfile)
-
- for seq in seq_reader:
- seq2 = copy.copy(seq)
- seq2.revcomp()
- print(seq.seq, seq2.seq, sep='\t', file=f_out)
-
- utils.close(f_out)
-
-
def to_fasta_union(infile, outfile, seqname='union'):
seq_reader = sequences.file_reader(infile)
new_seq = []
diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected
deleted file mode 100644
index 88ce837..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected
+++ /dev/null
@@ -1,2 +0,0 @@
-ACGT ACGT
-AG CT
diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa
deleted file mode 100644
index be7c130..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-ACGT
->2
-AG
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.fa
index 28f665b..ac2ff83 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_1.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_1.fa
@@ -1,12 +1,24 @@
>1/1
-TRIM1GCTCGAGCT
+1234567890
>2/1
-TRIM1AGCTAGCTAG
+AACG123456789
>3/1
-CGCTAGCTAG
+1234567890
>4/1
-TRIM2AGCTAGCTAG
+AACG1234567890
>5/1
-AGCTAGCTAG
+1234567890
>6/1
-TRIM4AGCTAGCTAG
+AACG1234567890
+>7/1
+123456789AGGC
+>8/1
+123456789
+>9/1
+1234567890AGGC
+>10/1
+AACG123456789CGTT
+>11/1
+AACG1234567890CGTT
+>12/1
+AACG1234567890CGTT
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
index 0bebad8..0512244 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
@@ -1,8 +1,12 @@
->3/1
-CGCTAGCTAG
+>1/1
+1234567890
>4/1
-AGCTAGCTAG
+1234567890
>5/1
-AGCTAGCTAG
+1234567890
>6/1
-AGCTAGCTAG
+1234567890
+>9/1
+1234567890
+>12/1
+1234567890
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.fa
index 7514250..cf3e872 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_2.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_2.fa
@@ -1,12 +1,24 @@
>1/2
-TRIM1ACGTACGTAC
+1234567890
>2/2
-TRIM2ACGTAGTGA
+1234567890
>3/2
-ACGCTGCAGTCAGTCAGTAT
+AACG123456789
>4/2
-TRIM3CGATCGATCG
+1234567890
>5/2
-TRIM3CGATCGATCG
+AACG1234567890
>6/2
-CGATCGATCG
+GCCT1234567890
+>7/2
+1234567890
+>8/2
+123456789AGGC
+>9/2
+1234567890CGTT
+>10/2
+AACG1234567890CGTT
+>11/2
+AACG123456789CGTT
+>12/2
+AACG1234567890CGTT
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
index ec80f40..432f60a 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
@@ -1,8 +1,12 @@
->3/2
-ACGCTGCAGTCAGTCAGTAT
+>1/2
+1234567890
>4/2
-CGATCGATCG
+1234567890
>5/2
-CGATCGATCG
+1234567890
>6/2
-CGATCGATCG
+1234567890
+>9/2
+1234567890
+>12/2
+1234567890
diff --git a/fastaq/tests/data/tasks_test_sequences_to_trim.fa b/fastaq/tests/data/tasks_test_sequences_to_trim.fa
index 395eaaa..cd2aa28 100644
--- a/fastaq/tests/data/tasks_test_sequences_to_trim.fa
+++ b/fastaq/tests/data/tasks_test_sequences_to_trim.fa
@@ -1,8 +1,4 @@
>1
-TRIM1
+AACG
>2
-TRIM2
->3
-TRIM3
->4
-TRIM4
+GCCT
diff --git a/fastaq/tests/tasks_test.py b/fastaq/tests/tasks_test.py
index 36ebfba..7528815 100644
--- a/fastaq/tests/tasks_test.py
+++ b/fastaq/tests/tasks_test.py
@@ -291,7 +291,7 @@ class TestSequenceTrim(unittest.TestCase):
to_trim = os.path.join(data_dir, 'tasks_test_sequences_to_trim.fa')
expected1 = os.path.join(data_dir, 'tasks_test_sequence_trim_1.trimmed.fa')
expected2 = os.path.join(data_dir, 'tasks_test_sequence_trim_2.trimmed.fa')
- tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10)
+ tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10, check_revcomp=True)
self.assertTrue(filecmp.cmp(expected1, tmp1))
self.assertTrue(filecmp.cmp(expected2, tmp2))
os.unlink(tmp1)
@@ -478,15 +478,6 @@ class TestStripIlluminaSuffix(unittest.TestCase):
os.unlink(tmpfile)
-class TestToQuasrPrimers(unittest.TestCase):
- def test_to_quasr_primers(self):
- '''Check that fasta file gets converted to QUASR sequence file'''
- tmpfile = 'tmp.primers'
- tasks.to_quasr_primers(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.fa'), tmpfile)
- self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.expected'), tmpfile))
- os.unlink(tmpfile)
-
-
class TestToFasta(unittest.TestCase):
def test_to_fasta(self):
'''Test to_fasta'''
diff --git a/scripts/fastaq_sequence_trim b/scripts/fastaq_sequence_trim
index 50a4f34..7021c6c 100755
--- a/scripts/fastaq_sequence_trim
+++ b/scripts/fastaq_sequence_trim
@@ -7,6 +7,7 @@ parser = argparse.ArgumentParser(
description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming',
usage = '%(prog)s [options] <fasta/q 1 in> <fastaq/2 in> <out 1> <out 2> <trim_seqs>')
parser.add_argument('--min_length', type=int, help='Minimum length of output sequences [%(default)s]', default=50, metavar='INT')
+parser.add_argument('--revcomp', action='store_true', help='Trim the end of each sequence if it matches the reverse complement. This option is intended for PCR primer trimming')
parser.add_argument('infile_1', help='Name of forward fasta/q file to be trimmed', metavar='fasta/q 1 in')
parser.add_argument('infile_2', help='Name of reverse fasta/q file to be trimmed', metavar='fasta/q 2 in')
parser.add_argument('outfile_1', help='Name of output forward fasta/q file', metavar='out_1')
@@ -19,5 +20,6 @@ tasks.sequence_trim(
options.outfile_1,
options.outfile_2,
options.trim_seqs,
- min_length=options.min_length
+ min_length=options.min_length,
+ check_revcomp=options.revcomp
)
diff --git a/scripts/fastaq_to_quasr_primers_file b/scripts/fastaq_to_quasr_primers_file
deleted file mode 100755
index 8e5bf7c..0000000
--- a/scripts/fastaq_to_quasr_primers_file
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated',
- usage = '%(prog)s <fasta/q in> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.to_quasr_primers(options.infile, options.outfile)
diff --git a/setup.py b/setup.py
index 3064862..5506ba9 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ def read(fname):
setup(
name='Fastaq',
- version='1.5.0',
+ version='1.6.0',
description='Scripts to manipulate FASTA and FASTQ files, plus API for developers',
long_description=read('README.md'),
packages = find_packages(),
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git
More information about the debian-med-commit mailing list