[med-svn] [fastaq_tmp] 01/02: Imported Upstream version 1.6.0
Andreas Tille
tille at debian.org
Tue Mar 3 07:41:56 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch upstream
in repository fastaq_tmp.
commit ea86704d47a615da7ce242c28aadac5d32a8410c
Author: Jorge Soares <j.s.soares at gmail.com>
Date: Tue Nov 18 16:17:44 2014 +0000
Imported Upstream version 1.6.0
---
debian/changelog | 5 -
debian/compat | 1 -
debian/control | 174 -----
debian/copyright | 22 -
debian/fastaq.manpages | 1 -
...ay-import-statements-for-manpage-creation.patch | 737 ---------------------
debian/patches/series | 1 -
debian/rules | 25 -
debian/source/format | 1 -
debian/upstream/metadata | 12 -
debian/usage_to_man | 99 ---
debian/watch | 3 -
fastaq/tasks.py | 31 +-
...sequences_test_fastaq_to_quasr_primers.expected | 2 -
.../data/sequences_test_fastaq_to_quasr_primers.fa | 4 -
fastaq/tests/data/tasks_test_sequence_trim_1.fa | 24 +-
.../data/tasks_test_sequence_trim_1.trimmed.fa | 14 +-
fastaq/tests/data/tasks_test_sequence_trim_2.fa | 24 +-
.../data/tasks_test_sequence_trim_2.trimmed.fa | 14 +-
fastaq/tests/data/tasks_test_sequences_to_trim.fa | 8 +-
fastaq/tests/tasks_test.py | 11 +-
scripts/fastaq_sequence_trim | 4 +-
scripts/fastaq_to_quasr_primers_file | 12 -
setup.py | 2 +-
24 files changed, 76 insertions(+), 1155 deletions(-)
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 3e78fa5..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-fastaq (1.5.0-1) unstable; urgency=medium
-
- * Initial release (Closes: #766321)
-
- -- Jorge Soares <j.s.soares at gmail.com> Thu, 23 Oct 2014 20:23:54 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 97f4773..0000000
--- a/debian/control
+++ /dev/null
@@ -1,174 +0,0 @@
-Source: fastaq
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>,
- Jorge Soares <j.s.soares at gmail.com>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 9),
- python3,
- python3-setuptools,
- python3-numpy,
- python3-nose,
- samtools,
- help2man
-Standards-Version: 3.9.6
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/fastaq.git
-Homepage: https://github.com/sanger-pathogens/Fastaq
-
-Package: fastaq
-Architecture: all
-Depends: ${python3:Depends},
- ${misc:Depends}
-Description: FASTA and FASTQ file manipulation tools
- A collection of scripts that perform useful and common
- fasta/q manipulation tasks.
- .
- All scripts automatically detect whether the input is
- a FASTA or FASTQ file.
- .
- Input and output files can be gzipped.
- .
- fastaq_capillary_to_pairs -
- Given a fasta/q file of capillary reads,
- makes an interleaved file of read pairs
- .
- fastaq_chunker -
- Splits a multi fasta/q file into separate files.
- Splits sequences into chunks of a fixed size.
- .
- fastaq_count_sequences -
- Counts the number of sequences in a fasta/q file
- .
- fastaq_deinterleave -
- Deinterleaves fasta/q file, so that reads are written
- alternately between two output files
- .
- fastaq_enumerate_names -
- Renames sequences in a file, calling them 1,2,3...
- .
- fastaq_expand_nucleotides -
- Makes all combinations of sequences in input file
- by using all possibilities of redundant bases.
- e.g. ART could be AAT or AGT.
- .
- fastaq_extend_gaps -
- Extends the length of all gaps (and trims the start/end
- of sequences) in a fasta/q file.
- .
- fastaq_fasta_to_fastq -
- Given a fasta and qual file, makes a fastq file.
- .
- fastaq_filter -
- Filters a fasta/q file by sequence length and/or
- by name matching a regular expression.
- .
- fastaq_get_ids -
- Gets IDs from each sequence in a fasta or fastq file.
- .
- fastaq_get_seq_flanking_gaps -
- Gets the sequences either side of gaps in a fasta/q file.
- .
- fastaq_insert_or_delete_bases -
- Deletes or inserts bases at given position(s)
- from a fasta/q file.
- .
- fastaq_interleave -
- Interleaves two fasta/q files, so that reads are written
- alternately first/second in output file.
- .
- fastaq_long_read_simulate -
- Simulates long reads from a fasta/q file. Can optionally
- make insertions into the reads, like pacbio does.
- .
- fastaq_make_random_contigs -
- Makes a multi-fasta file of random sequences,
- all of the same length. Each base has equal chance of
- being A,C,G or T
- .
- fastaq_merge -
- Converts multi fasta/q file to single sequence file,
- preserving original order of sequences.
- .
- fastaq_replace_bases -
- Replaces all occurrences of one letter with another in
- a fasta/q file.
- .
- fastaq_reverse_complement -
- Reverse complements all sequences in a fasta/q file
- .
- fastaq_scaffolds_to_contigs -
- Creates a file of contigs from a file of scaffolds - i.e.
- breaks at every gap in the input.
- .
- fastaq_search_for_seq -
- Searches for an exact match on a given string and its
- reverse complement, in every sequence of a fasta/q file.
- Case insensitive. Guaranteed to find all hits.
- .
- fastaq_sequence_trim -
- Trims sequences off the start of all sequences in a pair
- of fasta/q files, whenever there is a perfect match.
- Only keeps a read pair if both reads of the pair are at
- least a minimum length after any trimming.
- .
- fastaq_split_by_base_count -
- Splits a multi fasta/q file into separate files.
- Does not split sequences. Puts up to max_bases
- into each split file. The exception is that any
- sequence longer than max_bases is put into its own file.
- .
- fastaq_strip_illumina_suffix -
- Strips /1 or /2 off the end of every read name
- in a fasta/q file.
- .
- fastaq_to_fake_qual -
- Makes fake quality scores file from a fasta/q file.
- .
- fastaq_to_fasta -
- Converts sequence file to FASTA format.
- .
- fastaq_to_mira_xml -
- Creates an xml file from a fasta/q file of reads,
- for use with Mira assembler.
- .
- fastaq_to_orfs_gff -
- Writes a GFF file of open reading frames from a fasta/q file
- .
- fastaq_to_perfect_reads -
- Makes perfect paired end fastq reads from a fasta/q file,
- with insert sizes sampled from a normal distribution.
- Read orientation is innies. Output is an interleaved fastq file.
- .
- fastaq_to_quasr_primers_file -
- Converts a fasta/q file to QUASR primers format:
- just the sequence on each line and its reverse complement,
- tab separated.
- .
- fastaq_to_random_subset -
- Takes a random subset of reads from a fasta/q file and optionally
- the corresponding read from a mates file.
- Output is interleaved if a mates file is given.
- .
- fastaq_to_tiling_bam -
- Takes a fasta/q file. Makes a BAM file containing perfect
- (unpaired) reads tiling the whole genome.
- .
- fastaq_to_unique_by_id -
- Removes duplicate sequences from a fasta/q file,
- based on their names. If the same name is found
- more than once, then the longest sequence is kept.
- Order of sequences is preserved in output.
- .
- fastaq_translate -
- Translates all sequences in a fasta or fastq file.
- Output is always fasta format
- .
- fastaq_trim_ends -
- Trims set number of bases off each sequence in a fasta/q file
- .
- fastaq_trim_Ns_at_end -
- Trims any Ns off each sequence in a fasta/q file.
- Does nothing to gaps in the middle, just trims the ends
- .
- A developer API is also provided by this package.
- There are plenty of examples in tasks.py
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 7b2546c..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,22 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: Fastaq
-Source: https://github.com/sanger-pathogens/Fastaq
-
-Files: *
-Copyright: © 2012-2013 Martin Hunt <mh12 at sanger.ac.uk>
-License: GPL-3+
- This package is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
- .
- This package is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- .
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>
- .
- On Debian systems, the complete text of the GNU General
- Public License version 3 can be found in "/usr/share/common-licenses/GPL-3".
diff --git a/debian/fastaq.manpages b/debian/fastaq.manpages
deleted file mode 100644
index d2c65e3..0000000
--- a/debian/fastaq.manpages
+++ /dev/null
@@ -1 +0,0 @@
-debian/man/*
\ No newline at end of file
diff --git a/debian/patches/delay-import-statements-for-manpage-creation.patch b/debian/patches/delay-import-statements-for-manpage-creation.patch
deleted file mode 100644
index b3120ce..0000000
--- a/debian/patches/delay-import-statements-for-manpage-creation.patch
+++ /dev/null
@@ -1,737 +0,0 @@
-Description: Delay import of Fastaq modules by the python executables
- Man pages for this package are automatically created through the
- help2man wrapper called usage_to_man. help2man calls the python executables
- with the -h option and converts the usage into a man page.
- .
- The first step done by all the executables is the import of the modules deployed
- by this package. Since the package is not installed in the system at build time,
- the man pages would never be properly created.
- .
- This patch solves this problem by importing the modules in this package after
- the argument parsing code.
- .
- Upstream preferred to keep the code as it is for styling reasons, which is
- perfectly reasonable.
- .
- fastaq (1.5.0-1) UNRELEASED; urgency=low
- .
- * Initial release (Closes: #1234)
-Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
---- a/scripts/fastaq_capillary_to_pairs
-+++ b/scripts/fastaq_capillary_to_pairs
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Given a fasta/q file of capillary reads, makes an interleaved file of read pairs (where more than read from same ligation, takes the longest read) and a file of unpaired reads. Replaces the .p1k/.q1k part of read names to denote fwd/rev reads with /1 and /2',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outprefix', help='Prefix of output files', metavar='outfiles prefix')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.capillary_to_pairs(options.infile, options.outprefix)
---- a/scripts/fastaq_chunker
-+++ b/scripts/fastaq_chunker
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Splits a multi fasta/q file into separate files. Splits sequences into chunks of a fixed size. Aims for chunk_size chunks in each file, but allows a little extra, so chunk can be up to (chunk_size + tolerance), to prevent tiny chunks made from the ends of sequences',
-@@ -12,6 +11,10 @@
- parser.add_argument('tolerance', type=int, help='Tolerance allowed in chunk size')
- parser.add_argument('--skip_all_Ns', action='store_true', help='Do not output any sequence that consists of all Ns')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.split_by_fixed_size(
- options.infile,
- options.outprefix,
---- a/scripts/fastaq_count_sequences
-+++ b/scripts/fastaq_count_sequences
-@@ -1,11 +1,14 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Counts the number of sequences in a fasta/q file',
- usage = '%(prog)s <fasta/q in>')
- parser.add_argument('infile', help='Name of input fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- print(tasks.count_sequences(options.infile))
---- a/scripts/fastaq_deinterleave
-+++ b/scripts/fastaq_deinterleave
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Deinterleaves fasta/q file, so that reads are written alternately between two output files',
-@@ -11,4 +10,8 @@
- parser.add_argument('out_fwd', help='Name of output fasta/q file of forwards reads')
- parser.add_argument('out_rev', help='Name of output fasta/q file of reverse reads')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.deinterleave(options.infile, options.out_fwd, options.out_rev, fasta_out=options.fasta_out)
---- a/scripts/fastaq_enumerate_names
-+++ b/scripts/fastaq_enumerate_names
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Renames sequences in a file, calling them 1,2,3... etc',
-@@ -12,6 +11,10 @@
- parser.add_argument('infile', help='Name of fasta/q file to be read')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.enumerate_names(options.infile,
- options.outfile,
- start_index=options.start_index,
---- a/scripts/fastaq_expand_nucleotides
-+++ b/scripts/fastaq_expand_nucleotides
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Makes all combinations of sequences in input file by using all possibilities of redundant bases. e.g. ART could be AAT or AGT. Assumes input is nucleotides, not amino acids',
-@@ -9,6 +8,10 @@
- parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
- parser.add_argument('outfile', help='Name of output file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.expand_nucleotides(
- options.infile,
- options.outfile,
---- a/scripts/fastaq_extend_gaps
-+++ b/scripts/fastaq_extend_gaps
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Extends the length of all gaps (and trims the start/end of sequences) in a fasta/q file. Does this by replacing a set number of bases either side of each gap with Ns. Any sequence that ends up as all Ns is lost',
-@@ -10,4 +9,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.extend_gaps(options.infile, options.outfile, options.trim_number)
---- a/scripts/fastaq_fasta_to_fastq
-+++ b/scripts/fastaq_fasta_to_fastq
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Given a fasta and qual file, makes a fastq file',
-@@ -10,4 +9,8 @@
- parser.add_argument('qual', help='Name of input quality scores file', metavar='qual in')
- parser.add_argument('outfile', help='Name of output fastq file', metavar='fastq out')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.fasta_to_fastq(options.fasta, options.qual, options.outfile)
---- a/scripts/fastaq_filter
-+++ b/scripts/fastaq_filter
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Filters a fasta/q file by sequence length and/or by name matching a regular expression',
-@@ -14,6 +13,10 @@
- parser.add_argument('infile', help='Name of fasta/q file to be filtered')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.filter(options.infile,
- options.outfile,
- minlength=options.min_length,
---- a/scripts/fastaq_get_ids
-+++ b/scripts/fastaq_get_ids
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Gets IDs from each sequence in a fasta or fastq file',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.get_ids(options.infile, options.outfile)
---- a/scripts/fastaq_get_seq_flanking_gaps
-+++ b/scripts/fastaq_get_seq_flanking_gaps
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Gets the sequences either side of gaps in a fasta/q file',
-@@ -11,4 +10,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.get_seqs_flanking_gaps(options.infile, options.outfile, options.left, options.right)
---- a/scripts/fastaq_insert_or_delete_bases
-+++ b/scripts/fastaq_insert_or_delete_bases
-@@ -1,9 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--import sys
--import random
--from fastaq import sequences, utils, intervals
-
- parser = argparse.ArgumentParser(
- description = 'Deletes or inserts bases at given position(s) from a fasta/q file',
-@@ -16,6 +13,11 @@
- parser.add_argument('--insert_range', help='Inserts random bases starting after position P in each sequence of the input file. Inserts start + (n-1)*step bases into sequence n.', metavar='P,start,step')
- options = parser.parse_args()
-
-+
-+import sys
-+import random
-+from fastaq import sequences, utils, intervals
-+
- test_ops = [int(x is not None) for x in [options.delete, options.insert, options.delete_range, options.insert_range]]
-
- if sum(test_ops) != 1:
---- a/scripts/fastaq_interleave
-+++ b/scripts/fastaq_interleave
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Interleaves two fasta/q files, so that reads are written alternately first/second in output file',
-@@ -10,4 +9,8 @@
- parser.add_argument('infile_2', help='Name of second input fasta/q file')
- parser.add_argument('outfile', help='Name of output fasta/q file of interleaved reads')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.interleave(options.infile_1, options.infile_2, options.outfile)
---- a/scripts/fastaq_long_read_simulate
-+++ b/scripts/fastaq_long_read_simulate
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Simulates long reads from a fasta/q file. Can optionally make insertions into the reads, like pacbio does. If insertions made, coverage calculation is done before the insertions (so total read length may appear longer then expected).',
-@@ -16,7 +15,6 @@
- parser.add_argument('--fixed_read_length', type=int, help='Length of each read. Only applies if method is tile or uniform. [%(default)s]', default=20000, metavar='INT')
- parser.add_argument('--coverage', type=float, help='Read coverage. Only applies if method is gamma or uniform. [%(default)s]', default=2, metavar='FLOAT')
-
--
- tiling_group = parser.add_argument_group('tiling options')
- tiling_group.add_argument('--tile_step', type=int, help='Distance between start of each read [%(default)s]', default=10000, metavar='INT')
-
-@@ -29,8 +27,11 @@
- ins_group.add_argument('--ins_skip', type=int, help='Insert a random base every --skip bases plus or minus --ins_window. If this option is used, must also use --ins_window.', metavar='INT')
- ins_group.add_argument('--ins_window', type=int, help='See --ins_skip. If this option is used, must also use --ins_skip.', metavar='INT')
-
--
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.make_long_reads(
- options.infile,
- options.outfile,
---- a/scripts/fastaq_make_random_contigs
-+++ b/scripts/fastaq_make_random_contigs
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Makes a multi-fasta file of random sequences, all of the same length. Each base has equal chance of being A,C,G or T',
-@@ -14,6 +13,10 @@
- parser.add_argument('length', type=int, help='Length of each contig')
- parser.add_argument('outfile', help='Name of output file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.make_random_contigs(
- options.contigs,
- options.length,
---- a/scripts/fastaq_merge
-+++ b/scripts/fastaq_merge
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Converts multi fasta/q file to single sequence file, preserving original order of sequences',
-@@ -10,6 +9,10 @@
- parser.add_argument('outfile', help='Name of output file')
- parser.add_argument('-n', '--name', help='Name of sequence in output file [%(default)s]', default='union')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.merge_to_one_seq(
- options.infile,
- options.outfile,
---- a/scripts/fastaq_replace_bases
-+++ b/scripts/fastaq_replace_bases
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Replaces all occurences of one letter with another in a fasta/q file',
-@@ -11,4 +10,8 @@
- parser.add_argument('old', help='Base to be replaced')
- parser.add_argument('new', help='Replace with this letter')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.replace_bases(options.infile, options.outfile, options.old, options.new)
---- a/scripts/fastaq_reverse_complement
-+++ b/scripts/fastaq_reverse_complement
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Reverse complements all sequences in a fasta/q file',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.reverse_complement(options.infile, options.outfile)
---- a/scripts/fastaq_scaffolds_to_contigs
-+++ b/scripts/fastaq_scaffolds_to_contigs
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Creates a file of contigs from a file of scaffolds - i.e. breaks at every gap in the input',
-@@ -10,4 +9,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output contigs file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.scaffolds_to_contigs(options.infile, options.outfile, number_contigs=options.number_contigs)
---- a/scripts/fastaq_search_for_seq
-+++ b/scripts/fastaq_search_for_seq
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Searches for an exact match on a given string and its reverese complement, in every sequences of a fasta/q file. Case insensitive. Guaranteed to find all hits',
-@@ -10,4 +9,8 @@
- parser.add_argument('outfile', help='Name of outputfile. Tab-delimited output: sequence name, position, strand')
- parser.add_argument('search_string', help='String to search for in the sequences')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.search_for_seq(options.infile, options.outfile, options.search_string)
---- a/scripts/fastaq_sequence_trim
-+++ b/scripts/fastaq_sequence_trim
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming',
-@@ -13,6 +12,10 @@
- parser.add_argument('outfile_2', help='Name of output reverse fasta/q file', metavar='out_2')
- parser.add_argument('trim_seqs', help='Name of fasta/q file of sequences to search for at the start of each input sequence', metavar='trim_seqs')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.sequence_trim(
- options.infile_1,
- options.infile_2,
---- a/scripts/fastaq_split_by_base_count
-+++ b/scripts/fastaq_split_by_base_count
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Splits a multi fasta/q file into separate files. Does not split sequences. Puts up to max_bases into each split file. The exception is that any sequence longer than max_bases is put into its own file.',
-@@ -10,6 +9,9 @@
- parser.add_argument('outprefix', help='Name of output fasta/q file')
- parser.add_argument('max_bases', type=int, help='Max bases in each output split file', metavar='max_bases')
- parser.add_argument('--max_seqs', type=int, help='Max number of sequences in each output split file [no limit]', metavar='INT')
--
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.split_by_base_count(options.infile, options.outprefix, options.max_bases, options.max_seqs)
---- a/scripts/fastaq_strip_illumina_suffix
-+++ b/scripts/fastaq_strip_illumina_suffix
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Strips /1 or /2 off the end of every read name in a fasta/q file',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.strip_illumina_suffix(options.infile, options.outfile)
---- a/scripts/fastaq_to_fake_qual
-+++ b/scripts/fastaq_to_fake_qual
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Makes fake quality scores file from a fasta/q file',
-@@ -10,6 +9,10 @@
- parser.add_argument('outfile', help='Name of output file')
- parser.add_argument('-q', '--qual', type=int, help='Quality score to assign to all bases [%(default)s]', default=40)
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.fastaq_to_fake_qual(
- options.infile,
- options.outfile,
---- a/scripts/fastaq_to_fasta
-+++ b/scripts/fastaq_to_fasta
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Converts sequence file to FASTA format',
-@@ -11,6 +10,10 @@
- parser.add_argument('-l', '--line_length', type=int, help='Number of bases on each sequence line of output file [%(default)s]', default=60)
- parser.add_argument('-s', '--strip_after_whitespace', action='store_true', help='Remove everything after first whitesapce in every sequence name')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.to_fasta(
- options.infile,
- options.outfile,
---- a/scripts/fastaq_to_mira_xml
-+++ b/scripts/fastaq_to_mira_xml
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Creates an xml file from a fasta/q file of reads, for use with Mira assembler',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('xml_out', help='Name of output xml file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.fastaq_to_mira_xml(options.infile, options.xml_out)
---- a/scripts/fastaq_to_orfs_gff
-+++ b/scripts/fastaq_to_orfs_gff
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Writes a GFF file of open reading frames from a fasta/q file',
-@@ -10,4 +9,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('gff_out', help='Name of output gff file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.fastaq_to_orfs_gff(options.infile, options.gff_out, min_length=options.min_length)
---- a/scripts/fastaq_to_perfect_reads
-+++ b/scripts/fastaq_to_perfect_reads
-@@ -1,10 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--import random
--from math import floor, ceil
--from fastaq import sequences, utils
--import sys
-
- parser = argparse.ArgumentParser(
- description = 'Makes perfect paired end fastq reads from a fasta/q file, with insert sizes sampled from a normal distribution. Read orientation is innies. Output is an interleaved fastq file.',
-@@ -20,6 +16,12 @@
- parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None, metavar='INT')
- options = parser.parse_args()
-
-+
-+import random
-+from math import floor, ceil
-+from fastaq import sequences, utils
-+import sys
-+
- random.seed(a=options.seed)
-
- seq_reader = sequences.file_reader(options.infile)
---- a/scripts/fastaq_to_quasr_primers_file
-+++ b/scripts/fastaq_to_quasr_primers_file
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.to_quasr_primers(options.infile, options.outfile)
---- a/scripts/fastaq_to_random_subset
-+++ b/scripts/fastaq_to_random_subset
-@@ -1,9 +1,6 @@
- #!/usr/bin/env python3
-
--import sys
- import argparse
--import random
--from fastaq import sequences, utils
-
- parser = argparse.ArgumentParser(
- description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' +
-@@ -15,6 +12,11 @@
- parser.add_argument('probability', type=int, help='Probability of keeping any given read (pair) in [0,100]', metavar='INT')
- options = parser.parse_args()
-
-+
-+import sys
-+import random
-+from fastaq import sequences, utils
-+
- seq_reader = sequences.file_reader(options.infile)
- fout = utils.open_file_write(options.outfile)
-
---- a/scripts/fastaq_to_tiling_bam
-+++ b/scripts/fastaq_to_tiling_bam
-@@ -1,9 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--import sys
--import os
--from fastaq import sequences, utils
-
- parser = argparse.ArgumentParser(
- description = 'Takes a fasta/q file. Makes a BAM file containing perfect (unpaired) reads tiling the whole genome',
-@@ -17,6 +14,11 @@
- parser.add_argument('--read_group', help='Add the given read group ID to all reads [%(default)s]' ,default='42')
- options = parser.parse_args()
-
-+
-+import sys
-+import os
-+from fastaq import sequences, utils
-+
- # make a header first - we need to add the @RG line to the default header made by samtools
- tmp_empty_file = options.outfile + '.tmp.empty'
- f = utils.open_file_write(tmp_empty_file)
---- a/scripts/fastaq_to_unique_by_id
-+++ b/scripts/fastaq_to_unique_by_id
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Removes duplicate sequences from a fasta/q file, based on their names. If the same name is found more than once, then the longest sequence is kept. Order of sequences is preserved in output',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.to_unique_by_id(options.infile, options.outfile)
---- a/scripts/fastaq_translate
-+++ b/scripts/fastaq_translate
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Translates all sequences in a fasta or fastq file. Output is always fasta format',
-@@ -10,4 +9,8 @@
- parser.add_argument('infile', help='Name of fasta/q file to be translated', metavar='in.fasta/q')
- parser.add_argument('outfile', help='Name of output fasta file', metavar='out.fasta')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.translate(options.infile, options.outfile, frame=options.frame)
---- a/scripts/fastaq_trim_Ns_at_end
-+++ b/scripts/fastaq_trim_Ns_at_end
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Trims any Ns off each sequence in a fasta/q file. Does nothing to gaps in the middle, just trims the ends',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.trim_Ns_at_end(options.infile, options.outfile)
---- a/scripts/fastaq_trim_ends
-+++ b/scripts/fastaq_trim_ends
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Trims set number of bases off each sequence in a fasta/q file',
-@@ -11,4 +10,8 @@
- parser.add_argument('end_trim', type=int, help='Number of bases to trim off end')
- parser.add_argument('outfile', help='Name of output fasta/q file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.trim(options.infile, options.outfile, options.start_trim, options.end_trim)
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index dfa3826..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1 +0,0 @@
-delay-import-statements-for-manpage-creation.patch
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 58f2a1b..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/make -f
-
-export DH_VERBOSE := 1
-export PYBUILD_NAME=fastaq
-
-mandir := $(CURDIR)/debian/man
-debfolder := $(CURDIR)/debian
-
-%:
- dh $@ --with python3 --buildsystem=pybuild
-
-override_dh_auto_build:
- dh_python3
- dh_auto_build
- mkdir $(CURDIR)/doc
- cd $(CURDIR)/doc
-
-override_dh_auto_clean:
- rm -rf build .pybuild
- rm -rf $(mandir)
-
-override_dh_installman:
- mkdir -p $(mandir)
- $(debfolder)/usage_to_man
- dh_installman --
\ No newline at end of file
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 46ebe02..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
\ No newline at end of file
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index d8b5812..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,12 +0,0 @@
-Reference:
- Author:
- Title:
- Journal:
- Year:
- Volume:
- Number:
- Pages:
- DOI:
- PMID:
- URL:
- eprint:
diff --git a/debian/usage_to_man b/debian/usage_to_man
deleted file mode 100755
index 32f28b8..0000000
--- a/debian/usage_to_man
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-#Converts Fastaq python scripts usage into man pages.
-#The man pages are placed in the man folder of the main Fastaq directory
-
-createManPages();
-
-sub createManPages {
-
- my $source= 'scripts';
- my $destination= 'debian/man';
- my $app_name = 'Fastaq';
-
-
- unless ( -d $destination ) {
- system(mkdir $destination);
- }
-
- my @files;
-
- push(@files,`ls $source/fastaq_*`);
-
- if ( scalar @files > 0 ) {
-
- print "Creating manpages\n";
- for my $file ( @files ) {
- $file =~ s/\n$//;
-
- my $filename = $file;
- $filename =~ s/$source\///;
-
- my $uc_filename = uc($filename);
- my $man_file = $filename;
-
- $man_file = $destination . '/' . $man_file . '.1';
-
- open (my $man_fh, ">", $man_file);
-
- my $grep_string = $filename . ': error: too few arguments';
-
- my $cmd = "help2man -m $filename -n $filename --no-discard-stderr $file | sed 's/usage://gi'";
- my @output;
- push(@output, `$cmd`);
-
- for my $line (@output) {
- $line =~ s/\n$//;
-
- }
-
- for (my $i = 0; $i < scalar @output; $i++) {
- my $output_line = $output[$i];
-
- if ($output_line =~ m/^\.TH/) {
- $output_line =~ s/\s+/ /g;
- $output_line =~ s/(\.TH) ("\d+") ("[a-zA-Z0-9_ ]*") ("[a-zA-Z0-9_<>\[\]\/\.\(\), ]*") ("[a-zA-Z0-9_]*")/$1 $uc_filename $2 $3 "$app_name" "Fastaq executables"/;
- }
-
- $output_line =~ s/ \\- $filename/$filename/;
-
- if ( $output_line =~ m/^.PP/ && $output[$i + 1] =~ m/^$filename\:/ ) {
- $output_line = $output[$i + 1] = '';
- }
-
- if ($output_line =~ m/^\.SH "SEE ALSO"/) {
- last;
- }
- print $man_fh "$output_line\n";
- }
-
- writeAuthorAndCopyright($man_fh,$filename);
- close($man_fh);
- }
- print "Manpage creation complete\n";
- }
-}
-
-sub writeAuthorAndCopyright {
-
- my ($man_fh,$filename) = @_;
-
- my $author_blurb = <<END_OF_AUTHOR_BLURB;
-.SH "AUTHOR"
-.sp
-$filename was originally written by Martin Hunt (mh12\@sanger.ac.uk)
-END_OF_AUTHOR_BLURB
-
- print $man_fh "$author_blurb\n";
-
- my $copyright_blurb = <<'END_OF_C_BLURB';
-.SH "COPYING"
-.sp
-Wellcome Trust Sanger Institute Copyright \(co 2013 Wellcome Trust Sanger Institute This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version\&.
-END_OF_C_BLURB
-
- print $man_fh "$copyright_blurb\n";
-
-}
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 46c1516..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,3 +0,0 @@
-version=3
-https://github.com/sanger-pathogens/fastaq/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
-
diff --git a/fastaq/tasks.py b/fastaq/tasks.py
index 068a640..1a7d378 100644
--- a/fastaq/tasks.py
+++ b/fastaq/tasks.py
@@ -467,10 +467,16 @@ def search_for_seq(infile, outfile, search_string):
utils.close(fout)
-def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50):
- trim_seqs = {}
- file_to_dict(to_trim_file, trim_seqs)
- trim_seqs = [x.seq for x in trim_seqs.values()]
+def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50, check_revcomp=False):
+ to_trim_seqs = {}
+ file_to_dict(to_trim_file, to_trim_seqs)
+ trim_seqs = [x.seq for x in to_trim_seqs.values()]
+ if check_revcomp:
+ for seq in to_trim_seqs.values():
+ seq.revcomp()
+ trim_seqs_revcomp = [x.seq for x in to_trim_seqs.values()]
+ else:
+ trim_seqs_revcomp = []
seq_reader_1 = sequences.file_reader(infile_1)
seq_reader_2 = sequences.file_reader(infile_2)
@@ -490,6 +496,11 @@ def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_le
seq.trim(len(trim_seq),0)
break
+ for trim_seq in trim_seqs_revcomp:
+ if seq.seq.endswith(trim_seq):
+ seq.trim(0,len(trim_seq))
+ break
+
if len(seq_1) >= min_length and len(seq_2) >= min_length:
print(seq_1, file=f_out_1)
print(seq_2, file=f_out_2)
@@ -679,18 +690,6 @@ def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False
sequences.Fasta.line_length = original_line_length
-def to_quasr_primers(infile, outfile):
- seq_reader = sequences.file_reader(infile)
- f_out = utils.open_file_write(outfile)
-
- for seq in seq_reader:
- seq2 = copy.copy(seq)
- seq2.revcomp()
- print(seq.seq, seq2.seq, sep='\t', file=f_out)
-
- utils.close(f_out)
-
-
def to_fasta_union(infile, outfile, seqname='union'):
seq_reader = sequences.file_reader(infile)
new_seq = []
diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected
deleted file mode 100644
index 88ce837..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected
+++ /dev/null
@@ -1,2 +0,0 @@
-ACGT ACGT
-AG CT
diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa
deleted file mode 100644
index be7c130..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-ACGT
->2
-AG
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.fa
index 28f665b..ac2ff83 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_1.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_1.fa
@@ -1,12 +1,24 @@
>1/1
-TRIM1GCTCGAGCT
+1234567890
>2/1
-TRIM1AGCTAGCTAG
+AACG123456789
>3/1
-CGCTAGCTAG
+1234567890
>4/1
-TRIM2AGCTAGCTAG
+AACG1234567890
>5/1
-AGCTAGCTAG
+1234567890
>6/1
-TRIM4AGCTAGCTAG
+AACG1234567890
+>7/1
+123456789AGGC
+>8/1
+123456789
+>9/1
+1234567890AGGC
+>10/1
+AACG123456789CGTT
+>11/1
+AACG1234567890CGTT
+>12/1
+AACG1234567890CGTT
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
index 0bebad8..0512244 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
@@ -1,8 +1,12 @@
->3/1
-CGCTAGCTAG
+>1/1
+1234567890
>4/1
-AGCTAGCTAG
+1234567890
>5/1
-AGCTAGCTAG
+1234567890
>6/1
-AGCTAGCTAG
+1234567890
+>9/1
+1234567890
+>12/1
+1234567890
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.fa
index 7514250..cf3e872 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_2.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_2.fa
@@ -1,12 +1,24 @@
>1/2
-TRIM1ACGTACGTAC
+1234567890
>2/2
-TRIM2ACGTAGTGA
+1234567890
>3/2
-ACGCTGCAGTCAGTCAGTAT
+AACG123456789
>4/2
-TRIM3CGATCGATCG
+1234567890
>5/2
-TRIM3CGATCGATCG
+AACG1234567890
>6/2
-CGATCGATCG
+GCCT1234567890
+>7/2
+1234567890
+>8/2
+123456789AGGC
+>9/2
+1234567890CGTT
+>10/2
+AACG1234567890CGTT
+>11/2
+AACG123456789CGTT
+>12/2
+AACG1234567890CGTT
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
index ec80f40..432f60a 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
@@ -1,8 +1,12 @@
->3/2
-ACGCTGCAGTCAGTCAGTAT
+>1/2
+1234567890
>4/2
-CGATCGATCG
+1234567890
>5/2
-CGATCGATCG
+1234567890
>6/2
-CGATCGATCG
+1234567890
+>9/2
+1234567890
+>12/2
+1234567890
diff --git a/fastaq/tests/data/tasks_test_sequences_to_trim.fa b/fastaq/tests/data/tasks_test_sequences_to_trim.fa
index 395eaaa..cd2aa28 100644
--- a/fastaq/tests/data/tasks_test_sequences_to_trim.fa
+++ b/fastaq/tests/data/tasks_test_sequences_to_trim.fa
@@ -1,8 +1,4 @@
>1
-TRIM1
+AACG
>2
-TRIM2
->3
-TRIM3
->4
-TRIM4
+GCCT
diff --git a/fastaq/tests/tasks_test.py b/fastaq/tests/tasks_test.py
index 36ebfba..7528815 100644
--- a/fastaq/tests/tasks_test.py
+++ b/fastaq/tests/tasks_test.py
@@ -291,7 +291,7 @@ class TestSequenceTrim(unittest.TestCase):
to_trim = os.path.join(data_dir, 'tasks_test_sequences_to_trim.fa')
expected1 = os.path.join(data_dir, 'tasks_test_sequence_trim_1.trimmed.fa')
expected2 = os.path.join(data_dir, 'tasks_test_sequence_trim_2.trimmed.fa')
- tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10)
+ tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10, check_revcomp=True)
self.assertTrue(filecmp.cmp(expected1, tmp1))
self.assertTrue(filecmp.cmp(expected2, tmp2))
os.unlink(tmp1)
@@ -478,15 +478,6 @@ class TestStripIlluminaSuffix(unittest.TestCase):
os.unlink(tmpfile)
-class TestToQuasrPrimers(unittest.TestCase):
- def test_to_quasr_primers(self):
- '''Check that fasta file gets converted to QUASR sequence file'''
- tmpfile = 'tmp.primers'
- tasks.to_quasr_primers(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.fa'), tmpfile)
- self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.expected'), tmpfile))
- os.unlink(tmpfile)
-
-
class TestToFasta(unittest.TestCase):
def test_to_fasta(self):
'''Test to_fasta'''
diff --git a/scripts/fastaq_sequence_trim b/scripts/fastaq_sequence_trim
index 50a4f34..7021c6c 100755
--- a/scripts/fastaq_sequence_trim
+++ b/scripts/fastaq_sequence_trim
@@ -7,6 +7,7 @@ parser = argparse.ArgumentParser(
description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming',
usage = '%(prog)s [options] <fasta/q 1 in> <fastaq/2 in> <out 1> <out 2> <trim_seqs>')
parser.add_argument('--min_length', type=int, help='Minimum length of output sequences [%(default)s]', default=50, metavar='INT')
+parser.add_argument('--revcomp', action='store_true', help='Trim the end of each sequence if it matches the reverse complement. This option is intended for PCR primer trimming')
parser.add_argument('infile_1', help='Name of forward fasta/q file to be trimmed', metavar='fasta/q 1 in')
parser.add_argument('infile_2', help='Name of reverse fasta/q file to be trimmed', metavar='fasta/q 2 in')
parser.add_argument('outfile_1', help='Name of output forward fasta/q file', metavar='out_1')
@@ -19,5 +20,6 @@ tasks.sequence_trim(
options.outfile_1,
options.outfile_2,
options.trim_seqs,
- min_length=options.min_length
+ min_length=options.min_length,
+ check_revcomp=options.revcomp
)
diff --git a/scripts/fastaq_to_quasr_primers_file b/scripts/fastaq_to_quasr_primers_file
deleted file mode 100755
index 8e5bf7c..0000000
--- a/scripts/fastaq_to_quasr_primers_file
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated',
- usage = '%(prog)s <fasta/q in> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.to_quasr_primers(options.infile, options.outfile)
diff --git a/setup.py b/setup.py
index 3064862..5506ba9 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ def read(fname):
setup(
name='Fastaq',
- version='1.5.0',
+ version='1.6.0',
description='Scripts to manipulate FASTA and FASTQ files, plus API for developers',
long_description=read('README.md'),
packages = find_packages(),
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq_tmp.git
More information about the debian-med-commit
mailing list