[med-svn] [fastaq] 01/02: many changes
Jorge Soares
jssoares-guest at moszumanska.debian.org
Tue Nov 18 17:41:10 UTC 2014
This is an automated email from the git hooks/post-receive script.
jssoares-guest pushed a commit to branch master
in repository fastaq.
commit 7f0118a2effe1e31dc7e990e2368aa9e83a60f98
Author: Jorge Soares <j.s.soares at gmail.com>
Date: Tue Nov 18 17:28:58 2014 +0000
many changes
---
.pc/.quilt_patches | 1 +
.pc/.quilt_series | 1 +
.pc/.version | 1 +
.pc/applied-patches | 1 +
.../.timestamp | 0
.../scripts/fastaq_capillary_to_pairs | 12 +
.../scripts/fastaq_chunker | 21 ++
.../scripts/fastaq_count_sequences | 11 +
.../scripts/fastaq_deinterleave | 14 +
.../scripts/fastaq_enumerate_names | 19 ++
.../scripts/fastaq_expand_nucleotides | 15 +
.../scripts/fastaq_extend_gaps | 13 +
.../scripts/fastaq_fasta_to_fastq | 13 +
.../scripts/fastaq_filter | 24 ++
.../scripts/fastaq_get_ids | 12 +
.../scripts/fastaq_get_seq_flanking_gaps | 14 +
.../scripts/fastaq_insert_or_delete_bases | 94 +++++++
.../scripts/fastaq_interleave | 13 +
.../scripts/fastaq_long_read_simulate | 50 ++++
.../scripts/fastaq_make_random_contigs | 25 ++
.../scripts/fastaq_merge | 18 ++
.../scripts/fastaq_replace_bases | 14 +
.../scripts/fastaq_reverse_complement | 12 +
.../scripts/fastaq_scaffolds_to_contigs | 13 +
.../scripts/fastaq_search_for_seq | 13 +
.../scripts/fastaq_sequence_trim | 25 ++
.../scripts/fastaq_split_by_base_count | 15 +
.../scripts/fastaq_strip_illumina_suffix | 12 +
.../scripts/fastaq_to_fake_qual | 18 ++
.../scripts/fastaq_to_fasta | 19 ++
.../scripts/fastaq_to_mira_xml | 12 +
.../scripts/fastaq_to_orfs_gff | 13 +
.../scripts/fastaq_to_perfect_reads | 86 ++++++
.../scripts/fastaq_to_random_subset | 36 +++
.../scripts/fastaq_to_tiling_bam | 79 ++++++
.../scripts/fastaq_to_unique_by_id | 12 +
.../scripts/fastaq_translate | 13 +
.../scripts/fastaq_trim_Ns_at_end | 12 +
.../scripts/fastaq_trim_ends | 14 +
debian/changelog | 6 +
debian/copyright | 2 +-
...ay-import-statements-for-manpage-creation.patch | 307 ++++++++++++---------
debian/usage_to_man | 47 +++-
43 files changed, 1014 insertions(+), 138 deletions(-)
diff --git a/.pc/.quilt_patches b/.pc/.quilt_patches
new file mode 100644
index 0000000..6857a8d
--- /dev/null
+++ b/.pc/.quilt_patches
@@ -0,0 +1 @@
+debian/patches
diff --git a/.pc/.quilt_series b/.pc/.quilt_series
new file mode 100644
index 0000000..c206706
--- /dev/null
+++ b/.pc/.quilt_series
@@ -0,0 +1 @@
+series
diff --git a/.pc/.version b/.pc/.version
new file mode 100644
index 0000000..0cfbf08
--- /dev/null
+++ b/.pc/.version
@@ -0,0 +1 @@
+2
diff --git a/.pc/applied-patches b/.pc/applied-patches
new file mode 100644
index 0000000..dfa3826
--- /dev/null
+++ b/.pc/applied-patches
@@ -0,0 +1 @@
+delay-import-statements-for-manpage-creation.patch
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/.timestamp b/.pc/delay-import-statements-for-manpage-creation.patch/.timestamp
new file mode 100644
index 0000000..e69de29
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_capillary_to_pairs b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_capillary_to_pairs
new file mode 100755
index 0000000..0d4a48f
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_capillary_to_pairs
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Given a fasta/q file of capillary reads, makes an interleaved file of read pairs (where more than read from same ligation, takes the longest read) and a file of unpaired reads. Replaces the .p1k/.q1k part of read names to denote fwd/rev reads with /1 and /2',
+ usage = '%(prog)s <infile> <outfiles prefix>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outprefix', help='Prefix of output files', metavar='outfiles prefix')
+options = parser.parse_args()
+tasks.capillary_to_pairs(options.infile, options.outprefix)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_chunker b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_chunker
new file mode 100755
index 0000000..d1aeb68
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_chunker
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Splits a multi fasta/q file into separate files. Splits sequences into chunks of a fixed size. Aims for chunk_size chunks in each file, but allows a little extra, so chunk can be up to (chunk_size + tolerance), to prevent tiny chunks made from the ends of sequences',
+ usage = '%(prog)s [options] <fasta/q in> <prefix of output files> <chunk size> <tolerance>')
+parser.add_argument('infile', help='Name of input fasta/q file to be split')
+parser.add_argument('outprefix', help='Name of output fasta/q file')
+parser.add_argument('chunk_size', type=int, help='Size of each chunk')
+parser.add_argument('tolerance', type=int, help='Tolerance allowed in chunk size')
+parser.add_argument('--skip_all_Ns', action='store_true', help='Do not output any sequence that consists of all Ns')
+options = parser.parse_args()
+tasks.split_by_fixed_size(
+ options.infile,
+ options.outprefix,
+ options.chunk_size,
+ options.tolerance,
+ skip_if_all_Ns=options.skip_all_Ns
+)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_count_sequences b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_count_sequences
new file mode 100755
index 0000000..fcb7911
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_count_sequences
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Counts the number of sequences in a fasta/q file',
+ usage = '%(prog)s <fasta/q in>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+options = parser.parse_args()
+print(tasks.count_sequences(options.infile))
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_deinterleave b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_deinterleave
new file mode 100755
index 0000000..a28c505
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_deinterleave
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Deinterleaves fasta/q file, so that reads are written alternately between two output files',
+ usage = '%(prog)s [options] <fasta/q in> <out_fwd> <out_rev>')
+parser.add_argument('--fasta_out', action='store_true', help='Use this to write output as fasta (default is same as input)', default=False)
+parser.add_argument('infile', help='Name of fasta/q file to be deinterleaved')
+parser.add_argument('out_fwd', help='Name of output fasta/q file of forwards reads')
+parser.add_argument('out_rev', help='Name of output fasta/q file of reverse reads')
+options = parser.parse_args()
+tasks.deinterleave(options.infile, options.out_fwd, options.out_rev, fasta_out=options.fasta_out)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_enumerate_names b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_enumerate_names
new file mode 100755
index 0000000..89831cb
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_enumerate_names
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Renames sequences in a file, calling them 1,2,3... etc',
+ usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
+parser.add_argument('--start_index', type=int, help='Starting number [%(default)s]', default=1)
+parser.add_argument('--rename_file', help='If used, will write a file of old name to new name')
+parser.add_argument('--keep_suffix', action='store_true', help='Use this to keep a /1 or /2 suffix at the end of each name')
+parser.add_argument('infile', help='Name of fasta/q file to be read')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.enumerate_names(options.infile,
+ options.outfile,
+ start_index=options.start_index,
+ keep_illumina_suffix=options.keep_suffix,
+ rename_file=options.rename_file)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_expand_nucleotides b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_expand_nucleotides
new file mode 100755
index 0000000..2dbde36
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_expand_nucleotides
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Makes all combinations of sequences in input file by using all possibilities of redundant bases. e.g. ART could be AAT or AGT. Assumes input is nucleotides, not amino acids',
+ usage = '%(prog)s <infile> <outfile>')
+parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
+parser.add_argument('outfile', help='Name of output file')
+options = parser.parse_args()
+tasks.expand_nucleotides(
+ options.infile,
+ options.outfile,
+)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_extend_gaps b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_extend_gaps
new file mode 100755
index 0000000..e8622c3
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_extend_gaps
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Extends the length of all gaps (and trims the start/end of sequences) in a fasta/q file. Does this by replacing a set number of bases either side of each gap with Ns. Any sequence that ends up as all Ns is lost',
+ usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
+parser.add_argument('--trim_number', type=int, help='Number of bases to trim around each gap, and off ends of each sequence [%(default)s]', default=100)
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.extend_gaps(options.infile, options.outfile, options.trim_number)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_fasta_to_fastq b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_fasta_to_fastq
new file mode 100755
index 0000000..18b6edb
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_fasta_to_fastq
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Given a fasta and qual file, makes a fastq file',
+ usage = '%(prog)s <fasta in> <qual in> <fastq out>')
+parser.add_argument('fasta', help='Name of input fasta file', metavar='fasta in')
+parser.add_argument('qual', help='Name of input quality scores file', metavar='qual in')
+parser.add_argument('outfile', help='Name of output fastq file', metavar='fastq out')
+options = parser.parse_args()
+tasks.fasta_to_fastq(options.fasta, options.qual, options.outfile)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_filter b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_filter
new file mode 100755
index 0000000..cb260e6
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_filter
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Filters a fasta/q file by sequence length and/or by name matching a regular expression',
+ usage = '%(prog)s [options] <infile> <outfile>')
+parser.add_argument('--min_length', type=int, help='Minimum length of sequence to keep [%(default)s]', default=0, metavar='INT')
+parser.add_argument('--max_length', type=float, help='Maximum length of sequence to keep [%(default)s]', default=float('inf'), metavar='INT')
+parser.add_argument('--regex', help='If given, only reads with a name matching the regular expression will be kept')
+parser.add_argument('--ids_file', help='If given, only reads whose ID is in th given file will be used. One ID per line of file.')
+parser.add_argument('-v', '--invert', action='store_true', help='Keep sequences that do not match the filters')
+parser.add_argument('infile', help='Name of fasta/q file to be filtered')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.filter(options.infile,
+ options.outfile,
+ minlength=options.min_length,
+ maxlength=options.max_length,
+ regex=options.regex,
+ ids_file=options.ids_file,
+ invert=options.invert
+)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_get_ids b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_get_ids
new file mode 100755
index 0000000..59b9e0e
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_get_ids
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Gets IDs from each sequence in a fasta or fastq file',
+ usage = '%(prog)s <infile> <outfile>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output file')
+options = parser.parse_args()
+tasks.get_ids(options.infile, options.outfile)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_get_seq_flanking_gaps b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_get_seq_flanking_gaps
new file mode 100755
index 0000000..0c54154
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_get_seq_flanking_gaps
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Gets the sequences either side of gaps in a fasta/q file',
+ usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
+parser.add_argument('--left', type=int, help='Number of bases to get to left of gap [%(default)s]', default=25, metavar='INT')
+parser.add_argument('--right', type=int, help='Number of bases to get to right of gap [%(default)s]', default=25, metavar='INT')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.get_seqs_flanking_gaps(options.infile, options.outfile, options.left, options.right)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_insert_or_delete_bases b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_insert_or_delete_bases
new file mode 100755
index 0000000..61e1e80
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_insert_or_delete_bases
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+
+import argparse
+import sys
+import random
+from fastaq import sequences, utils, intervals
+
+parser = argparse.ArgumentParser(
+ description = 'Deletes or inserts bases at given position(s) from a fasta/q file',
+ usage = '%(prog)s <fasta/q in> <outfile>')
+parser.add_argument('infile', help='Name of fasta/q file to be read')
+parser.add_argument('outfile', help='Name of output file')
+parser.add_argument('-d','--delete', action='append', help='Delete the given bases from the given sequence. Format same as samtools view: name:start-end. This option can be used multiple times (once for each region to delete). Overlapping coords will be merged before deleting', metavar='Name:start:bases')
+parser.add_argument('--delete_range', help='Deletes bases starting at position P in each sequence of the input file. Deletes start + (n-1)*step bases from sequence n.', metavar='P,start,step')
+parser.add_argument('-i','--insert', action='append', help='Insert a random string of bases at the given position. Format is name:position:number_to_add. Bases are added after the position. This option can be used multiple times', metavar='Name:start:bases')
+parser.add_argument('--insert_range', help='Inserts random bases starting after position P in each sequence of the input file. Inserts start + (n-1)*step bases into sequence n.', metavar='P,start,step')
+options = parser.parse_args()
+
+test_ops = [int(x is not None) for x in [options.delete, options.insert, options.delete_range, options.insert_range]]
+
+if sum(test_ops) != 1:
+ print('Must use one of --delete, --insert, --delete_range, --insert_range. Cannot continue', file=sys.stderr)
+ sys.exit(1)
+
+
+def range2dic(range_in):
+ if range_in is None:
+ return {}
+ (pos, start, step) = range_in.split(',')
+ d = {}
+ d['pos'] = int(pos) - 1
+ d['bases'] = int(start)
+ d['step'] = int(step)
+ return d
+
+delete_range = range2dic(options.delete_range)
+insert_range = range2dic(options.insert_range)
+
+
+# convert the -d regions into sequence name, start and end coords
+to_delete = {}
+if options.delete:
+ for s in options.delete:
+ id, coords = s.rsplit(':')
+ start, end = [int(x)-1 for x in coords.split('-')]
+ if id not in to_delete:
+ to_delete[id] = []
+ to_delete[id].append(intervals.Interval(start, end))
+
+
+to_insert = {}
+if options.insert:
+ for s in options.insert:
+ id, pos, bases = s.rsplit(':',2)
+ pos = int(pos) - 1
+ bases = int(bases)
+ if id not in to_insert:
+ to_insert[id] = []
+ to_insert[id].append((pos, bases))
+
+
+assert len(to_delete) * len(to_insert) == 0
+
+# merge overlapping regions to be deleted
+for l in to_delete.values():
+ intervals.merge_overlapping_in_list(l)
+
+# sort positions to be inserted
+for l in to_insert.values():
+ l.sort()
+
+# read in the fasta/q file and print outfile with deleted sequences
+seq_reader = sequences.file_reader(options.infile)
+f = utils.open_file_write(options.outfile)
+
+for seq in seq_reader:
+ if seq.id in to_delete:
+ # delete regions for this sequence, but start at the end so the
+ # coords don't get messed up after the first deletion
+ for inter in reversed(to_delete[seq.id]):
+ seq.seq = seq.seq[:inter.start] + seq.seq[inter.end + 1:]
+ elif options.delete_range:
+ seq.seq = seq.seq[:delete_range['pos']] + seq.seq[delete_range['pos'] + delete_range['bases']:]
+ delete_range['bases'] += delete_range['step']
+ elif seq.id in to_insert:
+ for pos, bases in reversed(to_insert[seq.id]):
+ seq.seq = seq.seq[:pos + 1] + ''.join([random.choice('ACGT') for x in range(bases)]) + seq.seq[pos + 1:]
+ elif options.insert_range:
+ seq.seq = seq.seq[:insert_range['pos'] + 1] + ''.join([random.choice('ACGT') for x in range(insert_range['bases'])]) + seq.seq[insert_range['pos'] + 1:]
+ insert_range['bases'] += insert_range['step']
+
+ print(seq, file=f)
+
+utils.close(f)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_interleave b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_interleave
new file mode 100755
index 0000000..4b39a3e
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_interleave
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Interleaves two fasta/q files, so that reads are written alternately first/second in output file',
+ usage = '%(prog)s [options] <fasta/q 1> <fasta/q 2> <outfile>')
+parser.add_argument('infile_1', help='Name of first input fasta/q file')
+parser.add_argument('infile_2', help='Name of second input fasta/q file')
+parser.add_argument('outfile', help='Name of output fasta/q file of interleaved reads')
+options = parser.parse_args()
+tasks.interleave(options.infile_1, options.infile_2, options.outfile)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_long_read_simulate b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_long_read_simulate
new file mode 100755
index 0000000..23106f3
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_long_read_simulate
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Simulates long reads from a fasta/q file. Can optionally make insertions into the reads, like pacbio does. If insertions made, coverage calculation is done before the insertions (so total read length may appear longer then expected).',
+ usage = '%(prog)s [options] <infile> <outfile>')
+
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output fasta file')
+
+parser.add_argument('--method', help='How to sample the read positions and lengths. Choose from 1) "tiling", where reads of fixed length are taken at equal intervals from the reference. 2) "unfiform", where reads of fixed length taken at positions sampled uniformly. 3) "gamma", where reads lengths are taken from a gamma distribution, and positions sampled uniformly. [%(default)s]', default='tiling', choices=['tiling', 'uniform', 'gamma'], metavar='tiling|uniform|gamma')
+parser.add_argument('--seed', type=int, help='Seed for random number generator [default: use python\'s default]', metavar='INT')
+parser.add_argument('--qual', help='Write a file of fake quality scores called outfile.qual, all bases same quality [%(default)s]', metavar='INT')
+parser.add_argument('--fixed_read_length', type=int, help='Length of each read. Only applies if method is tile or uniform. [%(default)s]', default=20000, metavar='INT')
+parser.add_argument('--coverage', type=float, help='Read coverage. Only applies if method is gamma or uniform. [%(default)s]', default=2, metavar='FLOAT')
+
+
+tiling_group = parser.add_argument_group('tiling options')
+tiling_group.add_argument('--tile_step', type=int, help='Distance between start of each read [%(default)s]', default=10000, metavar='INT')
+
+gamma_group = parser.add_argument_group('gamma options')
+gamma_group.add_argument('--gamma_shape', type=float, help='Shape parameter of gamma distribution [%(default)s]', default=1.2, metavar='FLOAT')
+gamma_group.add_argument('--gamma_scale', type=float, help='Scale parameter of gamma distribution [%(default)s]', default=6000, metavar='FLOAT')
+gamma_group.add_argument('--gamma_min_length', type=int, help='Minimum read length [%(default)s]', default=20000, metavar='INT')
+
+ins_group = parser.add_argument_group('options to add insertions to reads')
+ins_group.add_argument('--ins_skip', type=int, help='Insert a random base every --skip bases plus or minus --ins_window. If this option is used, must also use --ins_window.', metavar='INT')
+ins_group.add_argument('--ins_window', type=int, help='See --ins_skip. If this option is used, must also use --ins_skip.', metavar='INT')
+
+
+options = parser.parse_args()
+tasks.make_long_reads(
+ options.infile,
+ options.outfile,
+ method=options.method,
+ fixed_read_length=options.fixed_read_length,
+ coverage=options.coverage,
+ tile_step=options.tile_step,
+ gamma_shape=options.gamma_shape,
+ gamma_scale=options.gamma_scale,
+ gamma_min_length=options.gamma_min_length,
+ seed=options.seed,
+ ins_skip=options.ins_skip,
+ ins_window=options.ins_window
+)
+
+if options.qual:
+ tasks.fastaq_to_fake_qual(options.outfile, options.outfile + '.qual', q=options.qual)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_make_random_contigs b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_make_random_contigs
new file mode 100755
index 0000000..c6774fe
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_make_random_contigs
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Makes a multi-fasta file of random sequences, all of the same length. Each base has equal chance of being A,C,G or T',
+ usage = '%(prog)s [options] <number of sequences> <length of each sequence> <fasta out>')
+parser.add_argument('--first_number', type=int, help='If numbering the sequences, the first sequence gets this number [%(default)s]', default=1)
+parser.add_argument('--name_by_letters', action='store_true', help='Name the contigs A,B,C,... will start at A again if you get to Z')
+parser.add_argument('--prefix', help='Prefix to add to start of every sequence name', default='')
+parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None)
+parser.add_argument('contigs', type=int, help='Nunber of contigs to make')
+parser.add_argument('length', type=int, help='Length of each contig')
+parser.add_argument('outfile', help='Name of output file')
+options = parser.parse_args()
+tasks.make_random_contigs(
+ options.contigs,
+ options.length,
+ options.outfile,
+ name_by_letters=options.name_by_letters,
+ prefix=options.prefix,
+ seed=options.seed,
+ first_number=options.first_number
+)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_merge b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_merge
new file mode 100755
index 0000000..d919323
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_merge
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Converts multi fasta/q file to single sequence file, preserving original order of sequences',
+ usage = '%(prog)s <infile> <outfile>')
+parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
+parser.add_argument('outfile', help='Name of output file')
+parser.add_argument('-n', '--name', help='Name of sequence in output file [%(default)s]', default='union')
+options = parser.parse_args()
+tasks.merge_to_one_seq(
+ options.infile,
+ options.outfile,
+ seqname=options.name
+)
+
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_replace_bases b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_replace_bases
new file mode 100755
index 0000000..6ce2fc0
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_replace_bases
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Replaces all occurences of one letter with another in a fasta/q file',
+ usage = '%(prog)s <fasta/q in> <outfile> <old> <new>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output file')
+parser.add_argument('old', help='Base to be replaced')
+parser.add_argument('new', help='Replace with this letter')
+options = parser.parse_args()
+tasks.replace_bases(options.infile, options.outfile, options.old, options.new)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_reverse_complement b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_reverse_complement
new file mode 100755
index 0000000..147e01f
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_reverse_complement
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Reverse complements all sequences in a fasta/q file',
+ usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.reverse_complement(options.infile, options.outfile)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_scaffolds_to_contigs b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_scaffolds_to_contigs
new file mode 100755
index 0000000..46d4861
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_scaffolds_to_contigs
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Creates a file of contigs from a file of scaffolds - i.e. breaks at every gap in the input',
+ usage = '%(prog)s [options] <infile> <outfile>')
+parser.add_argument('--number_contigs', action='store_true', help='Use this to enumerate contig names 1,2,3,... within each scaffold')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output contigs file')
+options = parser.parse_args()
+tasks.scaffolds_to_contigs(options.infile, options.outfile, number_contigs=options.number_contigs)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_search_for_seq b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_search_for_seq
new file mode 100755
index 0000000..c00ed7a
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_search_for_seq
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Searches for an exact match on a given string and its reverese complement, in every sequences of a fasta/q file. Case insensitive. Guaranteed to find all hits',
+ usage = '%(prog)s [options] <fasta/q in> <outfile> <search_string>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of outputfile. Tab-delimited output: sequence name, position, strand')
+parser.add_argument('search_string', help='String to search for in the sequences')
+options = parser.parse_args()
+tasks.search_for_seq(options.infile, options.outfile, options.search_string)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_sequence_trim b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_sequence_trim
new file mode 100755
index 0000000..7021c6c
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_sequence_trim
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming',
+ usage = '%(prog)s [options] <fasta/q 1 in> <fastaq/2 in> <out 1> <out 2> <trim_seqs>')
+parser.add_argument('--min_length', type=int, help='Minimum length of output sequences [%(default)s]', default=50, metavar='INT')
+parser.add_argument('--revcomp', action='store_true', help='Trim the end of each sequence if it matches the reverse complement. This option is intended for PCR primer trimming')
+parser.add_argument('infile_1', help='Name of forward fasta/q file to be trimmed', metavar='fasta/q 1 in')
+parser.add_argument('infile_2', help='Name of reverse fasta/q file to be trimmed', metavar='fasta/q 2 in')
+parser.add_argument('outfile_1', help='Name of output forward fasta/q file', metavar='out_1')
+parser.add_argument('outfile_2', help='Name of output reverse fasta/q file', metavar='out_2')
+parser.add_argument('trim_seqs', help='Name of fasta/q file of sequences to search for at the start of each input sequence', metavar='trim_seqs')
+options = parser.parse_args()
+tasks.sequence_trim(
+ options.infile_1,
+ options.infile_2,
+ options.outfile_1,
+ options.outfile_2,
+ options.trim_seqs,
+ min_length=options.min_length,
+ check_revcomp=options.revcomp
+)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_split_by_base_count b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_split_by_base_count
new file mode 100755
index 0000000..dd7b43d
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_split_by_base_count
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Splits a multi fasta/q file into separate files. Does not split sequences. Puts up to max_bases into each split file. The exception is that any sequence longer than max_bases is put into its own file.',
+ usage = '%(prog)s [options] <fasta/q in> <prefix of output files> <max_bases>')
+parser.add_argument('infile', help='Name of input fasta/q file to be split')
+parser.add_argument('outprefix', help='Name of output fasta/q file')
+parser.add_argument('max_bases', type=int, help='Max bases in each output split file', metavar='max_bases')
+parser.add_argument('--max_seqs', type=int, help='Max number of sequences in each output split file [no limit]', metavar='INT')
+
+options = parser.parse_args()
+tasks.split_by_base_count(options.infile, options.outprefix, options.max_bases, options.max_seqs)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_strip_illumina_suffix b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_strip_illumina_suffix
new file mode 100755
index 0000000..6a29a42
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_strip_illumina_suffix
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Strips /1 or /2 off the end of every read name in a fasta/q file',
+ usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.strip_illumina_suffix(options.infile, options.outfile)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_fake_qual b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_fake_qual
new file mode 100755
index 0000000..272f7a3
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_fake_qual
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Makes fake quality scores file from a fasta/q file',
+ usage = '%(prog)s <infile> <outfile>')
+parser.add_argument('infile', help='Name of input file')
+parser.add_argument('outfile', help='Name of output file')
+parser.add_argument('-q', '--qual', type=int, help='Quality score to assign to all bases [%(default)s]', default=40)
+options = parser.parse_args()
+tasks.fastaq_to_fake_qual(
+ options.infile,
+ options.outfile,
+ q=options.qual
+)
+
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_fasta b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_fasta
new file mode 100755
index 0000000..742e95f
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_fasta
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Converts sequence file to FASTA format',
+ usage = '%(prog)s <infile> <outfile>')
+parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
+parser.add_argument('outfile', help='Name of output file')
+parser.add_argument('-l', '--line_length', type=int, help='Number of bases on each sequence line of output file [%(default)s]', default=60)
+parser.add_argument('-s', '--strip_after_whitespace', action='store_true', help='Remove everything after first whitespace in every sequence name')
+options = parser.parse_args()
+tasks.to_fasta(
+ options.infile,
+ options.outfile,
+ line_length=options.line_length,
+ strip_after_first_whitespace=options.strip_after_whitespace
+)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_mira_xml b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_mira_xml
new file mode 100755
index 0000000..582d669
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_mira_xml
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Creates an xml file from a fasta/q file of reads, for use with Mira assembler',
+ usage = '%(prog)s [options] <fastq_in> <xml_out>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('xml_out', help='Name of output xml file')
+options = parser.parse_args()
+tasks.fastaq_to_mira_xml(options.infile, options.xml_out)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_orfs_gff b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_orfs_gff
new file mode 100755
index 0000000..0098023
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_orfs_gff
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Writes a GFF file of open reading frames from a fasta/q file',
+ usage = '%(prog)s [options] <fasta/q in> <gff_out>')
+parser.add_argument('--min_length', type=int, help='Minimum length of ORF, in nucleotides [%(default)s]', default=300, metavar='INT')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('gff_out', help='Name of output gff file')
+options = parser.parse_args()
+tasks.fastaq_to_orfs_gff(options.infile, options.gff_out, min_length=options.min_length)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_perfect_reads b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_perfect_reads
new file mode 100755
index 0000000..6f3ca10
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_perfect_reads
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import argparse
+import random
+from math import floor, ceil
+from fastaq import sequences, utils
+import sys
+
+parser = argparse.ArgumentParser(
+ description = 'Makes perfect paired end fastq reads from a fasta/q file, with insert sizes sampled from a normal distribution. Read orientation is innies. Output is an interleaved fastq file.',
+ usage = '%(prog)s <fasta/q in> <out.fastq> <mean insert size> <insert std deviation> <mean coverage> <read length>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output fastq file')
+parser.add_argument('mean_insert', type=int, help='Mean insert size of read pairs', metavar='mean insert size')
+parser.add_argument('insert_std', type=float, help='Standard deviation of insert size', metavar='insert std deviation')
+parser.add_argument('coverage', type=float, help='Mean coverage of the reads', metavar='mean coverage')
+parser.add_argument('readlength', type=int, help='Length of each read', metavar='read length')
+parser.add_argument('--fragments', help='Write FASTA sequences of fragments (i.e. read pairs plus sequences in between them) to the given filename', metavar='FILENAME')
+parser.add_argument('--no_n', action='store_true', help='Don\'t allow any N or n characters in the reads')
+parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None, metavar='INT')
+options = parser.parse_args()
+
+random.seed(a=options.seed)
+
+seq_reader = sequences.file_reader(options.infile)
+fout = utils.open_file_write(options.outfile)
+pair_counter = 1
+
+if options.fragments:
+ fout_frags = utils.open_file_write(options.fragments)
+
+for ref in seq_reader:
+ # check if current seq is long enough
+ if len(ref) < options.mean_insert + 4 * options.insert_std:
+ print('Warning, sequence ', ref.id, ' too short. Skipping it...', file=sys.stderr)
+ continue
+
+ # work out how many reads to simulate
+ read_pairs = int(0.5 * options.coverage * len(ref) / options.readlength)
+
+ # it's possible that we pick the same fragment twice, in which case the
+ # reads would get the same name. So remember the frag coords
+ used_fragments = {} # (middle_position, length) => count
+
+ # do the simulation: pick insert size from normal distribution, and
+ # position in genome from uniform distribution
+ x = 0
+ while x < read_pairs:
+ isize = int(random.normalvariate(options.mean_insert, options.insert_std))
+ while isize > len(ref) or isize < options.readlength:
+ isize = int(random.normalvariate(options.mean_insert, options.insert_std))
+ middle_pos = random.randint(ceil(0.5 *isize), floor(len(ref) - 0.5 * isize))
+ read_start1 = int(middle_pos - ceil(0.5 * isize))
+ read_start2 = read_start1 + isize - options.readlength
+
+ readname = ':'.join([ref.id, str(pair_counter), str(read_start1+1), str(read_start2+1)])
+
+ fragment = (middle_pos, isize)
+ if fragment in used_fragments:
+ used_fragments[fragment] += 1
+ readname += '.dup.' + str(used_fragments[fragment])
+ else:
+ used_fragments[fragment] = 1
+
+ read1 = sequences.Fastq(readname + '/1', ref.seq[read_start1:read_start1 + options.readlength], 'I' * options.readlength)
+ read2 = sequences.Fastq(readname + '/2', ref.seq[read_start2:read_start2 + options.readlength], 'I' * options.readlength)
+
+
+ if options.no_n and ('n' in read1.seq or 'N' in read1.seq or 'n' in read2.seq or 'N' in read2.seq):
+ continue
+
+ read2.revcomp()
+
+ print(read1, file=fout)
+ print(read2, file=fout)
+
+ if options.fragments:
+ frag = sequences.Fasta(readname, ref.seq[read_start1:read_start2 + options.readlength])
+ print(frag, file=fout_frags)
+
+ pair_counter += 1
+ x += 1
+
+utils.close(fout)
+if options.fragments:
+ utils.close(fout_frags)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_random_subset b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_random_subset
new file mode 100755
index 0000000..b4f11c5
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_random_subset
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+import random
+from fastaq import sequences, utils
+
+parser = argparse.ArgumentParser(
+ description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' +
+ 'from a mates file. Output is interleaved if mates file given',
+ usage = '%(prog)s [options] <fasta/q in> <outfile> <probability of keeping read (pair) in [0,100]>')
+parser.add_argument('--mate_file', help='Name of fasta/q mates file')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of fasta/q output file')
+parser.add_argument('probability', type=int, help='Probability of keeping any given read (pair) in [0,100]', metavar='INT')
+options = parser.parse_args()
+
+seq_reader = sequences.file_reader(options.infile)
+fout = utils.open_file_write(options.outfile)
+
+if options.mate_file:
+ mate_seq_reader = sequences.file_reader(options.mate_file)
+
+for seq in seq_reader:
+ if options.mate_file:
+ try:
+ mate_seq = next(mate_seq_reader)
+ except StopIteration:
+ print('Error! Didn\'t get mate for read', seq.id, file=sys.stderr)
+ sys.exit(1)
+ if random.randint(0, 100) <= options.probability:
+ print(seq, file=fout)
+ if options.mate_file:
+ print(mate_seq, file=fout)
+
+utils.close(fout)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_tiling_bam b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_tiling_bam
new file mode 100755
index 0000000..9b9738d
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_tiling_bam
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+import argparse
+import sys
+import os
+from fastaq import sequences, utils
+
+parser = argparse.ArgumentParser(
+ description = 'Takes a fasta/q file. Makes a BAM file containing perfect (unpaired) reads tiling the whole genome',
+ usage = '%(prog)s [options] <fasta/q in> <read length> <read step> <read prefix> <out.bam>',
+ epilog = 'Important: assumes that samtools is in your path')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('read_length', type=int, help='Length of reads')
+parser.add_argument('read_step', type=int, help='Distance between start of each read')
+parser.add_argument('read_prefix', help='Prefix of read names')
+parser.add_argument('outfile', help='Name of output BAM file')
+parser.add_argument('--read_group', help='Add the given read group ID to all reads [%(default)s]' ,default='42')
+options = parser.parse_args()
+
+# make a header first - we need to add the @RG line to the default header made by samtools
+tmp_empty_file = options.outfile + '.tmp.empty'
+f = utils.open_file_write(tmp_empty_file)
+utils.close(f)
+try:
+ f = os.popen('samtools view -H -T ' + options.infile + ' ' + tmp_empty_file)
+except IOError:
+ print('Error making tmp header file', file=sys.stderr)
+ sys.exit(1)
+
+header_lines = f.readlines()
+header_lines.append('@RG\tID:' + options.read_group + '\tSM:FAKE')
+f.close()
+os.unlink(tmp_empty_file)
+
+seq_reader = sequences.file_reader(options.infile)
+try:
+ f = os.popen('samtools view -hbS - > ' + options.outfile, 'w')
+except IOError:
+ print("Error opening for writing BAM file '" + options.outfile + "'", file=sys.stderr)
+ sys.exit(1)
+
+print(''.join(header_lines), file=f)
+
+for seq in seq_reader:
+ end_range = len(seq)
+ if len(seq) < options.read_length:
+ end_range = 1
+ for i in range(0, end_range, options.read_step):
+ if len(seq) <= options.read_length:
+ start = 0
+ end = len(seq) - 1
+ else:
+ start = i
+ end = start + options.read_length - 1
+
+ if end > len(seq) - 1:
+ end = len(seq) - 1
+ start = end - options.read_length + 1
+
+ read = sequences.Fastq(options.read_prefix + ':' + seq.id + ':' + str(start + 1) + ':' + str(end + 1), seq[start:end+1], 'I' * (end - start + 1))
+
+ print ('\t'.join([read.id,
+ '0',
+ seq.id,
+ str(start + 1),
+ '60',
+ str(len(read)) + 'M',
+ '*',
+ '*',
+ '*',
+ read.seq,
+ read.qual,
+ 'RG:Z:' + options.read_group]), file=f)
+
+ if end == len(seq) - 1:
+ break
+
+f.close()
+
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_unique_by_id b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_unique_by_id
new file mode 100755
index 0000000..e743a92
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_to_unique_by_id
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Removes duplicate sequences from a fasta/q file, based on their names. If the same name is found more than once, then the longest sequence is kept. Order of sequences is preserved in output',
+ usage = '%(prog)s <infile> <outfile>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.to_unique_by_id(options.infile, options.outfile)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_translate b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_translate
new file mode 100755
index 0000000..9ec04c7
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_translate
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Translates all sequences in a fasta or fastq file. Output is always fasta format',
+ usage = '%(prog)s <in.fasta/q> <out.fasta>')
+parser.add_argument('--frame', type=int, choices=[0,1,2], help='Frame to translate [%(default)s]', default=0)
+parser.add_argument('infile', help='Name of fasta/q file to be translated', metavar='in.fasta/q')
+parser.add_argument('outfile', help='Name of output fasta file', metavar='out.fasta')
+options = parser.parse_args()
+tasks.translate(options.infile, options.outfile, frame=options.frame)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_trim_Ns_at_end b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_trim_Ns_at_end
new file mode 100755
index 0000000..200d71f
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_trim_Ns_at_end
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Trims any Ns off each sequence in a fasta/q file. Does nothing to gaps in the middle, just trims the ends',
+ usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.trim_Ns_at_end(options.infile, options.outfile)
diff --git a/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_trim_ends b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_trim_ends
new file mode 100755
index 0000000..ffc662d
--- /dev/null
+++ b/.pc/delay-import-statements-for-manpage-creation.patch/scripts/fastaq_trim_ends
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import argparse
+from fastaq import tasks
+
+parser = argparse.ArgumentParser(
+ description = 'Trims set number of bases off each sequence in a fasta/q file',
+ usage = '%(prog)s [options] <fasta/q in> <bases off start> <bases off end> <fasta/q out>')
+parser.add_argument('infile', help='Name of input fasta/q file')
+parser.add_argument('start_trim', type=int, help='Number of bases to trim off start')
+parser.add_argument('end_trim', type=int, help='Number of bases to trim off end')
+parser.add_argument('outfile', help='Name of output fasta/q file')
+options = parser.parse_args()
+tasks.trim(options.infile, options.outfile, options.start_trim, options.end_trim)
diff --git a/debian/changelog b/debian/changelog
index 3e78fa5..fe41c01 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+fastaq (1.6.0-1) UNRELEASED; urgency=medium
+
+ * New upstream release
+
+ -- Jorge Soares <j.s.soares at gmail.com> Tue, 18 Nov 2014 16:34:01 +0000
+
fastaq (1.5.0-1) unstable; urgency=medium
* Initial release (Closes: #766321)
diff --git a/debian/copyright b/debian/copyright
index 7b2546c..ca8eea8 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -4,7 +4,7 @@ Source: https://github.com/sanger-pathogens/Fastaq
Files: *
Copyright: © 2012-2013 Martin Hunt <mh12 at sanger.ac.uk>
-License: GPL-3+
+License: GPL-3
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
diff --git a/debian/patches/delay-import-statements-for-manpage-creation.patch b/debian/patches/delay-import-statements-for-manpage-creation.patch
index b3120ce..48dac81 100644
--- a/debian/patches/delay-import-statements-for-manpage-creation.patch
+++ b/debian/patches/delay-import-statements-for-manpage-creation.patch
@@ -15,10 +15,12 @@ Description: Delay import of Fastaq modules by the python executables
.
fastaq (1.5.0-1) UNRELEASED; urgency=low
.
- * Initial release (Closes: #1234)
-Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
---- a/scripts/fastaq_capillary_to_pairs
-+++ b/scripts/fastaq_capillary_to_pairs
+ * Initial release (Closes: #766321)
+Author: Jorge Soares <j.s.soares at gmail.com>
+Index: fastaq/scripts/fastaq_capillary_to_pairs
+===================================================================
+--- fastaq.orig/scripts/fastaq_capillary_to_pairs
++++ fastaq/scripts/fastaq_capillary_to_pairs
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -27,7 +29,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Given a fasta/q file of capillary reads, makes an interleaved file of read pairs (where more than read from same ligation, takes the longest read) and a file of unpaired reads. Replaces the .p1k/.q1k part of read names to denote fwd/rev reads with /1 and /2',
-@@ -9,4 +8,8 @@
+@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser(
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outprefix', help='Prefix of output files', metavar='outfiles prefix')
options = parser.parse_args()
@@ -36,8 +38,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.capillary_to_pairs(options.infile, options.outprefix)
---- a/scripts/fastaq_chunker
-+++ b/scripts/fastaq_chunker
+Index: fastaq/scripts/fastaq_chunker
+===================================================================
+--- fastaq.orig/scripts/fastaq_chunker
++++ fastaq/scripts/fastaq_chunker
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -46,7 +50,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Splits a multi fasta/q file into separate files. Splits sequences into chunks of a fixed size. Aims for chunk_size chunks in each file, but allows a little extra, so chunk can be up to (chunk_size + tolerance), to prevent tiny chunks made from the ends of sequences',
-@@ -12,6 +11,10 @@
+@@ -12,6 +11,10 @@ parser.add_argument('chunk_size', type=i
parser.add_argument('tolerance', type=int, help='Tolerance allowed in chunk size')
parser.add_argument('--skip_all_Ns', action='store_true', help='Do not output any sequence that consists of all Ns')
options = parser.parse_args()
@@ -57,8 +61,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.split_by_fixed_size(
options.infile,
options.outprefix,
---- a/scripts/fastaq_count_sequences
-+++ b/scripts/fastaq_count_sequences
+Index: fastaq/scripts/fastaq_count_sequences
+===================================================================
+--- fastaq.orig/scripts/fastaq_count_sequences
++++ fastaq/scripts/fastaq_count_sequences
@@ -1,11 +1,14 @@
#!/usr/bin/env python3
@@ -75,8 +81,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
print(tasks.count_sequences(options.infile))
---- a/scripts/fastaq_deinterleave
-+++ b/scripts/fastaq_deinterleave
+Index: fastaq/scripts/fastaq_deinterleave
+===================================================================
+--- fastaq.orig/scripts/fastaq_deinterleave
++++ fastaq/scripts/fastaq_deinterleave
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -85,7 +93,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Deinterleaves fasta/q file, so that reads are written alternately between two output files',
-@@ -11,4 +10,8 @@
+@@ -11,4 +10,8 @@ parser.add_argument('infile', help='Name
parser.add_argument('out_fwd', help='Name of output fasta/q file of forwards reads')
parser.add_argument('out_rev', help='Name of output fasta/q file of reverse reads')
options = parser.parse_args()
@@ -94,8 +102,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.deinterleave(options.infile, options.out_fwd, options.out_rev, fasta_out=options.fasta_out)
---- a/scripts/fastaq_enumerate_names
-+++ b/scripts/fastaq_enumerate_names
+Index: fastaq/scripts/fastaq_enumerate_names
+===================================================================
+--- fastaq.orig/scripts/fastaq_enumerate_names
++++ fastaq/scripts/fastaq_enumerate_names
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -104,7 +114,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Renames sequences in a file, calling them 1,2,3... etc',
-@@ -12,6 +11,10 @@
+@@ -12,6 +11,10 @@ parser.add_argument('--keep_suffix', act
parser.add_argument('infile', help='Name of fasta/q file to be read')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
@@ -115,8 +125,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.enumerate_names(options.infile,
options.outfile,
start_index=options.start_index,
---- a/scripts/fastaq_expand_nucleotides
-+++ b/scripts/fastaq_expand_nucleotides
+Index: fastaq/scripts/fastaq_expand_nucleotides
+===================================================================
+--- fastaq.orig/scripts/fastaq_expand_nucleotides
++++ fastaq/scripts/fastaq_expand_nucleotides
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -125,7 +137,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Makes all combinations of sequences in input file by using all possibilities of redundant bases. e.g. ART could be AAT or AGT. Assumes input is nucleotides, not amino acids',
-@@ -9,6 +8,10 @@
+@@ -9,6 +8,10 @@ parser = argparse.ArgumentParser(
parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
parser.add_argument('outfile', help='Name of output file')
options = parser.parse_args()
@@ -136,8 +148,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.expand_nucleotides(
options.infile,
options.outfile,
---- a/scripts/fastaq_extend_gaps
-+++ b/scripts/fastaq_extend_gaps
+Index: fastaq/scripts/fastaq_extend_gaps
+===================================================================
+--- fastaq.orig/scripts/fastaq_extend_gaps
++++ fastaq/scripts/fastaq_extend_gaps
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -146,7 +160,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Extends the length of all gaps (and trims the start/end of sequences) in a fasta/q file. Does this by replacing a set number of bases either side of each gap with Ns. Any sequence that ends up as all Ns is lost',
-@@ -10,4 +9,8 @@
+@@ -10,4 +9,8 @@ parser.add_argument('--trim_number', typ
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
@@ -155,8 +169,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.extend_gaps(options.infile, options.outfile, options.trim_number)
---- a/scripts/fastaq_fasta_to_fastq
-+++ b/scripts/fastaq_fasta_to_fastq
+Index: fastaq/scripts/fastaq_fasta_to_fastq
+===================================================================
+--- fastaq.orig/scripts/fastaq_fasta_to_fastq
++++ fastaq/scripts/fastaq_fasta_to_fastq
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -165,7 +181,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Given a fasta and qual file, makes a fastq file',
-@@ -10,4 +9,8 @@
+@@ -10,4 +9,8 @@ parser.add_argument('fasta', help='Name
parser.add_argument('qual', help='Name of input quality scores file', metavar='qual in')
parser.add_argument('outfile', help='Name of output fastq file', metavar='fastq out')
options = parser.parse_args()
@@ -174,8 +190,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.fasta_to_fastq(options.fasta, options.qual, options.outfile)
---- a/scripts/fastaq_filter
-+++ b/scripts/fastaq_filter
+Index: fastaq/scripts/fastaq_filter
+===================================================================
+--- fastaq.orig/scripts/fastaq_filter
++++ fastaq/scripts/fastaq_filter
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -184,7 +202,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Filters a fasta/q file by sequence length and/or by name matching a regular expression',
-@@ -14,6 +13,10 @@
+@@ -14,6 +13,10 @@ parser.add_argument('-v', '--invert', ac
parser.add_argument('infile', help='Name of fasta/q file to be filtered')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
@@ -195,8 +213,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.filter(options.infile,
options.outfile,
minlength=options.min_length,
---- a/scripts/fastaq_get_ids
-+++ b/scripts/fastaq_get_ids
+Index: fastaq/scripts/fastaq_get_ids
+===================================================================
+--- fastaq.orig/scripts/fastaq_get_ids
++++ fastaq/scripts/fastaq_get_ids
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -205,7 +225,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Gets IDs from each sequence in a fasta or fastq file',
-@@ -9,4 +8,8 @@
+@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser(
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outfile', help='Name of output file')
options = parser.parse_args()
@@ -214,8 +234,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.get_ids(options.infile, options.outfile)
---- a/scripts/fastaq_get_seq_flanking_gaps
-+++ b/scripts/fastaq_get_seq_flanking_gaps
+Index: fastaq/scripts/fastaq_get_seq_flanking_gaps
+===================================================================
+--- fastaq.orig/scripts/fastaq_get_seq_flanking_gaps
++++ fastaq/scripts/fastaq_get_seq_flanking_gaps
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -224,7 +246,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Gets the sequences either side of gaps in a fasta/q file',
-@@ -11,4 +10,8 @@
+@@ -11,4 +10,8 @@ parser.add_argument('--right', type=int,
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
@@ -233,8 +255,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.get_seqs_flanking_gaps(options.infile, options.outfile, options.left, options.right)
---- a/scripts/fastaq_insert_or_delete_bases
-+++ b/scripts/fastaq_insert_or_delete_bases
+Index: fastaq/scripts/fastaq_insert_or_delete_bases
+===================================================================
+--- fastaq.orig/scripts/fastaq_insert_or_delete_bases
++++ fastaq/scripts/fastaq_insert_or_delete_bases
@@ -1,9 +1,6 @@
#!/usr/bin/env python3
@@ -245,7 +269,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Deletes or inserts bases at given position(s) from a fasta/q file',
-@@ -16,6 +13,11 @@
+@@ -16,6 +13,11 @@ parser.add_argument('-i','--insert', act
parser.add_argument('--insert_range', help='Inserts random bases starting after position P in each sequence of the input file. Inserts start + (n-1)*step bases into sequence n.', metavar='P,start,step')
options = parser.parse_args()
@@ -257,8 +281,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
test_ops = [int(x is not None) for x in [options.delete, options.insert, options.delete_range, options.insert_range]]
if sum(test_ops) != 1:
---- a/scripts/fastaq_interleave
-+++ b/scripts/fastaq_interleave
+Index: fastaq/scripts/fastaq_interleave
+===================================================================
+--- fastaq.orig/scripts/fastaq_interleave
++++ fastaq/scripts/fastaq_interleave
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -267,7 +293,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Interleaves two fasta/q files, so that reads are written alternately first/second in output file',
-@@ -10,4 +9,8 @@
+@@ -10,4 +9,8 @@ parser.add_argument('infile_1', help='Na
parser.add_argument('infile_2', help='Name of second input fasta/q file')
parser.add_argument('outfile', help='Name of output fasta/q file of interleaved reads')
options = parser.parse_args()
@@ -276,8 +302,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.interleave(options.infile_1, options.infile_2, options.outfile)
---- a/scripts/fastaq_long_read_simulate
-+++ b/scripts/fastaq_long_read_simulate
+Index: fastaq/scripts/fastaq_long_read_simulate
+===================================================================
+--- fastaq.orig/scripts/fastaq_long_read_simulate
++++ fastaq/scripts/fastaq_long_read_simulate
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -286,15 +314,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Simulates long reads from a fasta/q file. Can optionally make insertions into the reads, like pacbio does. If insertions made, coverage calculation is done before the insertions (so total read length may appear longer then expected).',
-@@ -16,7 +15,6 @@
- parser.add_argument('--fixed_read_length', type=int, help='Length of each read. Only applies if method is tile or uniform. [%(default)s]', default=20000, metavar='INT')
- parser.add_argument('--coverage', type=float, help='Read coverage. Only applies if method is gamma or uniform. [%(default)s]', default=2, metavar='FLOAT')
-
--
- tiling_group = parser.add_argument_group('tiling options')
- tiling_group.add_argument('--tile_step', type=int, help='Distance between start of each read [%(default)s]', default=10000, metavar='INT')
-
-@@ -29,8 +27,11 @@
+@@ -29,8 +28,11 @@ ins_group = parser.add_argument_group('o
ins_group.add_argument('--ins_skip', type=int, help='Insert a random base every --skip bases plus or minus --ins_window. If this option is used, must also use --ins_window.', metavar='INT')
ins_group.add_argument('--ins_window', type=int, help='See --ins_skip. If this option is used, must also use --ins_skip.', metavar='INT')
@@ -307,8 +327,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.make_long_reads(
options.infile,
options.outfile,
---- a/scripts/fastaq_make_random_contigs
-+++ b/scripts/fastaq_make_random_contigs
+Index: fastaq/scripts/fastaq_make_random_contigs
+===================================================================
+--- fastaq.orig/scripts/fastaq_make_random_contigs
++++ fastaq/scripts/fastaq_make_random_contigs
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -317,7 +339,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Makes a multi-fasta file of random sequences, all of the same length. Each base has equal chance of being A,C,G or T',
-@@ -14,6 +13,10 @@
+@@ -14,6 +13,10 @@ parser.add_argument('contigs', type=int,
parser.add_argument('length', type=int, help='Length of each contig')
parser.add_argument('outfile', help='Name of output file')
options = parser.parse_args()
@@ -328,8 +350,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.make_random_contigs(
options.contigs,
options.length,
---- a/scripts/fastaq_merge
-+++ b/scripts/fastaq_merge
+Index: fastaq/scripts/fastaq_merge
+===================================================================
+--- fastaq.orig/scripts/fastaq_merge
++++ fastaq/scripts/fastaq_merge
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -338,7 +362,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Converts multi fasta/q file to single sequence file, preserving original order of sequences',
-@@ -10,6 +9,10 @@
+@@ -10,6 +9,10 @@ parser.add_argument('infile', help='Name
parser.add_argument('outfile', help='Name of output file')
parser.add_argument('-n', '--name', help='Name of sequence in output file [%(default)s]', default='union')
options = parser.parse_args()
@@ -349,8 +373,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.merge_to_one_seq(
options.infile,
options.outfile,
---- a/scripts/fastaq_replace_bases
-+++ b/scripts/fastaq_replace_bases
+Index: fastaq/scripts/fastaq_replace_bases
+===================================================================
+--- fastaq.orig/scripts/fastaq_replace_bases
++++ fastaq/scripts/fastaq_replace_bases
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -359,7 +385,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Replaces all occurences of one letter with another in a fasta/q file',
-@@ -11,4 +10,8 @@
+@@ -11,4 +10,8 @@ parser.add_argument('outfile', help='Nam
parser.add_argument('old', help='Base to be replaced')
parser.add_argument('new', help='Replace with this letter')
options = parser.parse_args()
@@ -368,8 +394,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.replace_bases(options.infile, options.outfile, options.old, options.new)
---- a/scripts/fastaq_reverse_complement
-+++ b/scripts/fastaq_reverse_complement
+Index: fastaq/scripts/fastaq_reverse_complement
+===================================================================
+--- fastaq.orig/scripts/fastaq_reverse_complement
++++ fastaq/scripts/fastaq_reverse_complement
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -378,7 +406,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Reverse complements all sequences in a fasta/q file',
-@@ -9,4 +8,8 @@
+@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser(
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
@@ -387,8 +415,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.reverse_complement(options.infile, options.outfile)
---- a/scripts/fastaq_scaffolds_to_contigs
-+++ b/scripts/fastaq_scaffolds_to_contigs
+Index: fastaq/scripts/fastaq_scaffolds_to_contigs
+===================================================================
+--- fastaq.orig/scripts/fastaq_scaffolds_to_contigs
++++ fastaq/scripts/fastaq_scaffolds_to_contigs
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -397,17 +427,18 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Creates a file of contigs from a file of scaffolds - i.e. breaks at every gap in the input',
-@@ -10,4 +9,8 @@
+@@ -10,4 +9,7 @@ parser.add_argument('--number_contigs',
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outfile', help='Name of output contigs file')
options = parser.parse_args()
+
-+
+from fastaq import tasks
+
tasks.scaffolds_to_contigs(options.infile, options.outfile, number_contigs=options.number_contigs)
---- a/scripts/fastaq_search_for_seq
-+++ b/scripts/fastaq_search_for_seq
+Index: fastaq/scripts/fastaq_search_for_seq
+===================================================================
+--- fastaq.orig/scripts/fastaq_search_for_seq
++++ fastaq/scripts/fastaq_search_for_seq
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -416,17 +447,18 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Searches for an exact match on a given string and its reverese complement, in every sequences of a fasta/q file. Case insensitive. Guaranteed to find all hits',
-@@ -10,4 +9,8 @@
+@@ -10,4 +9,7 @@ parser.add_argument('infile', help='Name
parser.add_argument('outfile', help='Name of outputfile. Tab-delimited output: sequence name, position, strand')
parser.add_argument('search_string', help='String to search for in the sequences')
options = parser.parse_args()
+
-+
+from fastaq import tasks
+
tasks.search_for_seq(options.infile, options.outfile, options.search_string)
---- a/scripts/fastaq_sequence_trim
-+++ b/scripts/fastaq_sequence_trim
+Index: fastaq/scripts/fastaq_sequence_trim
+===================================================================
+--- fastaq.orig/scripts/fastaq_sequence_trim
++++ fastaq/scripts/fastaq_sequence_trim
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -435,7 +467,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming',
-@@ -13,6 +12,10 @@
+@@ -14,6 +13,10 @@ parser.add_argument('outfile_1', help='N
parser.add_argument('outfile_2', help='Name of output reverse fasta/q file', metavar='out_2')
parser.add_argument('trim_seqs', help='Name of fasta/q file of sequences to search for at the start of each input sequence', metavar='trim_seqs')
options = parser.parse_args()
@@ -446,8 +478,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.sequence_trim(
options.infile_1,
options.infile_2,
---- a/scripts/fastaq_split_by_base_count
-+++ b/scripts/fastaq_split_by_base_count
+Index: fastaq/scripts/fastaq_split_by_base_count
+===================================================================
+--- fastaq.orig/scripts/fastaq_split_by_base_count
++++ fastaq/scripts/fastaq_split_by_base_count
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -456,19 +490,19 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Splits a multi fasta/q file into separate files. Does not split sequences. Puts up to max_bases into each split file. The exception is that any sequence longer than max_bases is put into its own file.',
-@@ -10,6 +9,9 @@
- parser.add_argument('outprefix', help='Name of output fasta/q file')
- parser.add_argument('max_bases', type=int, help='Max bases in each output split file', metavar='max_bases')
+@@ -12,4 +11,8 @@ parser.add_argument('max_bases', type=in
parser.add_argument('--max_seqs', type=int, help='Max number of sequences in each output split file [no limit]', metavar='INT')
--
+
options = parser.parse_args()
+
+
+from fastaq import tasks
+
tasks.split_by_base_count(options.infile, options.outprefix, options.max_bases, options.max_seqs)
---- a/scripts/fastaq_strip_illumina_suffix
-+++ b/scripts/fastaq_strip_illumina_suffix
+Index: fastaq/scripts/fastaq_strip_illumina_suffix
+===================================================================
+--- fastaq.orig/scripts/fastaq_strip_illumina_suffix
++++ fastaq/scripts/fastaq_strip_illumina_suffix
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -477,7 +511,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Strips /1 or /2 off the end of every read name in a fasta/q file',
-@@ -9,4 +8,8 @@
+@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser(
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
@@ -486,8 +520,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.strip_illumina_suffix(options.infile, options.outfile)
---- a/scripts/fastaq_to_fake_qual
-+++ b/scripts/fastaq_to_fake_qual
+Index: fastaq/scripts/fastaq_to_fake_qual
+===================================================================
+--- fastaq.orig/scripts/fastaq_to_fake_qual
++++ fastaq/scripts/fastaq_to_fake_qual
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -496,7 +532,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Makes fake quality scores file from a fasta/q file',
-@@ -10,6 +9,10 @@
+@@ -10,6 +9,10 @@ parser.add_argument('infile', help='Name
parser.add_argument('outfile', help='Name of output file')
parser.add_argument('-q', '--qual', type=int, help='Quality score to assign to all bases [%(default)s]', default=40)
options = parser.parse_args()
@@ -507,8 +543,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.fastaq_to_fake_qual(
options.infile,
options.outfile,
---- a/scripts/fastaq_to_fasta
-+++ b/scripts/fastaq_to_fasta
+Index: fastaq/scripts/fastaq_to_fasta
+===================================================================
+--- fastaq.orig/scripts/fastaq_to_fasta
++++ fastaq/scripts/fastaq_to_fasta
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -517,7 +555,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Converts sequence file to FASTA format',
-@@ -11,6 +10,10 @@
+@@ -11,6 +10,10 @@ parser.add_argument('outfile', help='Nam
parser.add_argument('-l', '--line_length', type=int, help='Number of bases on each sequence line of output file [%(default)s]', default=60)
parser.add_argument('-s', '--strip_after_whitespace', action='store_true', help='Remove everything after first whitesapce in every sequence name')
options = parser.parse_args()
@@ -528,8 +566,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
tasks.to_fasta(
options.infile,
options.outfile,
---- a/scripts/fastaq_to_mira_xml
-+++ b/scripts/fastaq_to_mira_xml
+Index: fastaq/scripts/fastaq_to_mira_xml
+===================================================================
+--- fastaq.orig/scripts/fastaq_to_mira_xml
++++ fastaq/scripts/fastaq_to_mira_xml
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -538,7 +578,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Creates an xml file from a fasta/q file of reads, for use with Mira assembler',
-@@ -9,4 +8,8 @@
+@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser(
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('xml_out', help='Name of output xml file')
options = parser.parse_args()
@@ -547,8 +587,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.fastaq_to_mira_xml(options.infile, options.xml_out)
---- a/scripts/fastaq_to_orfs_gff
-+++ b/scripts/fastaq_to_orfs_gff
+Index: fastaq/scripts/fastaq_to_orfs_gff
+===================================================================
+--- fastaq.orig/scripts/fastaq_to_orfs_gff
++++ fastaq/scripts/fastaq_to_orfs_gff
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -557,7 +599,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Writes a GFF file of open reading frames from a fasta/q file',
-@@ -10,4 +9,8 @@
+@@ -10,4 +9,8 @@ parser.add_argument('--min_length', type
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('gff_out', help='Name of output gff file')
options = parser.parse_args()
@@ -566,8 +608,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.fastaq_to_orfs_gff(options.infile, options.gff_out, min_length=options.min_length)
---- a/scripts/fastaq_to_perfect_reads
-+++ b/scripts/fastaq_to_perfect_reads
+Index: fastaq/scripts/fastaq_to_perfect_reads
+===================================================================
+--- fastaq.orig/scripts/fastaq_to_perfect_reads
++++ fastaq/scripts/fastaq_to_perfect_reads
@@ -1,10 +1,6 @@
#!/usr/bin/env python3
@@ -579,7 +623,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Makes perfect paired end fastq reads from a fasta/q file, with insert sizes sampled from a normal distribution. Read orientation is innies. Output is an interleaved fastq file.',
-@@ -20,6 +16,12 @@
+@@ -20,6 +16,12 @@ parser.add_argument('--no_n', action='st
parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None, metavar='INT')
options = parser.parse_args()
@@ -592,27 +636,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
random.seed(a=options.seed)
seq_reader = sequences.file_reader(options.infile)
---- a/scripts/fastaq_to_quasr_primers_file
-+++ b/scripts/fastaq_to_quasr_primers_file
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python3
-
- import argparse
--from fastaq import tasks
-
- parser = argparse.ArgumentParser(
- description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated',
-@@ -9,4 +8,8 @@
- parser.add_argument('infile', help='Name of input fasta/q file')
- parser.add_argument('outfile', help='Name of output file')
- options = parser.parse_args()
-+
-+
-+from fastaq import tasks
-+
- tasks.to_quasr_primers(options.infile, options.outfile)
---- a/scripts/fastaq_to_random_subset
-+++ b/scripts/fastaq_to_random_subset
+Index: fastaq/scripts/fastaq_to_random_subset
+===================================================================
+--- fastaq.orig/scripts/fastaq_to_random_subset
++++ fastaq/scripts/fastaq_to_random_subset
@@ -1,9 +1,6 @@
#!/usr/bin/env python3
@@ -623,7 +650,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' +
-@@ -15,6 +12,11 @@
+@@ -15,6 +12,11 @@ parser.add_argument('outfile', help='Nam
parser.add_argument('probability', type=int, help='Probability of keeping any given read (pair) in [0,100]', metavar='INT')
options = parser.parse_args()
@@ -635,8 +662,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
seq_reader = sequences.file_reader(options.infile)
fout = utils.open_file_write(options.outfile)
---- a/scripts/fastaq_to_tiling_bam
-+++ b/scripts/fastaq_to_tiling_bam
+Index: fastaq/scripts/fastaq_to_tiling_bam
+===================================================================
+--- fastaq.orig/scripts/fastaq_to_tiling_bam
++++ fastaq/scripts/fastaq_to_tiling_bam
@@ -1,9 +1,6 @@
#!/usr/bin/env python3
@@ -647,7 +676,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Takes a fasta/q file. Makes a BAM file containing perfect (unpaired) reads tiling the whole genome',
-@@ -17,6 +14,11 @@
+@@ -17,6 +14,11 @@ parser.add_argument('outfile', help='Nam
parser.add_argument('--read_group', help='Add the given read group ID to all reads [%(default)s]' ,default='42')
options = parser.parse_args()
@@ -659,8 +688,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
# make a header first - we need to add the @RG line to the default header made by samtools
tmp_empty_file = options.outfile + '.tmp.empty'
f = utils.open_file_write(tmp_empty_file)
---- a/scripts/fastaq_to_unique_by_id
-+++ b/scripts/fastaq_to_unique_by_id
+Index: fastaq/scripts/fastaq_to_unique_by_id
+===================================================================
+--- fastaq.orig/scripts/fastaq_to_unique_by_id
++++ fastaq/scripts/fastaq_to_unique_by_id
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -669,7 +700,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Removes duplicate sequences from a fasta/q file, based on their names. If the same name is found more than once, then the longest sequence is kept. Order of sequences is preserved in output',
-@@ -9,4 +8,8 @@
+@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser(
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
@@ -678,8 +709,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.to_unique_by_id(options.infile, options.outfile)
---- a/scripts/fastaq_translate
-+++ b/scripts/fastaq_translate
+Index: fastaq/scripts/fastaq_translate
+===================================================================
+--- fastaq.orig/scripts/fastaq_translate
++++ fastaq/scripts/fastaq_translate
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -688,7 +721,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Translates all sequences in a fasta or fastq file. Output is always fasta format',
-@@ -10,4 +9,8 @@
+@@ -10,4 +9,8 @@ parser.add_argument('--frame', type=int,
parser.add_argument('infile', help='Name of fasta/q file to be translated', metavar='in.fasta/q')
parser.add_argument('outfile', help='Name of output fasta file', metavar='out.fasta')
options = parser.parse_args()
@@ -697,8 +730,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.translate(options.infile, options.outfile, frame=options.frame)
---- a/scripts/fastaq_trim_Ns_at_end
-+++ b/scripts/fastaq_trim_Ns_at_end
+Index: fastaq/scripts/fastaq_trim_Ns_at_end
+===================================================================
+--- fastaq.orig/scripts/fastaq_trim_Ns_at_end
++++ fastaq/scripts/fastaq_trim_Ns_at_end
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -707,7 +742,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Trims any Ns off each sequence in a fasta/q file. Does nothing to gaps in the middle, just trims the ends',
-@@ -9,4 +8,8 @@
+@@ -9,4 +8,8 @@ parser = argparse.ArgumentParser(
parser.add_argument('infile', help='Name of input fasta/q file')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
@@ -716,8 +751,10 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
+from fastaq import tasks
+
tasks.trim_Ns_at_end(options.infile, options.outfile)
---- a/scripts/fastaq_trim_ends
-+++ b/scripts/fastaq_trim_ends
+Index: fastaq/scripts/fastaq_trim_ends
+===================================================================
+--- fastaq.orig/scripts/fastaq_trim_ends
++++ fastaq/scripts/fastaq_trim_ends
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
@@ -726,7 +763,7 @@ Author: DMPT <debian-med-packaging at lists.alioth.debian.org>
parser = argparse.ArgumentParser(
description = 'Trims set number of bases off each sequence in a fasta/q file',
-@@ -11,4 +10,8 @@
+@@ -11,4 +10,8 @@ parser.add_argument('start_trim', type=i
parser.add_argument('end_trim', type=int, help='Number of bases to trim off end')
parser.add_argument('outfile', help='Name of output fasta/q file')
options = parser.parse_args()
diff --git a/debian/usage_to_man b/debian/usage_to_man
index 32f28b8..7d5116a 100755
--- a/debian/usage_to_man
+++ b/debian/usage_to_man
@@ -12,7 +12,7 @@ sub createManPages {
my $source= 'scripts';
my $destination= 'debian/man';
my $app_name = 'Fastaq';
-
+ my $descriptions = shortDescription();
unless ( -d $destination ) {
system(mkdir $destination);
@@ -57,7 +57,7 @@ sub createManPages {
$output_line =~ s/(\.TH) ("\d+") ("[a-zA-Z0-9_ ]*") ("[a-zA-Z0-9_<>\[\]\/\.\(\), ]*") ("[a-zA-Z0-9_]*")/$1 $uc_filename $2 $3 "$app_name" "Fastaq executables"/;
}
- $output_line =~ s/ \\- $filename/$filename/;
+ $output_line =~ s/ \\- $filename/$filename \- $descriptions{$filename}/;
if ( $output_line =~ m/^.PP/ && $output[$i + 1] =~ m/^$filename\:/ ) {
$output_line = $output[$i + 1] = '';
@@ -97,3 +97,46 @@ END_OF_C_BLURB
print $man_fh "$copyright_blurb\n";
}
+
+
+sub shortDescription {
+ # Returns a reference to a hash mapping each script file name to its one-line description; keys must match the file names in scripts/ exactly (case-sensitive).
+ my %descriptions = (
+ fastaq_capillary_to_pairs => 'makes an interleaved file of read pairs',
+ fastaq_chunker => 'splits a multi fasta/q file into separate files',
+ fastaq_count_sequences => 'counts the number of sequences in a fasta/q file',
+ fastaq_deinterleave => 'deinterleaves fasta/q file',
+ fastaq_enumerate_names => 'renames sequences in a file, calling them 1,2,3...',
+ fastaq_expand_nucleotides => 'makes all combinations of sequences in input file',
+ fastaq_extend_gaps => 'extends the length of all gaps in a fasta/q file',
+ fastaq_fasta_to_fastq => 'given a fasta and qual file, makes a fastq file',
+ fastaq_filter => 'filters a fasta/q file by sequence length and/or by name',
+ fastaq_get_ids => 'gets ids from each sequence in a fasta or fastq file',
+ fastaq_get_seq_flanking_gaps => 'gets the sequences either side of gaps in a fasta/q file',
+ fastaq_insert_or_delete_bases => 'deletes or inserts bases at given position(s)',
+ fastaq_interleave => 'interleaves two fasta/q files',
+ fastaq_long_read_simulate => 'simulates long reads from a fasta/q file',
+ fastaq_make_random_contigs => 'makes a multi-fasta file of random sequences',
+ fastaq_merge => 'converts multi fasta/q file to single sequence file',
+ fastaq_replace_bases => 'replaces all occurences of one letter with another',
+ fastaq_reverse_complement => 'reverse complements all sequences',
+ fastaq_scaffolds_to_contigs => 'creates a file of contigs from a file of scaffolds',
+ fastaq_search_for_seq => 'searches for an exact match on a given string and its reverese complement. guaranteed to find all hits',
+ fastaq_sequence_trim => 'trims sequences off the start of all sequences in a pair of fasta/q files',
+ fastaq_split_by_base_count => 'splits a multi fasta/q file into separate files',
+ fastaq_strip_illumina_suffix => 'strips /1 or /2 off the end of every read name',
+ fastaq_to_fake_qual => 'makes fake quality scores file',
+ fastaq_to_fasta => 'converts sequence file to fasta format',
+ fastaq_to_mira_xml => 'creates an xml file from a fasta/q file of reads, for use with mira assembler',
+ fastaq_to_orfs_gff => 'writes a gff file of open reading frames',
+ fastaq_to_perfect_reads => 'makes perfect paired end fastq reads',
+ fastaq_to_random_subset => 'takes a random subset of reads',
+ fastaq_to_tiling_bam => 'makes a bam file containing perfect (unpaired) reads tiling the whole genome',
+ fastaq_to_unique_by_id => 'removes duplicate sequences',
+ fastaq_translate => 'translates all sequences',
+ fastaq_trim_ends => 'trims set number of bases off each sequence',
+ fastaq_trim_Ns_at_end => 'trims any ns off each sequence'
+ );
+ # A hash reference is returned, not a hash: callers must dereference it, e.g. $ref->{$name}.
+ return(\%descriptions);
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git
More information about the debian-med-commit
mailing list