[med-svn] [fastaq] 01/01: removed src folder as this is now scripts

Jorge Soares jssoares-guest at moszumanska.debian.org
Wed Oct 8 13:40:33 UTC 2014


This is an automated email from the git hooks/post-receive script.

jssoares-guest pushed a commit to branch master
in repository fastaq.

commit 037b3fb353ac351729daa733ac631ebfeecdfd4e
Author: Jorge Soares <j.s.soares at gmail.com>
Date:   Wed Oct 8 14:48:38 2014 +0100

    removed src folder as this is now scripts
---
 src/fastaq_capillary_to_pairs     | 12 -----
 src/fastaq_chunker                | 21 ---------
 src/fastaq_count_sequences        | 11 -----
 src/fastaq_deinterleave           | 14 ------
 src/fastaq_enumerate_names        | 19 --------
 src/fastaq_extend_gaps            | 13 ------
 src/fastaq_fasta_to_fastq         | 13 ------
 src/fastaq_filter                 | 24 ----------
 src/fastaq_get_ids                | 12 -----
 src/fastaq_get_seq_flanking_gaps  | 14 ------
 src/fastaq_insert_or_delete_bases | 94 ---------------------------------------
 src/fastaq_interleave             | 13 ------
 src/fastaq_make_random_contigs    | 25 -----------
 src/fastaq_replace_bases          | 14 ------
 src/fastaq_reverse_complement     | 12 -----
 src/fastaq_scaffolds_to_contigs   | 13 ------
 src/fastaq_search_for_seq         | 13 ------
 src/fastaq_split_by_base_count    | 15 -------
 src/fastaq_strip_illumina_suffix  | 12 -----
 src/fastaq_to_fasta               | 19 --------
 src/fastaq_to_mira_xml            | 12 -----
 src/fastaq_to_perfect_reads       | 86 -----------------------------------
 src/fastaq_to_quasr_primers_file  | 12 -----
 src/fastaq_to_random_subset       | 36 ---------------
 src/fastaq_to_tiling_bam          | 79 --------------------------------
 src/fastaq_to_unique_by_id        | 12 -----
 src/fastaq_translate              | 13 ------
 src/fastaq_trim_Ns_at_end         | 12 -----
 src/fastaq_trim_ends              | 14 ------
 29 files changed, 659 deletions(-)

diff --git a/src/fastaq_capillary_to_pairs b/src/fastaq_capillary_to_pairs
deleted file mode 100755
index 0d4a48f..0000000
--- a/src/fastaq_capillary_to_pairs
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Given a fasta/q file of capillary reads, makes an interleaved file of read pairs (where more than read from same ligation, takes the longest read) and a file of unpaired reads. Replaces the .p1k/.q1k part of read names to denote fwd/rev reads with /1 and /2',
-    usage = '%(prog)s <infile> <outfiles prefix>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outprefix', help='Prefix of output files', metavar='outfiles prefix')
-options = parser.parse_args()
-tasks.capillary_to_pairs(options.infile, options.outprefix)
diff --git a/src/fastaq_chunker b/src/fastaq_chunker
deleted file mode 100755
index d1aeb68..0000000
--- a/src/fastaq_chunker
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Splits a multi fasta/q file into separate files. Splits sequences into chunks of a fixed size. Aims for chunk_size chunks in each file, but allows a little extra, so chunk can be up to (chunk_size + tolerance), to prevent tiny chunks made from the ends of sequences',
-    usage = '%(prog)s [options] <fasta/q in> <prefix of output files> <chunk size> <tolerance>')
-parser.add_argument('infile', help='Name of input fasta/q file to be split')
-parser.add_argument('outprefix', help='Name of output fasta/q file')
-parser.add_argument('chunk_size', type=int, help='Size of each chunk')
-parser.add_argument('tolerance', type=int, help='Tolerance allowed in chunk size')
-parser.add_argument('--skip_all_Ns', action='store_true', help='Do not output any sequence that consists of all Ns')
-options = parser.parse_args()
-tasks.split_by_fixed_size(
-    options.infile,
-    options.outprefix,
-    options.chunk_size,
-    options.tolerance,
-    skip_if_all_Ns=options.skip_all_Ns
-)
diff --git a/src/fastaq_count_sequences b/src/fastaq_count_sequences
deleted file mode 100755
index fcb7911..0000000
--- a/src/fastaq_count_sequences
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Counts the number of sequences in a fasta/q file',
-    usage = '%(prog)s <fasta/q in>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-options = parser.parse_args()
-print(tasks.count_sequences(options.infile))
diff --git a/src/fastaq_deinterleave b/src/fastaq_deinterleave
deleted file mode 100755
index a28c505..0000000
--- a/src/fastaq_deinterleave
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Deinterleaves fasta/q file, so that reads are written alternately between two output files',
-    usage = '%(prog)s [options] <fasta/q in> <out_fwd> <out_rev>')
-parser.add_argument('--fasta_out', action='store_true', help='Use this to write output as fasta (default is same as input)', default=False)
-parser.add_argument('infile', help='Name of fasta/q file to be deinterleaved')
-parser.add_argument('out_fwd', help='Name of output fasta/q file of forwards reads')
-parser.add_argument('out_rev', help='Name of output fasta/q file of reverse reads')
-options = parser.parse_args()
-tasks.deinterleave(options.infile, options.out_fwd, options.out_rev, fasta_out=options.fasta_out)
diff --git a/src/fastaq_enumerate_names b/src/fastaq_enumerate_names
deleted file mode 100755
index 89831cb..0000000
--- a/src/fastaq_enumerate_names
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Renames sequences in a file, calling them 1,2,3... etc',
-    usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('--start_index', type=int, help='Starting number [%(default)s]', default=1)
-parser.add_argument('--rename_file', help='If used, will write a file of old name to new name')
-parser.add_argument('--keep_suffix', action='store_true', help='Use this to keep a /1 or /2 suffix at the end of each name')
-parser.add_argument('infile', help='Name of fasta/q file to be read')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.enumerate_names(options.infile,
-                      options.outfile,
-                      start_index=options.start_index,
-                      keep_illumina_suffix=options.keep_suffix,
-                      rename_file=options.rename_file)
diff --git a/src/fastaq_extend_gaps b/src/fastaq_extend_gaps
deleted file mode 100755
index e8622c3..0000000
--- a/src/fastaq_extend_gaps
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Extends the length of all gaps (and trims the start/end of sequences) in a fasta/q file. Does this by replacing a set number of bases either side of each gap with Ns. Any sequence that ends up as all Ns is lost',
-    usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('--trim_number', type=int, help='Number of bases to trim around each gap, and off ends of each sequence [%(default)s]', default=100)
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.extend_gaps(options.infile, options.outfile, options.trim_number)
diff --git a/src/fastaq_fasta_to_fastq b/src/fastaq_fasta_to_fastq
deleted file mode 100755
index 18b6edb..0000000
--- a/src/fastaq_fasta_to_fastq
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Given a fasta and qual file, makes a fastq file',
-    usage = '%(prog)s <fasta in> <qual in> <fastq out>')
-parser.add_argument('fasta', help='Name of input fasta file', metavar='fasta in')
-parser.add_argument('qual', help='Name of input quality scores file', metavar='qual in')
-parser.add_argument('outfile', help='Name of output fastq file', metavar='fastq out')
-options = parser.parse_args()
-tasks.fasta_to_fastq(options.fasta, options.qual, options.outfile)
diff --git a/src/fastaq_filter b/src/fastaq_filter
deleted file mode 100755
index cb260e6..0000000
--- a/src/fastaq_filter
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Filters a fasta/q file by sequence length and/or by name matching a regular expression',
-    usage = '%(prog)s [options] <infile> <outfile>')
-parser.add_argument('--min_length', type=int, help='Minimum length of sequence to keep [%(default)s]', default=0, metavar='INT')
-parser.add_argument('--max_length', type=float, help='Maximum length of sequence to keep [%(default)s]', default=float('inf'), metavar='INT')
-parser.add_argument('--regex', help='If given, only reads with a name matching the regular expression will be kept')
-parser.add_argument('--ids_file', help='If given, only reads whose ID is in th given file will be used. One ID per line of file.')
-parser.add_argument('-v', '--invert', action='store_true', help='Keep sequences that do not match the filters')
-parser.add_argument('infile', help='Name of fasta/q file to be filtered')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.filter(options.infile,
-             options.outfile,
-             minlength=options.min_length,
-             maxlength=options.max_length,
-             regex=options.regex,
-             ids_file=options.ids_file,
-             invert=options.invert
-)
diff --git a/src/fastaq_get_ids b/src/fastaq_get_ids
deleted file mode 100755
index 59b9e0e..0000000
--- a/src/fastaq_get_ids
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Gets IDs from each sequence in a fasta or fastq file',
-    usage = '%(prog)s <infile> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.get_ids(options.infile, options.outfile)
diff --git a/src/fastaq_get_seq_flanking_gaps b/src/fastaq_get_seq_flanking_gaps
deleted file mode 100755
index 0c54154..0000000
--- a/src/fastaq_get_seq_flanking_gaps
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Gets the sequences either side of gaps in a fasta/q file',
-    usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('--left', type=int, help='Number of bases to get to left of gap [%(default)s]', default=25, metavar='INT')
-parser.add_argument('--right', type=int, help='Number of bases to get to right of gap [%(default)s]', default=25, metavar='INT')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.get_seqs_flanking_gaps(options.infile, options.outfile, options.left, options.right)
diff --git a/src/fastaq_insert_or_delete_bases b/src/fastaq_insert_or_delete_bases
deleted file mode 100755
index 61e1e80..0000000
--- a/src/fastaq_insert_or_delete_bases
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import sys
-import random
-from fastaq import sequences, utils, intervals
-
-parser = argparse.ArgumentParser(
-    description = 'Deletes or inserts bases at given position(s) from a fasta/q file',
-    usage = '%(prog)s <fasta/q in> <outfile>')
-parser.add_argument('infile', help='Name of fasta/q file to be read')
-parser.add_argument('outfile', help='Name of output file')
-parser.add_argument('-d','--delete', action='append', help='Delete the given bases from the given sequence. Format same as samtools view: name:start-end. This option can be used multiple times (once for each region to delete). Overlapping coords will be merged before deleting', metavar='Name:start:bases')
-parser.add_argument('--delete_range', help='Deletes bases starting at position P in each sequence of the input file. Deletes start + (n-1)*step bases from sequence n.', metavar='P,start,step')
-parser.add_argument('-i','--insert', action='append', help='Insert a random string of bases at the given position. Format is name:position:number_to_add. Bases are added after the position. This option can be used multiple times', metavar='Name:start:bases')
-parser.add_argument('--insert_range', help='Inserts random bases starting after position P in each sequence of the input file. Inserts start + (n-1)*step bases into sequence n.', metavar='P,start,step')
-options = parser.parse_args()
-
-test_ops = [int(x is not None) for x in [options.delete, options.insert, options.delete_range, options.insert_range]]
-
-if sum(test_ops) != 1:
-    print('Must use one of --delete, --insert, --delete_range, --insert_range. Cannot continue', file=sys.stderr)
-    sys.exit(1)
-
-
-def range2dic(range_in):
-    if range_in is None:
-        return {}
-    (pos, start, step) = range_in.split(',')
-    d = {}
-    d['pos'] = int(pos) - 1
-    d['bases'] = int(start)
-    d['step'] = int(step)
-    return d
-
-delete_range = range2dic(options.delete_range)
-insert_range = range2dic(options.insert_range)
-
-
-# convert the -d regions into sequence name, start and end coords
-to_delete = {}
-if options.delete:
-    for s in options.delete:
-        id, coords = s.rsplit(':')
-        start, end = [int(x)-1 for x in coords.split('-')]
-        if id not in to_delete:
-            to_delete[id] = []
-        to_delete[id].append(intervals.Interval(start, end))
-
-
-to_insert = {}
-if options.insert:
-    for s in options.insert:
-        id, pos, bases = s.rsplit(':',2)
-        pos = int(pos) - 1
-        bases = int(bases)
-        if id not in to_insert:
-            to_insert[id] = []
-        to_insert[id].append((pos, bases))
-
-
-assert len(to_delete) * len(to_insert) == 0
-
-# merge overlapping regions to be deleted
-for l in to_delete.values():
-    intervals.merge_overlapping_in_list(l)
-
-# sort positions to be inserted
-for l in to_insert.values():
-    l.sort()
-
-# read in the fasta/q file and print outfile with deleted sequences
-seq_reader = sequences.file_reader(options.infile)
-f = utils.open_file_write(options.outfile)
-
-for seq in seq_reader:
-    if seq.id in to_delete:
-        # delete regions for this sequence, but start at the end so the
-        # coords don't get messed up after the first deletion
-        for inter in reversed(to_delete[seq.id]):
-            seq.seq = seq.seq[:inter.start] + seq.seq[inter.end + 1:]
-    elif options.delete_range:
-        seq.seq = seq.seq[:delete_range['pos']] + seq.seq[delete_range['pos'] + delete_range['bases']:]
-        delete_range['bases'] += delete_range['step']
-    elif seq.id in to_insert:
-        for pos, bases in reversed(to_insert[seq.id]):
-            seq.seq = seq.seq[:pos + 1] + ''.join([random.choice('ACGT') for x in range(bases)]) + seq.seq[pos + 1:]
-    elif options.insert_range:
-        seq.seq = seq.seq[:insert_range['pos'] + 1] + ''.join([random.choice('ACGT') for x in range(insert_range['bases'])]) +  seq.seq[insert_range['pos'] + 1:]
-        insert_range['bases'] += insert_range['step']
-
-    print(seq, file=f)
-
-utils.close(f)
diff --git a/src/fastaq_interleave b/src/fastaq_interleave
deleted file mode 100755
index 4b39a3e..0000000
--- a/src/fastaq_interleave
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Interleaves two fasta/q files, so that reads are written alternately first/second in output file',
-    usage = '%(prog)s [options] <fasta/q 1> <fasta/q 2> <outfile>')
-parser.add_argument('infile_1', help='Name of first input fasta/q file')
-parser.add_argument('infile_2', help='Name of second input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file of interleaved reads')
-options = parser.parse_args()
-tasks.interleave(options.infile_1, options.infile_2, options.outfile)
diff --git a/src/fastaq_make_random_contigs b/src/fastaq_make_random_contigs
deleted file mode 100755
index c6774fe..0000000
--- a/src/fastaq_make_random_contigs
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Makes a multi-fasta file of random sequences, all of the same length. Each base has equal chance of being A,C,G or T',
-    usage = '%(prog)s [options] <number of sequences> <length of each sequence> <fasta out>')
-parser.add_argument('--first_number', type=int, help='If numbering the sequences, the first sequence gets this number [%(default)s]', default=1)
-parser.add_argument('--name_by_letters', action='store_true', help='Name the contigs A,B,C,... will start at A again if you get to Z')
-parser.add_argument('--prefix', help='Prefix to add to start of every sequence name', default='')
-parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None)
-parser.add_argument('contigs', type=int, help='Nunber of contigs to make')
-parser.add_argument('length', type=int, help='Length of each contig')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.make_random_contigs(
-    options.contigs,
-    options.length,
-    options.outfile,
-    name_by_letters=options.name_by_letters,
-    prefix=options.prefix,
-    seed=options.seed,
-    first_number=options.first_number
-)
diff --git a/src/fastaq_replace_bases b/src/fastaq_replace_bases
deleted file mode 100755
index 6ce2fc0..0000000
--- a/src/fastaq_replace_bases
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Replaces all occurences of one letter with another in a fasta/q file',
-    usage = '%(prog)s <fasta/q in> <outfile> <old> <new>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-parser.add_argument('old', help='Base to be replaced')
-parser.add_argument('new', help='Replace with this letter')
-options = parser.parse_args()
-tasks.replace_bases(options.infile, options.outfile, options.old, options.new)
diff --git a/src/fastaq_reverse_complement b/src/fastaq_reverse_complement
deleted file mode 100755
index 147e01f..0000000
--- a/src/fastaq_reverse_complement
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Reverse complements all sequences in a fasta/q file',
-    usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.reverse_complement(options.infile, options.outfile)
diff --git a/src/fastaq_scaffolds_to_contigs b/src/fastaq_scaffolds_to_contigs
deleted file mode 100755
index 46d4861..0000000
--- a/src/fastaq_scaffolds_to_contigs
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Creates a file of contigs from a file of scaffolds - i.e. breaks at every gap in the input',
-    usage = '%(prog)s [options] <infile> <outfile>')
-parser.add_argument('--number_contigs', action='store_true', help='Use this to enumerate contig names 1,2,3,... within each scaffold')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output contigs file')
-options = parser.parse_args()
-tasks.scaffolds_to_contigs(options.infile, options.outfile, number_contigs=options.number_contigs)
diff --git a/src/fastaq_search_for_seq b/src/fastaq_search_for_seq
deleted file mode 100755
index c00ed7a..0000000
--- a/src/fastaq_search_for_seq
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Searches for an exact match on a given string and its reverese complement, in every sequences of a fasta/q file. Case insensitive. Guaranteed to find all hits',
-    usage = '%(prog)s [options] <fasta/q in> <outfile> <search_string>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of outputfile. Tab-delimited output: sequence name, position, strand')
-parser.add_argument('search_string', help='String to search for in the sequences')
-options = parser.parse_args()
-tasks.search_for_seq(options.infile, options.outfile, options.search_string)
diff --git a/src/fastaq_split_by_base_count b/src/fastaq_split_by_base_count
deleted file mode 100755
index dd7b43d..0000000
--- a/src/fastaq_split_by_base_count
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Splits a multi fasta/q file into separate files. Does not split sequences. Puts up to max_bases into each split file. The exception is that any sequence longer than max_bases is put into its own file.',
-    usage = '%(prog)s [options] <fasta/q in> <prefix of output files> <max_bases>')
-parser.add_argument('infile', help='Name of input fasta/q file to be split')
-parser.add_argument('outprefix', help='Name of output fasta/q file')
-parser.add_argument('max_bases', type=int, help='Max bases in each output split file', metavar='max_bases')
-parser.add_argument('--max_seqs', type=int, help='Max number of sequences in each output split file [no limit]', metavar='INT')
-
-options = parser.parse_args()
-tasks.split_by_base_count(options.infile, options.outprefix, options.max_bases, options.max_seqs)
diff --git a/src/fastaq_strip_illumina_suffix b/src/fastaq_strip_illumina_suffix
deleted file mode 100755
index 6a29a42..0000000
--- a/src/fastaq_strip_illumina_suffix
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Strips /1 or /2 off the end of every read name in a fasta/q file',
-    usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.strip_illumina_suffix(options.infile, options.outfile)
diff --git a/src/fastaq_to_fasta b/src/fastaq_to_fasta
deleted file mode 100755
index 742e95f..0000000
--- a/src/fastaq_to_fasta
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Converts sequence file to FASTA format',
-    usage = '%(prog)s <infile> <outfile>')
-parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
-parser.add_argument('outfile', help='Name of output file')
-parser.add_argument('-l', '--line_length', type=int, help='Number of bases on each sequence line of output file [%(default)s]', default=60)
-parser.add_argument('-s', '--strip_after_whitespace', action='store_true', help='Remove everything after first whitesapce in every sequence name')
-options = parser.parse_args()
-tasks.to_fasta(
-    options.infile,
-    options.outfile,
-    line_length=options.line_length,
-    strip_after_first_whitespace=options.strip_after_whitespace
-)
diff --git a/src/fastaq_to_mira_xml b/src/fastaq_to_mira_xml
deleted file mode 100755
index 582d669..0000000
--- a/src/fastaq_to_mira_xml
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Creates an xml file from a fasta/q file of reads, for use with Mira assembler',
-    usage = '%(prog)s [options] <fastq_in> <xml_out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('xml_out', help='Name of output xml file')
-options = parser.parse_args()
-tasks.fastaq_to_mira_xml(options.infile, options.xml_out)
diff --git a/src/fastaq_to_perfect_reads b/src/fastaq_to_perfect_reads
deleted file mode 100755
index 6f3ca10..0000000
--- a/src/fastaq_to_perfect_reads
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import random
-from math import floor, ceil
-from fastaq import sequences, utils
-import sys
-
-parser = argparse.ArgumentParser(
-    description = 'Makes perfect paired end fastq reads from a fasta/q file, with insert sizes sampled from a normal distribution. Read orientation is innies. Output is an interleaved fastq file.',
-    usage = '%(prog)s <fasta/q in> <out.fastq> <mean insert size> <insert std deviation> <mean coverage> <read length>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fastq file')
-parser.add_argument('mean_insert', type=int, help='Mean insert size of read pairs', metavar='mean insert size')
-parser.add_argument('insert_std', type=float, help='Standard devation of insert size', metavar='insert std deviation')
-parser.add_argument('coverage', type=float, help='Mean coverage of the reads', metavar='mean coverage')
-parser.add_argument('readlength', type=int, help='Length of each read', metavar='read length')
-parser.add_argument('--fragments', help='Write FASTA sequences of fragments (i.e. read pairs plus sequences in between them) to the given filename', metavar='FILENAME')
-parser.add_argument('--no_n', action='store_true', help='Don\'t allow any N or n characters in the reads')
-parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None, metavar='INT')
-options = parser.parse_args()
-
-random.seed(a=options.seed)
-
-seq_reader = sequences.file_reader(options.infile)
-fout = utils.open_file_write(options.outfile)
-pair_counter = 1
-
-if options.fragments:
-    fout_frags = utils.open_file_write(options.fragments)
-
-for ref in seq_reader:
-    # check if current seq is long enough
-    if len(ref) < options.mean_insert + 4 * options.insert_std:
-        print('Warning, sequence ', ref.id, ' too short.  Skipping it...', file=sys.stderr)
-        continue
-
-    # work out how many reads to simulate
-    read_pairs = int(0.5 * options.coverage * len(ref) / options.readlength)
-
-    # it's possible that we pick the same fragment twice, in which case the
-    # reads would get the same name. So remember the frag coords
-    used_fragments = {}  # (middle_position, length) => count
-
-    # do the simulation:  pick insert size from normal distribution, and
-    # position in genome from uniform distribution
-    x = 0
-    while x < read_pairs:
-        isize = int(random.normalvariate(options.mean_insert, options.insert_std))
-        while isize > len(ref) or isize < options.readlength:
-            isize = int(random.normalvariate(options.mean_insert, options.insert_std))
-        middle_pos = random.randint(ceil(0.5 *isize), floor(len(ref) - 0.5 * isize))
-        read_start1 = int(middle_pos - ceil(0.5 * isize))
-        read_start2 = read_start1 + isize - options.readlength
-
-        readname = ':'.join([ref.id, str(pair_counter), str(read_start1+1), str(read_start2+1)])
-
-        fragment = (middle_pos, isize)
-        if fragment in used_fragments:
-            used_fragments[fragment] += 1
-            readname += '.dup.' + str(used_fragments[fragment])
-        else:
-            used_fragments[fragment] = 1
-
-        read1 = sequences.Fastq(readname + '/1', ref.seq[read_start1:read_start1 + options.readlength], 'I' * options.readlength)
-        read2 = sequences.Fastq(readname + '/2', ref.seq[read_start2:read_start2 + options.readlength], 'I' * options.readlength)
-
-
-        if options.no_n and ('n' in read1.seq or 'N' in read1.seq or 'n' in read2.seq or 'N' in read2.seq):
-            continue
-
-        read2.revcomp()
-
-        print(read1, file=fout)
-        print(read2, file=fout)
-
-        if options.fragments:
-            frag = sequences.Fasta(readname, ref.seq[read_start1:read_start2 + options.readlength])
-            print(frag, file=fout_frags)
-
-        pair_counter += 1
-        x += 1
-
-utils.close(fout)
-if options.fragments:
-    utils.close(fout_frags)
diff --git a/src/fastaq_to_quasr_primers_file b/src/fastaq_to_quasr_primers_file
deleted file mode 100755
index 8e5bf7c..0000000
--- a/src/fastaq_to_quasr_primers_file
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated',
-    usage = '%(prog)s <fasta/q in> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.to_quasr_primers(options.infile, options.outfile)
diff --git a/src/fastaq_to_random_subset b/src/fastaq_to_random_subset
deleted file mode 100755
index b4f11c5..0000000
--- a/src/fastaq_to_random_subset
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import argparse
-import random
-from fastaq import sequences, utils
-
-parser = argparse.ArgumentParser(
-    description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' +
-                  'from a mates file.  Ouptut is interleaved if mates file given',
-    usage = '%(prog)s [options] <fasta/q in> <outfile> <probablilty of keeping read (pair) in [0,100]>')
-parser.add_argument('--mate_file', help='Name of fasta/q mates file')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of fasta/q output file')
-parser.add_argument('probability', type=int, help='Probability of keeping any given read (pair) in [0,100]', metavar='INT')
-options = parser.parse_args()
-
-seq_reader = sequences.file_reader(options.infile)
-fout = utils.open_file_write(options.outfile)
-
-if options.mate_file:
-    mate_seq_reader = sequences.file_reader(options.mate_file)
-
-for seq in seq_reader:
-    if options.mate_file:
-        try:
-            mate_seq = next(mate_seq_reader)
-        except StopIteration:
-            print('Error! Didn\'t get mate for read', seq.id, file=sys.stderr)
-            sys.exit(1)
-    if random.randint(0, 100) <= options.probability:
-        print(seq, file=fout)
-        if options.mate_file:
-            print(mate_seq, file=fout)
-
-utils.close(fout)
diff --git a/src/fastaq_to_tiling_bam b/src/fastaq_to_tiling_bam
deleted file mode 100755
index 9b9738d..0000000
--- a/src/fastaq_to_tiling_bam
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import sys
-import os
-from fastaq import sequences, utils
-
-parser = argparse.ArgumentParser(
-    description = 'Takes a fasta/q file. Makes a BAM file containing perfect (unpaired) reads tiling the whole genome',
-    usage = '%(prog)s [options] <fasta/q in> <read length> <read step> <read prefix> <out.bam>',
-    epilog = 'Important: assumes that samtools is in your path')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('read_length', type=int, help='Length of reads')
-parser.add_argument('read_step', type=int, help='Distance between start of each read')
-parser.add_argument('read_prefix', help='Prefix of read names')
-parser.add_argument('outfile', help='Name of output BAM file')
-parser.add_argument('--read_group', help='Add the given read group ID to all reads [%(default)s]' ,default='42')
-options = parser.parse_args()
-
-# make a header first  - we need to add the @RG line to the default header made by samtools
-tmp_empty_file = options.outfile + '.tmp.empty'
-f = utils.open_file_write(tmp_empty_file)
-utils.close(f)
-try:
-    f = os.popen('samtools view -H -T ' + options.infile + ' ' + tmp_empty_file)
-except IOError:
-    print('Error making tmp header file', file=sys.stderr)
-    sys.exit(1)
-
-header_lines = f.readlines()
-header_lines.append('@RG\tID:' + options.read_group + '\tSM:FAKE')
-f.close()
-os.unlink(tmp_empty_file)
-
-seq_reader = sequences.file_reader(options.infile)
-try:
-    f = os.popen('samtools view -hbS - > ' + options.outfile, 'w')
-except IOError:
-    print("Error opening for writing BAM file '" + options.outfile + "'", file=sys.stderr)
-    sys.exit(1)
-
-print(''.join(header_lines), file=f)
-
-for seq in seq_reader:
-    end_range = len(seq)
-    if len(seq) < options.read_length:
-        end_range = 1
-    for i in range(0, end_range, options.read_step):
-        if len(seq) <= options.read_length:
-            start = 0
-            end = len(seq) - 1
-        else:
-            start = i
-            end = start + options.read_length - 1
-
-            if end > len(seq) - 1:
-                end  = len(seq) - 1
-                start = end - options.read_length + 1
-
-        read = sequences.Fastq(options.read_prefix + ':' + seq.id + ':' + str(start + 1) + ':' + str(end + 1), seq[start:end+1], 'I' * (end - start + 1))
-
-        print ('\t'.join([read.id,
-                         '0',
-                         seq.id,
-                         str(start + 1),
-                         '60',
-                         str(len(read)) + 'M',
-                         '*',
-                         '*',
-                         '*',
-                         read.seq,
-                         read.qual,
-                         'RG:Z:' + options.read_group]), file=f)
-
-        if end == len(seq) - 1:
-            break
-
-f.close()
-
diff --git a/src/fastaq_to_unique_by_id b/src/fastaq_to_unique_by_id
deleted file mode 100755
index e743a92..0000000
--- a/src/fastaq_to_unique_by_id
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Removes duplicate sequences from a fasta/q file, based on their names. If the same name is found more than once, then the longest sequence is kept. Order of sequences is preserved in output',
-    usage = '%(prog)s <infile> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.to_unique_by_id(options.infile, options.outfile)
diff --git a/src/fastaq_translate b/src/fastaq_translate
deleted file mode 100755
index 9ec04c7..0000000
--- a/src/fastaq_translate
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Translates all sequences in a fasta or fastq file. Output is always fasta format',
-    usage = '%(prog)s <in.fasta/q> <out.fasta>')
-parser.add_argument('--frame', type=int, choices=[0,1,2], help='Frame to translate [%(default)s]', default=0)
-parser.add_argument('infile', help='Name of fasta/q file to be translated', metavar='in.fasta/q')
-parser.add_argument('outfile', help='Name of output fasta file', metavar='out.fasta')
-options = parser.parse_args()
-tasks.translate(options.infile, options.outfile, frame=options.frame)
diff --git a/src/fastaq_trim_Ns_at_end b/src/fastaq_trim_Ns_at_end
deleted file mode 100755
index 200d71f..0000000
--- a/src/fastaq_trim_Ns_at_end
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Trims any Ns off each sequence in a fasta/q file. Does nothing to gaps in the middle, just trims the ends',
-    usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.trim_Ns_at_end(options.infile, options.outfile)
diff --git a/src/fastaq_trim_ends b/src/fastaq_trim_ends
deleted file mode 100755
index ffc662d..0000000
--- a/src/fastaq_trim_ends
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Trims set number of bases off each sequence in a fasta/q file',
-    usage = '%(prog)s [options] <fasta/q in> <bases off start> <bases off end> <fasta/q out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('start_trim', type=int, help='Number of bases to trim off start')
-parser.add_argument('end_trim', type=int, help='Number of bases to trim off end')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.trim(options.infile, options.outfile, options.start_trim, options.end_trim)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git



More information about the debian-med-commit mailing list