[med-svn] [fastaq] 01/01: removed src folder as this is now scripts
Jorge Soares
jssoares-guest at moszumanska.debian.org
Wed Oct 8 13:40:33 UTC 2014
This is an automated email from the git hooks/post-receive script.
jssoares-guest pushed a commit to branch master
in repository fastaq.
commit 037b3fb353ac351729daa733ac631ebfeecdfd4e
Author: Jorge Soares <j.s.soares at gmail.com>
Date: Wed Oct 8 14:48:38 2014 +0100
removed src folder as this is now scripts
---
src/fastaq_capillary_to_pairs | 12 -----
src/fastaq_chunker | 21 ---------
src/fastaq_count_sequences | 11 -----
src/fastaq_deinterleave | 14 ------
src/fastaq_enumerate_names | 19 --------
src/fastaq_extend_gaps | 13 ------
src/fastaq_fasta_to_fastq | 13 ------
src/fastaq_filter | 24 ----------
src/fastaq_get_ids | 12 -----
src/fastaq_get_seq_flanking_gaps | 14 ------
src/fastaq_insert_or_delete_bases | 94 ---------------------------------------
src/fastaq_interleave | 13 ------
src/fastaq_make_random_contigs | 25 -----------
src/fastaq_replace_bases | 14 ------
src/fastaq_reverse_complement | 12 -----
src/fastaq_scaffolds_to_contigs | 13 ------
src/fastaq_search_for_seq | 13 ------
src/fastaq_split_by_base_count | 15 -------
src/fastaq_strip_illumina_suffix | 12 -----
src/fastaq_to_fasta | 19 --------
src/fastaq_to_mira_xml | 12 -----
src/fastaq_to_perfect_reads | 86 -----------------------------------
src/fastaq_to_quasr_primers_file | 12 -----
src/fastaq_to_random_subset | 36 ---------------
src/fastaq_to_tiling_bam | 79 --------------------------------
src/fastaq_to_unique_by_id | 12 -----
src/fastaq_translate | 13 ------
src/fastaq_trim_Ns_at_end | 12 -----
src/fastaq_trim_ends | 14 ------
29 files changed, 659 deletions(-)
diff --git a/src/fastaq_capillary_to_pairs b/src/fastaq_capillary_to_pairs
deleted file mode 100755
index 0d4a48f..0000000
--- a/src/fastaq_capillary_to_pairs
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Given a fasta/q file of capillary reads, makes an interleaved file of read pairs (where more than read from same ligation, takes the longest read) and a file of unpaired reads. Replaces the .p1k/.q1k part of read names to denote fwd/rev reads with /1 and /2',
- usage = '%(prog)s <infile> <outfiles prefix>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outprefix', help='Prefix of output files', metavar='outfiles prefix')
-options = parser.parse_args()
-tasks.capillary_to_pairs(options.infile, options.outprefix)
diff --git a/src/fastaq_chunker b/src/fastaq_chunker
deleted file mode 100755
index d1aeb68..0000000
--- a/src/fastaq_chunker
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Splits a multi fasta/q file into separate files. Splits sequences into chunks of a fixed size. Aims for chunk_size chunks in each file, but allows a little extra, so chunk can be up to (chunk_size + tolerance), to prevent tiny chunks made from the ends of sequences',
- usage = '%(prog)s [options] <fasta/q in> <prefix of output files> <chunk size> <tolerance>')
-parser.add_argument('infile', help='Name of input fasta/q file to be split')
-parser.add_argument('outprefix', help='Name of output fasta/q file')
-parser.add_argument('chunk_size', type=int, help='Size of each chunk')
-parser.add_argument('tolerance', type=int, help='Tolerance allowed in chunk size')
-parser.add_argument('--skip_all_Ns', action='store_true', help='Do not output any sequence that consists of all Ns')
-options = parser.parse_args()
-tasks.split_by_fixed_size(
- options.infile,
- options.outprefix,
- options.chunk_size,
- options.tolerance,
- skip_if_all_Ns=options.skip_all_Ns
-)
diff --git a/src/fastaq_count_sequences b/src/fastaq_count_sequences
deleted file mode 100755
index fcb7911..0000000
--- a/src/fastaq_count_sequences
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Counts the number of sequences in a fasta/q file',
- usage = '%(prog)s <fasta/q in>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-options = parser.parse_args()
-print(tasks.count_sequences(options.infile))
diff --git a/src/fastaq_deinterleave b/src/fastaq_deinterleave
deleted file mode 100755
index a28c505..0000000
--- a/src/fastaq_deinterleave
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Deinterleaves fasta/q file, so that reads are written alternately between two output files',
- usage = '%(prog)s [options] <fasta/q in> <out_fwd> <out_rev>')
-parser.add_argument('--fasta_out', action='store_true', help='Use this to write output as fasta (default is same as input)', default=False)
-parser.add_argument('infile', help='Name of fasta/q file to be deinterleaved')
-parser.add_argument('out_fwd', help='Name of output fasta/q file of forwards reads')
-parser.add_argument('out_rev', help='Name of output fasta/q file of reverse reads')
-options = parser.parse_args()
-tasks.deinterleave(options.infile, options.out_fwd, options.out_rev, fasta_out=options.fasta_out)
diff --git a/src/fastaq_enumerate_names b/src/fastaq_enumerate_names
deleted file mode 100755
index 89831cb..0000000
--- a/src/fastaq_enumerate_names
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Renames sequences in a file, calling them 1,2,3... etc',
- usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('--start_index', type=int, help='Starting number [%(default)s]', default=1)
-parser.add_argument('--rename_file', help='If used, will write a file of old name to new name')
-parser.add_argument('--keep_suffix', action='store_true', help='Use this to keep a /1 or /2 suffix at the end of each name')
-parser.add_argument('infile', help='Name of fasta/q file to be read')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.enumerate_names(options.infile,
- options.outfile,
- start_index=options.start_index,
- keep_illumina_suffix=options.keep_suffix,
- rename_file=options.rename_file)
diff --git a/src/fastaq_extend_gaps b/src/fastaq_extend_gaps
deleted file mode 100755
index e8622c3..0000000
--- a/src/fastaq_extend_gaps
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Extends the length of all gaps (and trims the start/end of sequences) in a fasta/q file. Does this by replacing a set number of bases either side of each gap with Ns. Any sequence that ends up as all Ns is lost',
- usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('--trim_number', type=int, help='Number of bases to trim around each gap, and off ends of each sequence [%(default)s]', default=100)
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.extend_gaps(options.infile, options.outfile, options.trim_number)
diff --git a/src/fastaq_fasta_to_fastq b/src/fastaq_fasta_to_fastq
deleted file mode 100755
index 18b6edb..0000000
--- a/src/fastaq_fasta_to_fastq
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Given a fasta and qual file, makes a fastq file',
- usage = '%(prog)s <fasta in> <qual in> <fastq out>')
-parser.add_argument('fasta', help='Name of input fasta file', metavar='fasta in')
-parser.add_argument('qual', help='Name of input quality scores file', metavar='qual in')
-parser.add_argument('outfile', help='Name of output fastq file', metavar='fastq out')
-options = parser.parse_args()
-tasks.fasta_to_fastq(options.fasta, options.qual, options.outfile)
diff --git a/src/fastaq_filter b/src/fastaq_filter
deleted file mode 100755
index cb260e6..0000000
--- a/src/fastaq_filter
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Filters a fasta/q file by sequence length and/or by name matching a regular expression',
- usage = '%(prog)s [options] <infile> <outfile>')
-parser.add_argument('--min_length', type=int, help='Minimum length of sequence to keep [%(default)s]', default=0, metavar='INT')
-parser.add_argument('--max_length', type=float, help='Maximum length of sequence to keep [%(default)s]', default=float('inf'), metavar='INT')
-parser.add_argument('--regex', help='If given, only reads with a name matching the regular expression will be kept')
-parser.add_argument('--ids_file', help='If given, only reads whose ID is in th given file will be used. One ID per line of file.')
-parser.add_argument('-v', '--invert', action='store_true', help='Keep sequences that do not match the filters')
-parser.add_argument('infile', help='Name of fasta/q file to be filtered')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.filter(options.infile,
- options.outfile,
- minlength=options.min_length,
- maxlength=options.max_length,
- regex=options.regex,
- ids_file=options.ids_file,
- invert=options.invert
-)
diff --git a/src/fastaq_get_ids b/src/fastaq_get_ids
deleted file mode 100755
index 59b9e0e..0000000
--- a/src/fastaq_get_ids
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Gets IDs from each sequence in a fasta or fastq file',
- usage = '%(prog)s <infile> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.get_ids(options.infile, options.outfile)
diff --git a/src/fastaq_get_seq_flanking_gaps b/src/fastaq_get_seq_flanking_gaps
deleted file mode 100755
index 0c54154..0000000
--- a/src/fastaq_get_seq_flanking_gaps
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Gets the sequences either side of gaps in a fasta/q file',
- usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('--left', type=int, help='Number of bases to get to left of gap [%(default)s]', default=25, metavar='INT')
-parser.add_argument('--right', type=int, help='Number of bases to get to right of gap [%(default)s]', default=25, metavar='INT')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.get_seqs_flanking_gaps(options.infile, options.outfile, options.left, options.right)
diff --git a/src/fastaq_insert_or_delete_bases b/src/fastaq_insert_or_delete_bases
deleted file mode 100755
index 61e1e80..0000000
--- a/src/fastaq_insert_or_delete_bases
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import sys
-import random
-from fastaq import sequences, utils, intervals
-
-parser = argparse.ArgumentParser(
- description = 'Deletes or inserts bases at given position(s) from a fasta/q file',
- usage = '%(prog)s <fasta/q in> <outfile>')
-parser.add_argument('infile', help='Name of fasta/q file to be read')
-parser.add_argument('outfile', help='Name of output file')
-parser.add_argument('-d','--delete', action='append', help='Delete the given bases from the given sequence. Format same as samtools view: name:start-end. This option can be used multiple times (once for each region to delete). Overlapping coords will be merged before deleting', metavar='Name:start:bases')
-parser.add_argument('--delete_range', help='Deletes bases starting at position P in each sequence of the input file. Deletes start + (n-1)*step bases from sequence n.', metavar='P,start,step')
-parser.add_argument('-i','--insert', action='append', help='Insert a random string of bases at the given position. Format is name:position:number_to_add. Bases are added after the position. This option can be used multiple times', metavar='Name:start:bases')
-parser.add_argument('--insert_range', help='Inserts random bases starting after position P in each sequence of the input file. Inserts start + (n-1)*step bases into sequence n.', metavar='P,start,step')
-options = parser.parse_args()
-
-test_ops = [int(x is not None) for x in [options.delete, options.insert, options.delete_range, options.insert_range]]
-
-if sum(test_ops) != 1:
- print('Must use one of --delete, --insert, --delete_range, --insert_range. Cannot continue', file=sys.stderr)
- sys.exit(1)
-
-
-def range2dic(range_in):
- if range_in is None:
- return {}
- (pos, start, step) = range_in.split(',')
- d = {}
- d['pos'] = int(pos) - 1
- d['bases'] = int(start)
- d['step'] = int(step)
- return d
-
-delete_range = range2dic(options.delete_range)
-insert_range = range2dic(options.insert_range)
-
-
-# convert the -d regions into sequence name, start and end coords
-to_delete = {}
-if options.delete:
- for s in options.delete:
- id, coords = s.rsplit(':')
- start, end = [int(x)-1 for x in coords.split('-')]
- if id not in to_delete:
- to_delete[id] = []
- to_delete[id].append(intervals.Interval(start, end))
-
-
-to_insert = {}
-if options.insert:
- for s in options.insert:
- id, pos, bases = s.rsplit(':',2)
- pos = int(pos) - 1
- bases = int(bases)
- if id not in to_insert:
- to_insert[id] = []
- to_insert[id].append((pos, bases))
-
-
-assert len(to_delete) * len(to_insert) == 0
-
-# merge overlapping regions to be deleted
-for l in to_delete.values():
- intervals.merge_overlapping_in_list(l)
-
-# sort positions to be inserted
-for l in to_insert.values():
- l.sort()
-
-# read in the fasta/q file and print outfile with deleted sequences
-seq_reader = sequences.file_reader(options.infile)
-f = utils.open_file_write(options.outfile)
-
-for seq in seq_reader:
- if seq.id in to_delete:
- # delete regions for this sequence, but start at the end so the
- # coords don't get messed up after the first deletion
- for inter in reversed(to_delete[seq.id]):
- seq.seq = seq.seq[:inter.start] + seq.seq[inter.end + 1:]
- elif options.delete_range:
- seq.seq = seq.seq[:delete_range['pos']] + seq.seq[delete_range['pos'] + delete_range['bases']:]
- delete_range['bases'] += delete_range['step']
- elif seq.id in to_insert:
- for pos, bases in reversed(to_insert[seq.id]):
- seq.seq = seq.seq[:pos + 1] + ''.join([random.choice('ACGT') for x in range(bases)]) + seq.seq[pos + 1:]
- elif options.insert_range:
- seq.seq = seq.seq[:insert_range['pos'] + 1] + ''.join([random.choice('ACGT') for x in range(insert_range['bases'])]) + seq.seq[insert_range['pos'] + 1:]
- insert_range['bases'] += insert_range['step']
-
- print(seq, file=f)
-
-utils.close(f)
diff --git a/src/fastaq_interleave b/src/fastaq_interleave
deleted file mode 100755
index 4b39a3e..0000000
--- a/src/fastaq_interleave
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Interleaves two fasta/q files, so that reads are written alternately first/second in output file',
- usage = '%(prog)s [options] <fasta/q 1> <fasta/q 2> <outfile>')
-parser.add_argument('infile_1', help='Name of first input fasta/q file')
-parser.add_argument('infile_2', help='Name of second input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file of interleaved reads')
-options = parser.parse_args()
-tasks.interleave(options.infile_1, options.infile_2, options.outfile)
diff --git a/src/fastaq_make_random_contigs b/src/fastaq_make_random_contigs
deleted file mode 100755
index c6774fe..0000000
--- a/src/fastaq_make_random_contigs
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Makes a multi-fasta file of random sequences, all of the same length. Each base has equal chance of being A,C,G or T',
- usage = '%(prog)s [options] <number of sequences> <length of each sequence> <fasta out>')
-parser.add_argument('--first_number', type=int, help='If numbering the sequences, the first sequence gets this number [%(default)s]', default=1)
-parser.add_argument('--name_by_letters', action='store_true', help='Name the contigs A,B,C,... will start at A again if you get to Z')
-parser.add_argument('--prefix', help='Prefix to add to start of every sequence name', default='')
-parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None)
-parser.add_argument('contigs', type=int, help='Nunber of contigs to make')
-parser.add_argument('length', type=int, help='Length of each contig')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.make_random_contigs(
- options.contigs,
- options.length,
- options.outfile,
- name_by_letters=options.name_by_letters,
- prefix=options.prefix,
- seed=options.seed,
- first_number=options.first_number
-)
diff --git a/src/fastaq_replace_bases b/src/fastaq_replace_bases
deleted file mode 100755
index 6ce2fc0..0000000
--- a/src/fastaq_replace_bases
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Replaces all occurences of one letter with another in a fasta/q file',
- usage = '%(prog)s <fasta/q in> <outfile> <old> <new>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-parser.add_argument('old', help='Base to be replaced')
-parser.add_argument('new', help='Replace with this letter')
-options = parser.parse_args()
-tasks.replace_bases(options.infile, options.outfile, options.old, options.new)
diff --git a/src/fastaq_reverse_complement b/src/fastaq_reverse_complement
deleted file mode 100755
index 147e01f..0000000
--- a/src/fastaq_reverse_complement
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Reverse complements all sequences in a fasta/q file',
- usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.reverse_complement(options.infile, options.outfile)
diff --git a/src/fastaq_scaffolds_to_contigs b/src/fastaq_scaffolds_to_contigs
deleted file mode 100755
index 46d4861..0000000
--- a/src/fastaq_scaffolds_to_contigs
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Creates a file of contigs from a file of scaffolds - i.e. breaks at every gap in the input',
- usage = '%(prog)s [options] <infile> <outfile>')
-parser.add_argument('--number_contigs', action='store_true', help='Use this to enumerate contig names 1,2,3,... within each scaffold')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output contigs file')
-options = parser.parse_args()
-tasks.scaffolds_to_contigs(options.infile, options.outfile, number_contigs=options.number_contigs)
diff --git a/src/fastaq_search_for_seq b/src/fastaq_search_for_seq
deleted file mode 100755
index c00ed7a..0000000
--- a/src/fastaq_search_for_seq
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Searches for an exact match on a given string and its reverese complement, in every sequences of a fasta/q file. Case insensitive. Guaranteed to find all hits',
- usage = '%(prog)s [options] <fasta/q in> <outfile> <search_string>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of outputfile. Tab-delimited output: sequence name, position, strand')
-parser.add_argument('search_string', help='String to search for in the sequences')
-options = parser.parse_args()
-tasks.search_for_seq(options.infile, options.outfile, options.search_string)
diff --git a/src/fastaq_split_by_base_count b/src/fastaq_split_by_base_count
deleted file mode 100755
index dd7b43d..0000000
--- a/src/fastaq_split_by_base_count
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Splits a multi fasta/q file into separate files. Does not split sequences. Puts up to max_bases into each split file. The exception is that any sequence longer than max_bases is put into its own file.',
- usage = '%(prog)s [options] <fasta/q in> <prefix of output files> <max_bases>')
-parser.add_argument('infile', help='Name of input fasta/q file to be split')
-parser.add_argument('outprefix', help='Name of output fasta/q file')
-parser.add_argument('max_bases', type=int, help='Max bases in each output split file', metavar='max_bases')
-parser.add_argument('--max_seqs', type=int, help='Max number of sequences in each output split file [no limit]', metavar='INT')
-
-options = parser.parse_args()
-tasks.split_by_base_count(options.infile, options.outprefix, options.max_bases, options.max_seqs)
diff --git a/src/fastaq_strip_illumina_suffix b/src/fastaq_strip_illumina_suffix
deleted file mode 100755
index 6a29a42..0000000
--- a/src/fastaq_strip_illumina_suffix
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Strips /1 or /2 off the end of every read name in a fasta/q file',
- usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.strip_illumina_suffix(options.infile, options.outfile)
diff --git a/src/fastaq_to_fasta b/src/fastaq_to_fasta
deleted file mode 100755
index 742e95f..0000000
--- a/src/fastaq_to_fasta
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Converts sequence file to FASTA format',
- usage = '%(prog)s <infile> <outfile>')
-parser.add_argument('infile', help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
-parser.add_argument('outfile', help='Name of output file')
-parser.add_argument('-l', '--line_length', type=int, help='Number of bases on each sequence line of output file [%(default)s]', default=60)
-parser.add_argument('-s', '--strip_after_whitespace', action='store_true', help='Remove everything after first whitesapce in every sequence name')
-options = parser.parse_args()
-tasks.to_fasta(
- options.infile,
- options.outfile,
- line_length=options.line_length,
- strip_after_first_whitespace=options.strip_after_whitespace
-)
diff --git a/src/fastaq_to_mira_xml b/src/fastaq_to_mira_xml
deleted file mode 100755
index 582d669..0000000
--- a/src/fastaq_to_mira_xml
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Creates an xml file from a fasta/q file of reads, for use with Mira assembler',
- usage = '%(prog)s [options] <fastq_in> <xml_out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('xml_out', help='Name of output xml file')
-options = parser.parse_args()
-tasks.fastaq_to_mira_xml(options.infile, options.xml_out)
diff --git a/src/fastaq_to_perfect_reads b/src/fastaq_to_perfect_reads
deleted file mode 100755
index 6f3ca10..0000000
--- a/src/fastaq_to_perfect_reads
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import random
-from math import floor, ceil
-from fastaq import sequences, utils
-import sys
-
-parser = argparse.ArgumentParser(
- description = 'Makes perfect paired end fastq reads from a fasta/q file, with insert sizes sampled from a normal distribution. Read orientation is innies. Output is an interleaved fastq file.',
- usage = '%(prog)s <fasta/q in> <out.fastq> <mean insert size> <insert std deviation> <mean coverage> <read length>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fastq file')
-parser.add_argument('mean_insert', type=int, help='Mean insert size of read pairs', metavar='mean insert size')
-parser.add_argument('insert_std', type=float, help='Standard devation of insert size', metavar='insert std deviation')
-parser.add_argument('coverage', type=float, help='Mean coverage of the reads', metavar='mean coverage')
-parser.add_argument('readlength', type=int, help='Length of each read', metavar='read length')
-parser.add_argument('--fragments', help='Write FASTA sequences of fragments (i.e. read pairs plus sequences in between them) to the given filename', metavar='FILENAME')
-parser.add_argument('--no_n', action='store_true', help='Don\'t allow any N or n characters in the reads')
-parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None, metavar='INT')
-options = parser.parse_args()
-
-random.seed(a=options.seed)
-
-seq_reader = sequences.file_reader(options.infile)
-fout = utils.open_file_write(options.outfile)
-pair_counter = 1
-
-if options.fragments:
- fout_frags = utils.open_file_write(options.fragments)
-
-for ref in seq_reader:
- # check if current seq is long enough
- if len(ref) < options.mean_insert + 4 * options.insert_std:
- print('Warning, sequence ', ref.id, ' too short. Skipping it...', file=sys.stderr)
- continue
-
- # work out how many reads to simulate
- read_pairs = int(0.5 * options.coverage * len(ref) / options.readlength)
-
- # it's possible that we pick the same fragment twice, in which case the
- # reads would get the same name. So remember the frag coords
- used_fragments = {} # (middle_position, length) => count
-
- # do the simulation: pick insert size from normal distribution, and
- # position in genome from uniform distribution
- x = 0
- while x < read_pairs:
- isize = int(random.normalvariate(options.mean_insert, options.insert_std))
- while isize > len(ref) or isize < options.readlength:
- isize = int(random.normalvariate(options.mean_insert, options.insert_std))
- middle_pos = random.randint(ceil(0.5 *isize), floor(len(ref) - 0.5 * isize))
- read_start1 = int(middle_pos - ceil(0.5 * isize))
- read_start2 = read_start1 + isize - options.readlength
-
- readname = ':'.join([ref.id, str(pair_counter), str(read_start1+1), str(read_start2+1)])
-
- fragment = (middle_pos, isize)
- if fragment in used_fragments:
- used_fragments[fragment] += 1
- readname += '.dup.' + str(used_fragments[fragment])
- else:
- used_fragments[fragment] = 1
-
- read1 = sequences.Fastq(readname + '/1', ref.seq[read_start1:read_start1 + options.readlength], 'I' * options.readlength)
- read2 = sequences.Fastq(readname + '/2', ref.seq[read_start2:read_start2 + options.readlength], 'I' * options.readlength)
-
-
- if options.no_n and ('n' in read1.seq or 'N' in read1.seq or 'n' in read2.seq or 'N' in read2.seq):
- continue
-
- read2.revcomp()
-
- print(read1, file=fout)
- print(read2, file=fout)
-
- if options.fragments:
- frag = sequences.Fasta(readname, ref.seq[read_start1:read_start2 + options.readlength])
- print(frag, file=fout_frags)
-
- pair_counter += 1
- x += 1
-
-utils.close(fout)
-if options.fragments:
- utils.close(fout_frags)
diff --git a/src/fastaq_to_quasr_primers_file b/src/fastaq_to_quasr_primers_file
deleted file mode 100755
index 8e5bf7c..0000000
--- a/src/fastaq_to_quasr_primers_file
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated',
- usage = '%(prog)s <fasta/q in> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.to_quasr_primers(options.infile, options.outfile)
diff --git a/src/fastaq_to_random_subset b/src/fastaq_to_random_subset
deleted file mode 100755
index b4f11c5..0000000
--- a/src/fastaq_to_random_subset
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import argparse
-import random
-from fastaq import sequences, utils
-
-parser = argparse.ArgumentParser(
- description = 'Takes a random subset of reads from a fasta/q file and optionally the corresponding read ' +
- 'from a mates file. Ouptut is interleaved if mates file given',
- usage = '%(prog)s [options] <fasta/q in> <outfile> <probablilty of keeping read (pair) in [0,100]>')
-parser.add_argument('--mate_file', help='Name of fasta/q mates file')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of fasta/q output file')
-parser.add_argument('probability', type=int, help='Probability of keeping any given read (pair) in [0,100]', metavar='INT')
-options = parser.parse_args()
-
-seq_reader = sequences.file_reader(options.infile)
-fout = utils.open_file_write(options.outfile)
-
-if options.mate_file:
- mate_seq_reader = sequences.file_reader(options.mate_file)
-
-for seq in seq_reader:
- if options.mate_file:
- try:
- mate_seq = next(mate_seq_reader)
- except StopIteration:
- print('Error! Didn\'t get mate for read', seq.id, file=sys.stderr)
- sys.exit(1)
- if random.randint(0, 100) <= options.probability:
- print(seq, file=fout)
- if options.mate_file:
- print(mate_seq, file=fout)
-
-utils.close(fout)
diff --git a/src/fastaq_to_tiling_bam b/src/fastaq_to_tiling_bam
deleted file mode 100755
index 9b9738d..0000000
--- a/src/fastaq_to_tiling_bam
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import sys
-import os
-from fastaq import sequences, utils
-
-parser = argparse.ArgumentParser(
- description = 'Takes a fasta/q file. Makes a BAM file containing perfect (unpaired) reads tiling the whole genome',
- usage = '%(prog)s [options] <fasta/q in> <read length> <read step> <read prefix> <out.bam>',
- epilog = 'Important: assumes that samtools is in your path')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('read_length', type=int, help='Length of reads')
-parser.add_argument('read_step', type=int, help='Distance between start of each read')
-parser.add_argument('read_prefix', help='Prefix of read names')
-parser.add_argument('outfile', help='Name of output BAM file')
-parser.add_argument('--read_group', help='Add the given read group ID to all reads [%(default)s]' ,default='42')
-options = parser.parse_args()
-
-# make a header first - we need to add the @RG line to the default header made by samtools
-tmp_empty_file = options.outfile + '.tmp.empty'
-f = utils.open_file_write(tmp_empty_file)
-utils.close(f)
-try:
- f = os.popen('samtools view -H -T ' + options.infile + ' ' + tmp_empty_file)
-except IOError:
- print('Error making tmp header file', file=sys.stderr)
- sys.exit(1)
-
-header_lines = f.readlines()
-header_lines.append('@RG\tID:' + options.read_group + '\tSM:FAKE')
-f.close()
-os.unlink(tmp_empty_file)
-
-seq_reader = sequences.file_reader(options.infile)
-try:
- f = os.popen('samtools view -hbS - > ' + options.outfile, 'w')
-except IOError:
- print("Error opening for writing BAM file '" + options.outfile + "'", file=sys.stderr)
- sys.exit(1)
-
-print(''.join(header_lines), file=f)
-
-for seq in seq_reader:
- end_range = len(seq)
- if len(seq) < options.read_length:
- end_range = 1
- for i in range(0, end_range, options.read_step):
- if len(seq) <= options.read_length:
- start = 0
- end = len(seq) - 1
- else:
- start = i
- end = start + options.read_length - 1
-
- if end > len(seq) - 1:
- end = len(seq) - 1
- start = end - options.read_length + 1
-
- read = sequences.Fastq(options.read_prefix + ':' + seq.id + ':' + str(start + 1) + ':' + str(end + 1), seq[start:end+1], 'I' * (end - start + 1))
-
- print ('\t'.join([read.id,
- '0',
- seq.id,
- str(start + 1),
- '60',
- str(len(read)) + 'M',
- '*',
- '*',
- '*',
- read.seq,
- read.qual,
- 'RG:Z:' + options.read_group]), file=f)
-
- if end == len(seq) - 1:
- break
-
-f.close()
-
diff --git a/src/fastaq_to_unique_by_id b/src/fastaq_to_unique_by_id
deleted file mode 100755
index e743a92..0000000
--- a/src/fastaq_to_unique_by_id
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Removes duplicate sequences from a fasta/q file, based on their names. If the same name is found more than once, then the longest sequence is kept. Order of sequences is preserved in output',
- usage = '%(prog)s <infile> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.to_unique_by_id(options.infile, options.outfile)
diff --git a/src/fastaq_translate b/src/fastaq_translate
deleted file mode 100755
index 9ec04c7..0000000
--- a/src/fastaq_translate
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Translates all sequences in a fasta or fastq file. Output is always fasta format',
- usage = '%(prog)s <in.fasta/q> <out.fasta>')
-parser.add_argument('--frame', type=int, choices=[0,1,2], help='Frame to translate [%(default)s]', default=0)
-parser.add_argument('infile', help='Name of fasta/q file to be translated', metavar='in.fasta/q')
-parser.add_argument('outfile', help='Name of output fasta file', metavar='out.fasta')
-options = parser.parse_args()
-tasks.translate(options.infile, options.outfile, frame=options.frame)
diff --git a/src/fastaq_trim_Ns_at_end b/src/fastaq_trim_Ns_at_end
deleted file mode 100755
index 200d71f..0000000
--- a/src/fastaq_trim_Ns_at_end
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Trims any Ns off each sequence in a fasta/q file. Does nothing to gaps in the middle, just trims the ends',
- usage = '%(prog)s [options] <fasta/q in> <fasta/q out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.trim_Ns_at_end(options.infile, options.outfile)
diff --git a/src/fastaq_trim_ends b/src/fastaq_trim_ends
deleted file mode 100755
index ffc662d..0000000
--- a/src/fastaq_trim_ends
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
- description = 'Trims set number of bases off each sequence in a fasta/q file',
- usage = '%(prog)s [options] <fasta/q in> <bases off start> <bases off end> <fasta/q out>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('start_trim', type=int, help='Number of bases to trim off start')
-parser.add_argument('end_trim', type=int, help='Number of bases to trim off end')
-parser.add_argument('outfile', help='Name of output fasta/q file')
-options = parser.parse_args()
-tasks.trim(options.infile, options.outfile, options.start_trim, options.end_trim)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git
More information about the debian-med-commit mailing list