[med-svn] [fastaq] 02/04: Revert "Revert "Merge tag 'upstream/3.15.0'""
Sascha Steinbiss
satta at debian.org
Sun Jun 18 17:31:08 UTC 2017
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository fastaq.
commit 11f33d4fe0209ef719ed2e95748b130362a517df
Author: Sascha Steinbiss <satta at debian.org>
Date: Sun Jun 18 17:21:40 2017 +0000
Revert "Revert "Merge tag 'upstream/3.15.0'""
This reverts commit fbcbefdb8d1c46dc72186a52466ecc15216dd975.
---
README.md | 1 +
pyfastaq/runners/make_random_contigs.py | 2 +-
pyfastaq/runners/sort_by_name.py | 14 ++++++++++++++
pyfastaq/tasks.py | 12 ++++++++++++
pyfastaq/tests/data/tasks_test_sort_by_name.in.fa | 16 ++++++++++++++++
pyfastaq/tests/data/tasks_test_sort_by_name.out.fa | 16 ++++++++++++++++
pyfastaq/tests/tasks_test.py | 9 +++++++++
scripts/fastaq | 3 ++-
setup.py | 2 +-
9 files changed, 72 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index c17c54f..675cb2f 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,7 @@ Available commands
| scaffolds_to_contigs | Creates a file of contigs from a file of scaffolds |
| search_for_seq | Find all exact matches to a string (and its reverse complement) |
| sequence_trim | Trim exact matches to a given string off the start of every sequence |
+| sort_by_name | Sorts sequences in lexographical (name) order |
| sort_by_size | Sorts sequences in length order |
| split_by_base_count | Split multi sequence file into separate files |
| strip_illumina_suffix | Strips /1 or /2 off the end of every read name |
diff --git a/pyfastaq/runners/make_random_contigs.py b/pyfastaq/runners/make_random_contigs.py
index 5337120..6b5febb 100644
--- a/pyfastaq/runners/make_random_contigs.py
+++ b/pyfastaq/runners/make_random_contigs.py
@@ -9,7 +9,7 @@ def run(description):
parser.add_argument('--name_by_letters', action='store_true', help='Name the contigs A,B,C,... will start at A again if you get to Z')
parser.add_argument('--prefix', help='Prefix to add to start of every sequence name', default='')
parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None)
- parser.add_argument('contigs', type=int, help='Nunber of contigs to make')
+ parser.add_argument('contigs', type=int, help='Number of contigs to make')
parser.add_argument('length', type=int, help='Length of each contig')
parser.add_argument('outfile', help='Name of output file')
options = parser.parse_args()
diff --git a/pyfastaq/runners/sort_by_name.py b/pyfastaq/runners/sort_by_name.py
new file mode 100644
index 0000000..f57911f
--- /dev/null
+++ b/pyfastaq/runners/sort_by_name.py
@@ -0,0 +1,14 @@
+import argparse
+from pyfastaq import tasks
+
+def run(description):
+ parser = argparse.ArgumentParser(
+ description = description,
+ usage = 'fastaq sort_by_name <infile> <outfile>')
+ parser.add_argument('infile', help='Name of input file')
+ parser.add_argument('outfile', help='Name of output file')
+ options = parser.parse_args()
+ tasks.sort_by_name(
+ options.infile,
+ options.outfile
+ )
diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py
index 3107672..b788672 100644
--- a/pyfastaq/tasks.py
+++ b/pyfastaq/tasks.py
@@ -556,6 +556,18 @@ def sort_by_size(infile, outfile, smallest_first=False):
utils.close(fout)
+def sort_by_name(infile, outfile):
+ '''Sorts input sequence file by sort -d -k1,1, writes sorted output file.'''
+ seqs = {}
+ file_to_dict(infile, seqs)
+ #seqs = list(seqs.values())
+ #seqs.sort()
+ fout = utils.open_file_write(outfile)
+ for name in sorted(seqs):
+ print(seqs[name], file=fout)
+ utils.close(fout)
+
+
def to_fastg(infile, outfile, circular=None):
'''Writes a FASTG file in SPAdes format from input file. Currently only whether or not a sequence is circular is supported. Put circular=set of ids, or circular=filename to make those sequences circular in the output. Puts coverage=1 on all contigs'''
if circular is None:
diff --git a/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa b/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa
new file mode 100644
index 0000000..26c1d8f
--- /dev/null
+++ b/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa
@@ -0,0 +1,16 @@
+>scaffold1
+AGTCA
+>scaffold2
+ACGTTT
+>scaffold10
+A
+>scaffold12
+ACG
+>contig1
+AGTCA
+>contig2
+ACGTTT
+>contig10
+A
+>contig12
+ACG
\ No newline at end of file
diff --git a/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa b/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa
new file mode 100644
index 0000000..662b583
--- /dev/null
+++ b/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa
@@ -0,0 +1,16 @@
+>contig1
+AGTCA
+>contig10
+A
+>contig12
+ACG
+>contig2
+ACGTTT
+>scaffold1
+AGTCA
+>scaffold10
+A
+>scaffold12
+ACG
+>scaffold2
+ACGTTT
diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py
index b77dbf8..5db41d4 100644
--- a/pyfastaq/tests/tasks_test.py
+++ b/pyfastaq/tests/tasks_test.py
@@ -595,6 +595,15 @@ class TestSortBySize(unittest.TestCase):
self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'tasks_test_sort_by_size.out.rev.fa'), tmpfile, shallow=False))
os.unlink(tmpfile)
+class TestSortByName(unittest.TestCase):
+ def test_sort_by_name(self):
+ '''Test sort_by_name'''
+ infile = os.path.join(data_dir, 'tasks_test_sort_by_name.in.fa')
+ tmpfile = 'tmp.sort_by_name.fa'
+ tasks.sort_by_name(infile, tmpfile)
+ self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'tasks_test_sort_by_name.out.fa'), tmpfile, shallow=False))
+ os.unlink(tmpfile)
+
class TestStripIlluminaSuffix(unittest.TestCase):
def test_strip_illumina_suffix(self):
diff --git a/scripts/fastaq b/scripts/fastaq
index e0c470a..881af29 100755
--- a/scripts/fastaq
+++ b/scripts/fastaq
@@ -25,8 +25,9 @@ tasks = {
'scaffolds_to_contigs': 'Creates a file of contigs from a file of scaffolds',
'search_for_seq': 'Find all exact matches to a string (and its reverse complement)',
'sequence_trim': 'Trim exact matches to a given string off the start of every sequence',
+ 'sort_by_name': 'Sorts sequences in lexographical (name) order',
+ 'sort_by_size': 'Sorts sequences in length order',
'split_by_base_count': 'Split multi sequence file into separate files',
- 'sort_by_size': 'Sorts sequences in length order',
'strip_illumina_suffix': 'Strips /1 or /2 off the end of every read name',
'to_boulderio': 'Converts to Boulder-IO format, used by primer3',
'to_fasta': 'Converts a variety of input formats to nicely formatted FASTA format',
diff --git a/setup.py b/setup.py
index f9a6ed2..46f813f 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(
name='pyfastaq',
- version='3.14.0',
+ version='3.15.0',
description='Script to manipulate FASTA and FASTQ files, plus API for developers',
packages = find_packages(),
author='Martin Hunt',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git
More information about the debian-med-commit mailing list