[med-svn] [fastaq] 02/04: Revert "Revert "Merge tag 'upstream/3.15.0'""

Sun Jun 18 17:31:08 UTC 2017

This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository fastaq.

commit 11f33d4fe0209ef719ed2e95748b130362a517df
Author: Sascha Steinbiss <satta at debian.org>
Date:   Sun Jun 18 17:21:40 2017 +0000

    Revert "Revert "Merge tag 'upstream/3.15.0'""
    
    This reverts commit fbcbefdb8d1c46dc72186a52466ecc15216dd975.
---
 README.md                                          |  1 +
 pyfastaq/runners/make_random_contigs.py            |  2 +-
 pyfastaq/runners/sort_by_name.py                   | 14 ++++++++++++++
 pyfastaq/tasks.py                                  | 12 ++++++++++++
 pyfastaq/tests/data/tasks_test_sort_by_name.in.fa  | 16 ++++++++++++++++
 pyfastaq/tests/data/tasks_test_sort_by_name.out.fa | 16 ++++++++++++++++
 pyfastaq/tests/tasks_test.py                       |  9 +++++++++
 scripts/fastaq                                     |  3 ++-
 setup.py                                           |  2 +-
 9 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index c17c54f..675cb2f 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,7 @@ Available commands
 | scaffolds_to_contigs  | Creates a file of contigs from a file of scaffolds                   |
 | search_for_seq        | Find all exact matches to a string (and its reverse complement)      |
 | sequence_trim         | Trim exact matches to a given string off the start of every sequence |
+| sort_by_name          | Sorts sequences in lexicographical (name) order                      |
 | sort_by_size          | Sorts sequences in length order                                      |
 | split_by_base_count   | Split multi sequence file into separate files                        |
 | strip_illumina_suffix | Strips /1 or /2 off the end of every read name                       |
diff --git a/pyfastaq/runners/make_random_contigs.py b/pyfastaq/runners/make_random_contigs.py
index 5337120..6b5febb 100644
--- a/pyfastaq/runners/make_random_contigs.py
+++ b/pyfastaq/runners/make_random_contigs.py
@@ -9,7 +9,7 @@ def run(description):
     parser.add_argument('--name_by_letters', action='store_true', help='Name the contigs A,B,C,... will start at A again if you get to Z')
     parser.add_argument('--prefix', help='Prefix to add to start of every sequence name', default='')
     parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None)
-    parser.add_argument('contigs', type=int, help='Nunber of contigs to make')
+    parser.add_argument('contigs', type=int, help='Number of contigs to make')
     parser.add_argument('length', type=int, help='Length of each contig')
     parser.add_argument('outfile', help='Name of output file')
     options = parser.parse_args()
diff --git a/pyfastaq/runners/sort_by_name.py b/pyfastaq/runners/sort_by_name.py
new file mode 100644
index 0000000..f57911f
--- /dev/null
+++ b/pyfastaq/runners/sort_by_name.py
@@ -0,0 +1,14 @@
+import argparse
+from pyfastaq import tasks
+
+def run(description):
+    parser = argparse.ArgumentParser(
+        description = description,
+        usage = 'fastaq sort_by_name <infile> <outfile>')
+    parser.add_argument('infile', help='Name of input file')
+    parser.add_argument('outfile', help='Name of output file')
+    options = parser.parse_args()
+    tasks.sort_by_name(
+        options.infile,
+        options.outfile
+    )
diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py
index 3107672..b788672 100644
--- a/pyfastaq/tasks.py
+++ b/pyfastaq/tasks.py
@@ -556,6 +556,18 @@ def sort_by_size(infile, outfile, smallest_first=False):
     utils.close(fout)
 
 
+def sort_by_name(infile, outfile):
+    '''Sorts input sequence file by sort -d -k1,1, writes sorted output file.'''
+    seqs = {}
+    file_to_dict(infile, seqs)
+    #seqs = list(seqs.values())
+    #seqs.sort()
+    fout = utils.open_file_write(outfile)
+    for name in sorted(seqs):
+        print(seqs[name], file=fout)
+    utils.close(fout)
+
+
 def to_fastg(infile, outfile, circular=None):
     '''Writes a FASTG file in SPAdes format from input file. Currently only whether or not a sequence is circular is supported. Put circular=set of ids, or circular=filename to make those sequences circular in the output. Puts coverage=1 on all contigs'''
     if circular is None:
diff --git a/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa b/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa
new file mode 100644
index 0000000..26c1d8f
--- /dev/null
+++ b/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa
@@ -0,0 +1,16 @@
+>scaffold1
+AGTCA
+>scaffold2
+ACGTTT
+>scaffold10
+A
+>scaffold12
+ACG
+>contig1
+AGTCA
+>contig2
+ACGTTT
+>contig10
+A
+>contig12
+ACG
\ No newline at end of file
diff --git a/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa b/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa
new file mode 100644
index 0000000..662b583
--- /dev/null
+++ b/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa
@@ -0,0 +1,16 @@
+>contig1
+AGTCA
+>contig10
+A
+>contig12
+ACG
+>contig2
+ACGTTT
+>scaffold1
+AGTCA
+>scaffold10
+A
+>scaffold12
+ACG
+>scaffold2
+ACGTTT
diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py
index b77dbf8..5db41d4 100644
--- a/pyfastaq/tests/tasks_test.py
+++ b/pyfastaq/tests/tasks_test.py
@@ -595,6 +595,15 @@ class TestSortBySize(unittest.TestCase):
         self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'tasks_test_sort_by_size.out.rev.fa'), tmpfile, shallow=False))
         os.unlink(tmpfile)
 
+class TestSortByName(unittest.TestCase):
+    def test_sort_by_name(self):
+        '''Test sort_by_name'''
+        infile = os.path.join(data_dir, 'tasks_test_sort_by_name.in.fa')
+        tmpfile = 'tmp.sort_by_name.fa'
+        tasks.sort_by_name(infile, tmpfile)
+        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'tasks_test_sort_by_name.out.fa'), tmpfile, shallow=False))
+        os.unlink(tmpfile)
+
 
 class TestStripIlluminaSuffix(unittest.TestCase):
     def test_strip_illumina_suffix(self):
diff --git a/scripts/fastaq b/scripts/fastaq
index e0c470a..881af29 100755
--- a/scripts/fastaq
+++ b/scripts/fastaq
@@ -25,8 +25,9 @@ tasks = {
     'scaffolds_to_contigs':   'Creates a file of contigs from a file of scaffolds',
     'search_for_seq':         'Find all exact matches to a string (and its reverse complement)',
     'sequence_trim':          'Trim exact matches to a given string off the start of every sequence',
+    'sort_by_name':           'Sorts sequences in lexicographical (name) order',
+    'sort_by_size':           'Sorts sequences in length order', 
     'split_by_base_count':    'Split multi sequence file into separate files',
-    'sort_by_size':           'Sorts sequences in length order',
     'strip_illumina_suffix':  'Strips /1 or /2 off the end of every read name',
     'to_boulderio':           'Converts to Boulder-IO format, used by primer3',
     'to_fasta':               'Converts a variety of input formats to nicely formatted FASTA format',
diff --git a/setup.py b/setup.py
index f9a6ed2..46f813f 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='pyfastaq',
-    version='3.14.0',
+    version='3.15.0',
     description='Script to manipulate FASTA and FASTQ files, plus API for developers',
     packages = find_packages(),
     author='Martin Hunt',

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git