[med-svn] [fastaq] 01/03: Imported Upstream version 3.8.0

Sascha Steinbiss sascha-guest at moszumanska.debian.org
Sat Sep 26 15:22:58 UTC 2015


This is an automated email from the git hooks/post-receive script.

sascha-guest pushed a commit to branch master
in repository fastaq.

commit 07caf61f1c3a84c73ce4170adffd6bbdc44236c9
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date:   Sat Sep 26 14:42:02 2015 +0000

    Imported Upstream version 3.8.0
---
 pyfastaq/common.py                                 |  2 +-
 pyfastaq/tasks.py                                  | 29 ++++++++++++++++++++
 .../data/tasks_test_stats_from_fai.in.empty.fai    |  0
 .../tests/data/tasks_test_stats_from_fai.in.fai    |  5 ++++
 pyfastaq/tests/tasks_test.py                       | 31 ++++++++++++++++++++++
 setup.py                                           |  2 +-
 6 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/pyfastaq/common.py b/pyfastaq/common.py
index cef5770..f19280d 100644
--- a/pyfastaq/common.py
+++ b/pyfastaq/common.py
@@ -1 +1 @@
-version = '3.7.0'
+version = '3.8.0'
diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py
index 4199a9e..9518176 100644
--- a/pyfastaq/tasks.py
+++ b/pyfastaq/tasks.py
@@ -821,6 +821,35 @@ def strip_illumina_suffix(infile, outfile):
     utils.close(f_out)
 
 
+def stats_from_fai(infile):
+    '''Returns dictionary of length stats from an fai file. Keys are: longest, shortest, mean, total_length, N50, number'''
+    f = utils.open_file_read(infile)
+    try:
+        lengths = sorted([int(line.split('\t')[1]) for line in f], reverse=True)
+    except:
+        raise Error('Error getting lengths from fai file ' + infile)
+    utils.close(f)
+
+    stats = {}
+    if len(lengths) > 0:
+        stats['longest'] = max(lengths)
+        stats['shortest'] = min(lengths)
+        stats['total_length'] = sum(lengths)
+        stats['mean'] = stats['total_length'] / len(lengths)
+        stats['number'] = len(lengths)
+ 
+        cumulative_length = 0
+        for length in lengths:
+            cumulative_length += length
+            if cumulative_length >= 0.5 * stats['total_length']:
+                stats['N50'] = length
+                break
+    else:
+        stats = {x: 0 for x in ('longest', 'shortest', 'mean', 'N50', 'total_length', 'number')}
+           
+    return stats
+
+
 def to_boulderio(infile, outfile):
     '''Converts input sequence file into a "Boulder-IO format", as used by primer3'''
     seq_reader = sequences.file_reader(infile)
diff --git a/pyfastaq/tests/data/tasks_test_stats_from_fai.in.empty.fai b/pyfastaq/tests/data/tasks_test_stats_from_fai.in.empty.fai
new file mode 100644
index 0000000..e69de29
diff --git a/pyfastaq/tests/data/tasks_test_stats_from_fai.in.fai b/pyfastaq/tests/data/tasks_test_stats_from_fai.in.fai
new file mode 100644
index 0000000..7c64d73
--- /dev/null
+++ b/pyfastaq/tests/data/tasks_test_stats_from_fai.in.fai
@@ -0,0 +1,5 @@
+seq1	3	6	3	4
+seq2	4	16	4	5
+seq3	10	27	10	11
+seq4	3	44	3	4
+seq5	1	54	1	2
diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py
index db367f2..7527160 100644
--- a/pyfastaq/tests/tasks_test.py
+++ b/pyfastaq/tests/tasks_test.py
@@ -589,6 +589,37 @@ class TestStripIlluminaSuffix(unittest.TestCase):
         os.unlink(tmpfile)
 
 
+class TestStatsFromFai(unittest.TestCase):
+    def test_stats_from_fai_nonempty(self):
+        '''Test task stats_from_fai non-empty file'''
+        infile = os.path.join(data_dir, 'tasks_test_stats_from_fai.in.fai')
+        got = tasks.stats_from_fai(infile)
+        expected = {
+            'longest': 10,
+            'shortest': 1,
+            'N50': 4,
+            'mean': 4.2,
+            'number': 5,
+            'total_length': 21
+        }
+        self.assertEqual(expected, got)
+
+
+    def test_stats_from_fai_empty(self):
+        '''Test task stats_from_fai empty file'''
+        infile = os.path.join(data_dir, 'tasks_test_stats_from_fai.in.empty.fai')
+        got = tasks.stats_from_fai(infile)
+        expected = {
+            'longest': 0,
+            'shortest': 0,
+            'N50': 0,
+            'mean': 0,
+            'number': 0,
+            'total_length': 0
+        }
+        self.assertEqual(expected, got)
+
+
 class TestToBoulderio(unittest.TestCase):
     def test_to_boulderio(self):
         '''Test task to_boulderio'''
diff --git a/setup.py b/setup.py
index 23a26e6..03a237d 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='pyfastaq',
-    version='3.7.0',
+    version='3.8.0',
     description='Script to manipulate FASTA and FASTQ files, plus API for developers',
     packages = find_packages(),
     author='Martin Hunt',

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git



More information about the debian-med-commit mailing list