[med-svn] [fastaq] 01/03: Imported Upstream version 3.8.0
Sascha Steinbiss
sascha-guest at moszumanska.debian.org
Sat Sep 26 15:22:58 UTC 2015
This is an automated email from the git hooks/post-receive script.
sascha-guest pushed a commit to branch master
in repository fastaq.
commit 07caf61f1c3a84c73ce4170adffd6bbdc44236c9
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date: Sat Sep 26 14:42:02 2015 +0000
Imported Upstream version 3.8.0
---
pyfastaq/common.py | 2 +-
pyfastaq/tasks.py | 29 ++++++++++++++++++++
.../data/tasks_test_stats_from_fai.in.empty.fai | 0
.../tests/data/tasks_test_stats_from_fai.in.fai | 5 ++++
pyfastaq/tests/tasks_test.py | 31 ++++++++++++++++++++++
setup.py | 2 +-
6 files changed, 67 insertions(+), 2 deletions(-)
diff --git a/pyfastaq/common.py b/pyfastaq/common.py
index cef5770..f19280d 100644
--- a/pyfastaq/common.py
+++ b/pyfastaq/common.py
@@ -1 +1 @@
-version = '3.7.0'
+version = '3.8.0'
diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py
index 4199a9e..9518176 100644
--- a/pyfastaq/tasks.py
+++ b/pyfastaq/tasks.py
@@ -821,6 +821,35 @@ def strip_illumina_suffix(infile, outfile):
utils.close(f_out)
+def stats_from_fai(infile):
+ '''Returns dictionary of length stats from an fai file. Keys are: longest, shortest, mean, total_length, N50, number'''
+ f = utils.open_file_read(infile)
+ try:
+ lengths = sorted([int(line.split('\t')[1]) for line in f], reverse=True)
+ except:
+ raise Error('Error getting lengths from fai file ' + infile)
+ utils.close(f)
+
+ stats = {}
+ if len(lengths) > 0:
+ stats['longest'] = max(lengths)
+ stats['shortest'] = min(lengths)
+ stats['total_length'] = sum(lengths)
+ stats['mean'] = stats['total_length'] / len(lengths)
+ stats['number'] = len(lengths)
+
+ cumulative_length = 0
+ for length in lengths:
+ cumulative_length += length
+ if cumulative_length >= 0.5 * stats['total_length']:
+ stats['N50'] = length
+ break
+ else:
+ stats = {x: 0 for x in ('longest', 'shortest', 'mean', 'N50', 'total_length', 'number')}
+
+ return stats
+
+
def to_boulderio(infile, outfile):
'''Converts input sequence file into a "Boulder-IO format", as used by primer3'''
seq_reader = sequences.file_reader(infile)
diff --git a/pyfastaq/tests/data/tasks_test_stats_from_fai.in.empty.fai b/pyfastaq/tests/data/tasks_test_stats_from_fai.in.empty.fai
new file mode 100644
index 0000000..e69de29
diff --git a/pyfastaq/tests/data/tasks_test_stats_from_fai.in.fai b/pyfastaq/tests/data/tasks_test_stats_from_fai.in.fai
new file mode 100644
index 0000000..7c64d73
--- /dev/null
+++ b/pyfastaq/tests/data/tasks_test_stats_from_fai.in.fai
@@ -0,0 +1,5 @@
+seq1 3 6 3 4
+seq2 4 16 4 5
+seq3 10 27 10 11
+seq4 3 44 3 4
+seq5 1 54 1 2
diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py
index db367f2..7527160 100644
--- a/pyfastaq/tests/tasks_test.py
+++ b/pyfastaq/tests/tasks_test.py
@@ -589,6 +589,37 @@ class TestStripIlluminaSuffix(unittest.TestCase):
os.unlink(tmpfile)
+class TestStatsFromFai(unittest.TestCase):
+ def test_stats_from_fai_nonempty(self):
+ '''Test task stats_from_fai non-empty file'''
+ infile = os.path.join(data_dir, 'tasks_test_stats_from_fai.in.fai')
+ got = tasks.stats_from_fai(infile)
+ expected = {
+ 'longest': 10,
+ 'shortest': 1,
+ 'N50': 4,
+ 'mean': 4.2,
+ 'number': 5,
+ 'total_length': 21
+ }
+ self.assertEqual(expected, got)
+
+
+ def test_stats_from_fai_empty(self):
+ '''Test task stats_from_fai empty file'''
+ infile = os.path.join(data_dir, 'tasks_test_stats_from_fai.in.empty.fai')
+ got = tasks.stats_from_fai(infile)
+ expected = {
+ 'longest': 0,
+ 'shortest': 0,
+ 'N50': 0,
+ 'mean': 0,
+ 'number': 0,
+ 'total_length': 0
+ }
+ self.assertEqual(expected, got)
+
+
class TestToBoulderio(unittest.TestCase):
def test_to_boulderio(self):
'''Test task to_boulderio'''
diff --git a/setup.py b/setup.py
index 23a26e6..03a237d 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(
name='pyfastaq',
- version='3.7.0',
+ version='3.8.0',
description='Script to manipulate FASTA and FASTQ files, plus API for developers',
packages = find_packages(),
author='Martin Hunt',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git
More information about the debian-med-commit mailing list