[med-svn] [fastaq] 01/03: Imported Upstream version 3.9.0
Sascha Steinbiss
sascha-guest at moszumanska.debian.org
Thu Oct 15 05:12:36 UTC 2015
This is an automated email from the git hooks/post-receive script.
sascha-guest pushed a commit to branch master
in repository fastaq.
commit b7d2572be8eeca7f500a8dd461f3792a0afc705e
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date: Wed Oct 14 19:32:21 2015 +0000
Imported Upstream version 3.9.0
---
pyfastaq/common.py | 2 +-
pyfastaq/tasks.py | 23 ++++++++++++++++++++++
.../data/tasks_test_length_offsets_from_fai.fa | 6 ++++++
.../data/tasks_test_length_offsets_from_fai.fa.fai | 3 +++
pyfastaq/tests/tasks_test.py | 8 ++++++++
setup.py | 2 +-
6 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/pyfastaq/common.py b/pyfastaq/common.py
index f19280d..9b9d5ab 100644
--- a/pyfastaq/common.py
+++ b/pyfastaq/common.py
@@ -1 +1 @@
-version = '3.8.0'
+version = '3.9.0'
diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py
index 9518176..41755c6 100644
--- a/pyfastaq/tasks.py
+++ b/pyfastaq/tasks.py
@@ -680,6 +680,29 @@ def lengths_from_fai(fai_file, d):
utils.close(f)
+def length_offsets_from_fai(fai_file):
+ '''Returns a dictionary of positions of the start of each sequence, as
+ if all the sequences were catted into one sequence.
+ eg if file has three sequences, seq1 10bp, seq2 30bp, seq3 20bp, then
+ the output would be: {'seq1': 0, 'seq2': 10, 'seq3': 40}'''
+ positions = {}
+ total_length = 0
+ f = utils.open_file_read(fai_file)
+
+ for line in f:
+ try:
+ (name, length) = line.rstrip().split()[:2]
+ length = int(length)
+ except:
+ raise Error('Error reading the following line of fai file ' + fai_file + '\n' + line)
+
+ positions[name] = total_length
+ total_length += length
+
+ utils.close(f)
+ return positions
+
+
def split_by_base_count(infile, outfiles_prefix, max_bases, max_seqs=None):
'''Splits a fasta/q file into separate files, file size determined by number of bases.
diff --git a/pyfastaq/tests/data/tasks_test_length_offsets_from_fai.fa b/pyfastaq/tests/data/tasks_test_length_offsets_from_fai.fa
new file mode 100644
index 0000000..7ba49bf
--- /dev/null
+++ b/pyfastaq/tests/data/tasks_test_length_offsets_from_fai.fa
@@ -0,0 +1,6 @@
+>seq1
+ACGCTCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGA
+>seq2
+A
+>seq3
+ACGTGTT
diff --git a/pyfastaq/tests/data/tasks_test_length_offsets_from_fai.fa.fai b/pyfastaq/tests/data/tasks_test_length_offsets_from_fai.fa.fai
new file mode 100644
index 0000000..c1c0e22
--- /dev/null
+++ b/pyfastaq/tests/data/tasks_test_length_offsets_from_fai.fa.fai
@@ -0,0 +1,3 @@
+seq1 42 6 42 43
+seq2 1 55 1 2
+seq3 7 63 7 8
diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py
index 7527160..0bae920 100644
--- a/pyfastaq/tests/tasks_test.py
+++ b/pyfastaq/tests/tasks_test.py
@@ -435,6 +435,14 @@ class TestLengthsFromFai(unittest.TestCase):
self.assertEqual(int(i), d[i])
+class TestLengthOffsetsFromFai(unittest.TestCase):
+ def test_length_offsets_from_fai(self):
+ '''Test length_offsets_from_fai'''
+ got = tasks.length_offsets_from_fai(os.path.join(data_dir, 'tasks_test_length_offsets_from_fai.fa.fai'))
+ expected = {'seq1': 0, 'seq2': 42, 'seq3': 43}
+ self.assertEqual(expected, got)
+
+
class TestSplit(unittest.TestCase):
def test_split_by_base_count(self):
'''Check that fasta/q files get split by base count correctly'''
diff --git a/setup.py b/setup.py
index 03a237d..eca24c8 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(
name='pyfastaq',
- version='3.8.0',
+ version='3.9.0',
description='Script to manipulate FASTA and FASTQ files, plus API for developers',
packages = find_packages(),
author='Martin Hunt',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git
More information about the debian-med-commit
mailing list