[med-svn] [Git][med-team/seqmagick][master] 5 commits: New upstream version 0.8.6
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Sun Jun 11 10:25:05 BST 2023
Étienne Mollier pushed to branch master at Debian Med / seqmagick
Commits:
6372a06e by Étienne Mollier at 2023-06-11T11:10:49+02:00
New upstream version 0.8.6
- - - - -
d54d9d22 by Étienne Mollier at 2023-06-11T11:10:49+02:00
routine-update: New upstream version
- - - - -
f169584f by Étienne Mollier at 2023-06-11T11:10:51+02:00
Update upstream source from tag 'upstream/0.8.6'
Update to upstream version '0.8.6'
with Debian dir cb25a2d49f550fb644884b32db8713c847cb19bd
- - - - -
69f37050 by Étienne Mollier at 2023-06-11T11:10:51+02:00
routine-update: Standards-Version: 4.6.2
- - - - -
da784792 by Étienne Mollier at 2023-06-11T11:13:19+02:00
routine-update: Ready to upload to unstable
- - - - -
8 changed files:
- debian/changelog
- debian/control
- docs/conf.py
- docs/index.rst
- − docs/primer_trim.rst
- seqmagick/subcommands/__init__.py
- − seqmagick/subcommands/primer_trim.py
- − seqmagick/test/test_primer_trim.py
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+seqmagick (0.8.6-1) unstable; urgency=medium
+
+ * New upstream version
+ * Standards-Version: 4.6.2 (routine-update)
+
+ -- Étienne Mollier <emollier at debian.org> Sun, 11 Jun 2023 11:11:40 +0200
+
seqmagick (0.8.4-3) unstable; urgency=medium
[ Étienne Mollier ]
=====================================
debian/control
=====================================
@@ -11,7 +11,7 @@ Build-Depends: debhelper-compat (= 13),
python3-pytest <!nocheck>,
python3-biopython,
python3-pygtrie
-Standards-Version: 4.6.0
+Standards-Version: 4.6.2
Vcs-Browser: https://salsa.debian.org/med-team/seqmagick
Vcs-Git: https://salsa.debian.org/med-team/seqmagick.git
Homepage: https://github.com/fhcrc/seqmagick/
=====================================
docs/conf.py
=====================================
@@ -64,7 +64,6 @@ subcommands = [
'convert',
'extract-ids',
'info',
- 'primer-trim',
'quality-filter',
]
=====================================
docs/index.rst
=====================================
@@ -84,7 +84,6 @@ List of Subcommands
extract_ids
info
quality_filter
- primer_trim
Supported File Extensions
=========================
=====================================
docs/primer_trim.rst deleted
=====================================
@@ -1,7 +0,0 @@
-``primer-trim``
----------------
-
-``primer-trim`` trims an alignment to a region defined by a set of forward and
-reverse primers. Usage is as follows:
-
-.. literalinclude:: primer_trim.help
=====================================
seqmagick/subcommands/__init__.py
=====================================
@@ -1,4 +1,4 @@
-commands = 'convert', 'info', 'mogrify', 'primer_trim', 'quality_filter', \
+commands = 'convert', 'info', 'mogrify', 'quality_filter', \
'extract_ids', 'backtrans_align'
=====================================
seqmagick/subcommands/primer_trim.py deleted
=====================================
@@ -1,328 +0,0 @@
-"""
-Find a primer sequence in a gapped alignment, trim to amplicon
-"""
-import argparse
-import itertools
-import logging
-import operator
-import sys
-
-from Bio import SeqIO, pairwise2
-from Bio.Seq import Seq
-
-from seqmagick import transform, fileformat
-
-from . import common
-
-
-def build_parser(parser):
- parser.add_argument(
- 'source_file', type=argparse.FileType('r'), help="Source alignment file")
- parser.add_argument(
- 'output_file', type=argparse.FileType('w'), help="Destination trimmed file")
- parser.add_argument(
- 'forward_primer', type=iupac_ambiguous_sequence,
- help="The forward primer used")
- parser.add_argument(
- 'reverse_primer', type=iupac_ambiguous_sequence,
- help="""The reverse primer used. By default the reverse primer
- is assumed to be a subsequence of the top strand (that is,
- the reverse complement of an actual downstream PCR
- primer). Use --reverse-is-revcomp if this is not the
- case.""")
- parser.add_argument(
- '--reverse-is-revcomp', dest="reverse_complement", default=False,
- action='store_true', help="""Reverse primer is written as the
- reverse complement of the top strand (default:
- %(default)s)""")
- parser.add_argument(
- '--source-format', default=None,
- help='Alignment format (default: detect from extension')
- parser.add_argument(
- '--output-format', default=None,
- help='Alignment format (default: detect from extension')
- parser.add_argument(
- '--include-primers', action="store_true", default=False,
- help="""Include the primers in the output (default: %(default)s)""")
- parser.add_argument(
- '--max-hamming-distance', type=common.positive_value(int), default=1,
- help="""Maximum Hamming
- distance between primer and alignment site (default: %(default)s).
- IUPAC ambiguous bases in the primer matching unambiguous bases in
- the alignment are not penalized""")
- parser.add_argument(
- '--prune-action', default='trim', choices=list(_ACTIONS.keys()),
- help="""Action to take. Options are trim (trim to the region
- defined by the two primers, decreasing the width of the alignment),
- or isolate (convert all characters outside the primer-defined area
- to gaps). default: %(default)s""")
-
-
-# Sequence-related functions
-def ungap_index_map(sequence, gap_chars='-'):
- """
- Returns a dict mapping from an index in the ungapped sequence to an index
- in the gapped sequence.
-
- >>> ungap_index_map('AC-TG-')
- {0: 0, 1: 1, 2: 3, 3: 4}
- """
- counter = itertools.count(0).__next__
- ungap_indexes = [
- counter() if c not in gap_chars else None for c in iter(sequence)
- ]
- return dict(
- (ungapped, gapped)
- for ungapped, gapped in zip(ungap_indexes, range(len(sequence)))
- if ungapped is not None)
-
-
-def gap_index_map(sequence, gap_chars='-'):
- """
- Opposite of ungap_index_map: returns mapping from gapped index to ungapped
- index.
-
- >>> gap_index_map('AC-TG-')
- {0: 0, 1: 1, 3: 2, 4: 3}
- """
- return dict(
- (v, k) for k, v in list(ungap_index_map(sequence, gap_chars).items()))
-
-
-def _iupac_ambiguous_equal(ambig_base, unambig_base):
- """
- Tests two bases for equality, accounting for IUPAC ambiguous DNA
-
- ambiguous base may be IUPAC ambiguous, unambiguous must be one of ACGT
- """
- iupac_translation = {
- 'A': 'A',
- 'C': 'C',
- 'G': 'G',
- 'T': 'T',
- 'U': 'U',
- 'R': 'AG',
- 'Y': 'CT',
- 'S': 'GC',
- 'W': 'AT',
- 'K': 'GT',
- 'M': 'AC',
- 'B': 'CGT',
- 'D': 'AGT',
- 'H': 'ACT',
- 'V': 'ACG',
- 'N': 'ACGT',
- '-': '-'
- }
- for i in (ambig_base, unambig_base):
- if not len(i) == 1:
- raise ValueError("only one base may be passed.")
-
- return unambig_base.upper() in iupac_translation[ambig_base.upper()]
-
-
-def hamming_distance(s1, s2, equality_function=operator.eq):
- """
- Returns the hamming distance between two strings.
- """
- if not len(s1) == len(s2):
- raise ValueError("String lengths are not equal")
-
- # Number of non-matching characters:
- return sum(not equality_function(c1, c2) for c1, c2 in zip(s1, s2))
-
-
-class PrimerNotFound(Exception):
- pass
-
-
-class PrimerOrderError(Exception):
- def __init__(self, forward_indexes, reverse_indexes):
- super(PrimerOrderError, self).__init__(
- "Reverse primer before forward primer: {0} > {1}".format(
- forward_indexes, reverse_indexes))
-
-
-class PrimerAligner(object):
- """
- Get positions of pairwise alignments of a primer to a sequence.
- """
-
- def __init__(self, primer, match=5, difference=-4, gap_open=-10,
- gap_extend=-0.5, penalize_end_gaps=False):
- self.primer = primer
- self.match = match
- self.difference = difference
- self.gap_open = gap_open
- self.gap_extend = gap_extend
- self.penalize_end_gaps = penalize_end_gaps
-
- def align(self, sequence):
- """
- Aligns the primer to the given query sequence, returning a tuple of:
-
- hamming_distance, start, end
-
- Where hamming distance is the distance between the primer and aligned
- sequence, and start and end give the start and end index of the primer
- relative to the input sequence.
- """
- seq_aln, primer_aln, score, start, end = pairwise2.align.globalms(
- str(sequence).upper(), str(self.primer).upper(),
- self.match, self.difference, self.gap_open,
- self.gap_extend, one_alignment_only=True,
- penalize_end_gaps=self.penalize_end_gaps)[0]
-
- # Get an ungapped mapping on the sequence
- index_map = gap_index_map(seq_aln)
- ungap_map = ungap_index_map(primer_aln)
-
- # Trim to primer
- start = ungap_map[0]
- end = ungap_map[len(self.primer) - 1]
-
- trimmed = seq_aln[start:end + 1]
-
- ham_dist = hamming_distance(primer_aln[start:end + 1], trimmed,
- _iupac_ambiguous_equal)
- # assert primer_aln[start:end].replace('-', '') == str(self.primer)
-
- # TODO: handle start or end being gap better. For now, just give up
- # and return maxint for the hamming distance
- if trimmed.endswith('-'):
- tail = len(trimmed) - len(trimmed.rstrip('-'))
- end = index_map[end - tail] + 1
- ham_dist = sys.maxsize
- else:
- end = index_map[end]
- if trimmed.startswith('-'):
- start = 0
- ham_dist = sys.maxsize
- else:
- start = index_map[start]
-
- return ham_dist, start, end
-
- @property
- def max_score(self):
- """
- Maximum possible alignment score
- """
- return len(self.primer) * self.match
-
-
-# Types for argparse
-def iupac_ambiguous_sequence(string):
- return Seq(string, IUPAC.ambiguous_dna)
-
-
-def locate_primers(sequences, forward_primer, reverse_primer,
- reverse_complement, max_hamming_distance):
- """
- Find forward and reverse primers in a set of sequences, return two tuples:
- (forward_start, forward_end), (reverse_start, reverse_end)
- """
- forward_loc = None
- reverse_loc = None
- seq_length = None
-
- # Reverse complement the reverse primer, if appropriate
- if reverse_complement:
- reverse_primer = reverse_primer.reverse_complement()
-
- forward_aligner = PrimerAligner(forward_primer)
- reverse_aligner = PrimerAligner(reverse_primer)
-
- for i, sequence in enumerate(sequences):
- if seq_length is None:
- seq_length = len(sequence)
- elif len(sequence) != seq_length:
- raise ValueError(("Sequence Length Heterogeneity: {0} != {1}. "
- "Is this an alignment?").format(
- len(sequence), seq_length))
- index_map = ungap_index_map(sequence.seq)
- if forward_loc is None:
- ham_dist, start, end = forward_aligner.align(sequence.seq.ungap())
- if ham_dist <= max_hamming_distance:
- forward_loc = index_map[start], index_map[end]
- logging.info("Forward in sequence %d: indexes %d to %d", i + 1,
- *forward_loc)
- if reverse_loc is None:
- ham_dist, start, end = reverse_aligner.align(sequence.seq.ungap())
- if ham_dist <= max_hamming_distance:
- reverse_loc = index_map[start], index_map[end]
- logging.info("Reverse in sequence %d: indexes %d to %d", i + 1,
- *reverse_loc)
- if forward_loc and reverse_loc:
- # Both found
- # Check order
- if forward_loc[0] > reverse_loc[0]:
- raise PrimerOrderError(forward_loc[0], reverse_loc[0])
- return forward_loc, reverse_loc
- else:
- logging.debug(
- "Sequence %d: %d/2 primers found", i + 1,
- sum(j is not None for j in (forward_loc, reverse_loc)))
-
- # Did not find either the forward or reverse primer:
- if not forward_loc:
- raise PrimerNotFound(forward_primer)
- else:
- raise PrimerNotFound(reverse_primer)
-
-
-def trim(sequences, start, end):
- """
- Slice the input sequences from start to end
- """
- logging.info("Trimming from %d to %d", start, end)
- return (sequence[start:end] for sequence in sequences)
-
-
-# Prune actions
-_ACTIONS = {'trim': trim, 'isolate': transform.isolate_region}
-
-
-def action(arguments):
- """
- Trim the alignment as specified
- """
- # Determine file format for input and output
- source_format = (arguments.source_format or
- fileformat.from_handle(arguments.source_file))
- output_format = (arguments.output_format or
- fileformat.from_handle(arguments.output_file))
-
- # Load the alignment
- with arguments.source_file:
- sequences = SeqIO.parse(
- arguments.source_file,
- source_format)
-
- # Locate primers
- (forward_start, forward_end), (reverse_start, reverse_end) = locate_primers(
- sequences, arguments.forward_primer,
- arguments.reverse_primer, arguments.reverse_complement,
- arguments.max_hamming_distance)
-
- # Generate slice indexes
- if arguments.include_primers:
- start = forward_start
- end = reverse_end + 1
- else:
- start = forward_end + 1
- end = reverse_start
-
- # Rewind the input file
- arguments.source_file.seek(0)
- sequences = SeqIO.parse(
- arguments.source_file,
- source_format)
-
- # Apply the transformation
- prune_action = _ACTIONS[arguments.prune_action]
- transformed_sequences = prune_action(sequences, start, end)
-
- with arguments.output_file:
- SeqIO.write(transformed_sequences, arguments.output_file,
- output_format)
=====================================
seqmagick/test/test_primer_trim.py deleted
=====================================
@@ -1,116 +0,0 @@
-"""
-Tests for primer trim
-"""
-import unittest
-
-from Bio.Seq import Seq
-from Bio.SeqRecord import SeqRecord
-
-from seqmagick.subcommands import primer_trim
-
-
-class PrimerAlignerTestCase(unittest.TestCase):
- def setUp(self):
- self.primer = 'AACTGCATTTGAATGG'
- self.instance = primer_trim.PrimerAligner(
- self.primer, match=5.0, gap_open=-10.0)
-
- def test_max_score(self):
- self.assertEqual(len(self.primer) * 5.0, self.instance.max_score)
-
- def test_align_exact(self):
- sequence = ('ACTCTGTGTCACTTTAAACTGCATTTGAATGGAAGAGTAATAGTAGCAATAACGGCA'
- 'CTGATCAG')
- hamming_distance, start, end = self.instance.align(sequence)
- self.assertEqual(0, hamming_distance)
- self.assertEqual(16, start)
- self.assertEqual(31, end)
-
- def test_align_gap(self):
- sequence = ('ACTCTGTGTCACTTTAAACTGCATTGAATGGAAGAGTAATAGTAGCAATAACGGCA'
- 'CTGATCAG')
- hamming_distance, start, end = self.instance.align(sequence)
- expected_distance = 1
- self.assertEqual(expected_distance, hamming_distance)
- self.assertEqual(16, start)
- self.assertEqual(30, end)
-
-
-class HammingDistanceTestCase(unittest.TestCase):
- def test_unequal_length(self):
- s1 = 'test'
- s2 = 'te'
- self.assertRaises(ValueError, primer_trim.hamming_distance, s1, s2)
-
- def test_no_difference(self):
- s1 = s2 = 'test'
- self.assertEqual(0, primer_trim.hamming_distance(s1, s2))
-
- def test_all_different(self):
- s1 = 'test'
- s2 = 'ACGT'
- self.assertEqual(4, primer_trim.hamming_distance(s1, s2))
-
- def test_basic(self):
- s1 = 'ACGT'
- s2 = 'AGGT'
- self.assertEqual(1, primer_trim.hamming_distance(s1, s2))
-
- def test_ambiguous(self):
- s1 = 'ACYT'
- s2 = 'ACCT'
- self.assertEqual(0,
- primer_trim.hamming_distance(
- s1, s2, primer_trim._iupac_ambiguous_equal))
- s2 = 'ACTT'
- self.assertEqual(0,
- primer_trim.hamming_distance(
- s1, s2, primer_trim._iupac_ambiguous_equal))
-
-
-def _alignment_record(sequence):
- return SeqRecord(Seq(sequence))
-
-
-class LocatePrimersTestCase(unittest.TestCase):
- """
- Test for locate primers
- """
-
- def setUp(self):
- self.sequences = [_alignment_record('--A--ACTGGACGTATTC-CCCC')]
-
- def test_basic(self):
- forward = 'TGG'
- reverse = 'TTC'
-
- forward_idx, reverse_idx = primer_trim.locate_primers(
- self.sequences, forward, reverse, False, 1)
-
- self.assertEqual((7, 9), forward_idx)
- self.assertEqual((15, 17), reverse_idx)
-
- def test_no_forward(self):
- forward = 'GGGGGG'
- reverse = 'TTC'
- self.assertRaises(primer_trim.PrimerNotFound,
- primer_trim.locate_primers, self.sequences, forward,
- reverse, False, 1)
-
- def test_no_reverse(self):
- forward = 'TGG'
- reverse = 'GGGG'
- self.assertRaises(primer_trim.PrimerNotFound,
- primer_trim.locate_primers, self.sequences, forward,
- reverse, False, 1)
-
- def test_bad_order(self):
- """
- Should fail if reverse primer occurs before forward primer
- """
- reverse = 'TGG'
- forward = 'TTC'
-
- self.assertRaises(primer_trim.PrimerOrderError,
- primer_trim.locate_primers, self.sequences, forward,
- reverse, False, 1)
View it on GitLab: https://salsa.debian.org/med-team/seqmagick/-/compare/e0e4a53c9741c8511b32a59d47b3859074d5c810...da7847924ed44a681a30f9c969f5ba17ca512615
--
View it on GitLab: https://salsa.debian.org/med-team/seqmagick/-/compare/e0e4a53c9741c8511b32a59d47b3859074d5c810...da7847924ed44a681a30f9c969f5ba17ca512615
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230611/358b2a9b/attachment-0001.htm>
More information about the debian-med-commit
mailing list