[med-svn] [Git][med-team/seqmagick][master] 5 commits: New upstream version 0.8.6

Sun Jun 11 10:25:05 BST 2023


Étienne Mollier pushed to branch master at Debian Med / seqmagick


Commits:
6372a06e by Étienne Mollier at 2023-06-11T11:10:49+02:00
New upstream version 0.8.6
- - - - -
d54d9d22 by Étienne Mollier at 2023-06-11T11:10:49+02:00
routine-update: New upstream version

- - - - -
f169584f by Étienne Mollier at 2023-06-11T11:10:51+02:00
Update upstream source from tag 'upstream/0.8.6'

Update to upstream version '0.8.6'
with Debian dir cb25a2d49f550fb644884b32db8713c847cb19bd
- - - - -
69f37050 by Étienne Mollier at 2023-06-11T11:10:51+02:00
routine-update: Standards-Version: 4.6.2

- - - - -
da784792 by Étienne Mollier at 2023-06-11T11:13:19+02:00
routine-update: Ready to upload to unstable

- - - - -


8 changed files:

- debian/changelog
- debian/control
- docs/conf.py
- docs/index.rst
- − docs/primer_trim.rst
- seqmagick/subcommands/__init__.py
- − seqmagick/subcommands/primer_trim.py
- − seqmagick/test/test_primer_trim.py


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+seqmagick (0.8.6-1) unstable; urgency=medium
+
+  * New upstream version
+  * Standards-Version: 4.6.2 (routine-update)
+
+ -- Étienne Mollier <emollier at debian.org>  Sun, 11 Jun 2023 11:11:40 +0200
+
 seqmagick (0.8.4-3) unstable; urgency=medium
 
   [ Étienne Mollier ]


=====================================
debian/control
=====================================
@@ -11,7 +11,7 @@ Build-Depends: debhelper-compat (= 13),
                python3-pytest <!nocheck>,
                python3-biopython,
                python3-pygtrie
-Standards-Version: 4.6.0
+Standards-Version: 4.6.2
 Vcs-Browser: https://salsa.debian.org/med-team/seqmagick
 Vcs-Git: https://salsa.debian.org/med-team/seqmagick.git
 Homepage: https://github.com/fhcrc/seqmagick/


=====================================
docs/conf.py
=====================================
@@ -64,7 +64,6 @@ subcommands = [
     'convert',
     'extract-ids',
     'info',
-    'primer-trim',
     'quality-filter',
 ]
 


=====================================
docs/index.rst
=====================================
@@ -84,7 +84,6 @@ List of Subcommands
    extract_ids
    info
    quality_filter
-   primer_trim
 
 Supported File Extensions
 =========================


=====================================
docs/primer_trim.rst deleted
=====================================
@@ -1,7 +0,0 @@
-``primer-trim``
----------------
-
-``primer-trim`` trims an alignment to a region defined by a set of forward and
-reverse primers. Usage is as follows:
-
-.. literalinclude:: primer_trim.help


=====================================
seqmagick/subcommands/__init__.py
=====================================
@@ -1,4 +1,4 @@
-commands = 'convert', 'info', 'mogrify', 'primer_trim', 'quality_filter', \
+commands = 'convert', 'info', 'mogrify', 'quality_filter', \
         'extract_ids', 'backtrans_align'
 
 


=====================================
seqmagick/subcommands/primer_trim.py deleted
=====================================
@@ -1,328 +0,0 @@
-"""
-Find a primer sequence in a gapped alignment, trim to amplicon
-"""
-import argparse
-import itertools
-import logging
-import operator
-import sys
-
-from Bio import SeqIO, pairwise2
-from Bio.Seq import Seq
-
-from seqmagick import transform, fileformat
-
-from . import common
-
-
-def build_parser(parser):
-    parser.add_argument(
-        'source_file', type=argparse.FileType('r'), help="Source alignment file")
-    parser.add_argument(
-        'output_file', type=argparse.FileType('w'), help="Destination trimmed file")
-    parser.add_argument(
-        'forward_primer', type=iupac_ambiguous_sequence,
-        help="The forward primer used")
-    parser.add_argument(
-        'reverse_primer', type=iupac_ambiguous_sequence,
-        help="""The reverse primer used. By default the reverse primer
-            is assumed to be a subsequence of the top strand (that is,
-            the reverse complement of an actual downstream PCR
-            primer). Use --reverse-is-revcomp if this is not the
-            case.""")
-    parser.add_argument(
-        '--reverse-is-revcomp', dest="reverse_complement", default=False,
-        action='store_true', help="""Reverse primer is written as the
-            reverse complement of the top strand (default:
-            %(default)s)""")
-    parser.add_argument(
-        '--source-format', default=None,
-        help='Alignment format (default: detect from extension')
-    parser.add_argument(
-        '--output-format', default=None,
-        help='Alignment format (default: detect from extension')
-    parser.add_argument(
-        '--include-primers', action="store_true", default=False,
-        help="""Include the primers in the output (default: %(default)s)""")
-    parser.add_argument(
-        '--max-hamming-distance', type=common.positive_value(int), default=1,
-        help="""Maximum Hamming
-            distance between primer and alignment site (default: %(default)s).
-            IUPAC ambiguous bases in the primer matching unambiguous bases in
-            the alignment are not penalized""")
-    parser.add_argument(
-        '--prune-action', default='trim', choices=list(_ACTIONS.keys()),
-        help="""Action to take. Options are trim (trim to the region
-            defined by the two primers, decreasing the width of the alignment),
-            or isolate (convert all characters outside the primer-defined area
-            to gaps). default: %(default)s""")
-
-
-# Sequence-related functions
-def ungap_index_map(sequence, gap_chars='-'):
-    """
-    Returns a dict mapping from an index in the ungapped sequence to an index
-    in the gapped sequence.
-
-    >>> ungap_index_map('AC-TG-')
-    {0: 0, 1: 1, 2: 3, 3: 4}
-    """
-    counter = itertools.count(0).__next__
-    ungap_indexes = [
-        counter() if c not in gap_chars else None for c in iter(sequence)
-    ]
-    return dict(
-        (ungapped, gapped)
-        for ungapped, gapped in zip(ungap_indexes, range(len(sequence)))
-        if ungapped is not None)
-
-
-def gap_index_map(sequence, gap_chars='-'):
-    """
-    Opposite of ungap_index_map: returns mapping from gapped index to ungapped
-    index.
-
-    >>> gap_index_map('AC-TG-')
-    {0: 0, 1: 1, 3: 2, 4: 3}
-    """
-    return dict(
-        (v, k) for k, v in list(ungap_index_map(sequence, gap_chars).items()))
-
-
-def _iupac_ambiguous_equal(ambig_base, unambig_base):
-    """
-    Tests two bases for equality, accounting for IUPAC ambiguous DNA
-
-    ambiguous base may be IUPAC ambiguous, unambiguous must be one of ACGT
-    """
-    iupac_translation = {
-        'A': 'A',
-        'C': 'C',
-        'G': 'G',
-        'T': 'T',
-        'U': 'U',
-        'R': 'AG',
-        'Y': 'CT',
-        'S': 'GC',
-        'W': 'AT',
-        'K': 'GT',
-        'M': 'AC',
-        'B': 'CGT',
-        'D': 'AGT',
-        'H': 'ACT',
-        'V': 'ACG',
-        'N': 'ACGT',
-        '-': '-'
-    }
-    for i in (ambig_base, unambig_base):
-        if not len(i) == 1:
-            raise ValueError("only one base may be passed.")
-
-    return unambig_base.upper() in iupac_translation[ambig_base.upper()]
-
-
-def hamming_distance(s1, s2, equality_function=operator.eq):
-    """
-    Returns the hamming distance between two strings.
-    """
-    if not len(s1) == len(s2):
-        raise ValueError("String lengths are not equal")
-
-    # Number of non-matching characters:
-    return sum(not equality_function(c1, c2) for c1, c2 in zip(s1, s2))
-
-
-class PrimerNotFound(Exception):
-    pass
-
-
-class PrimerOrderError(Exception):
-    def __init__(self, forward_indexes, reverse_indexes):
-        super(PrimerOrderError, self).__init__(
-            "Reverse primer before forward primer: {0} > {1}".format(
-                forward_indexes, reverse_indexes))
-
-
-class PrimerAligner(object):
-    """
-    Get positions of pairwise alignments of a primer to a sequence.
-    """
-
-    def __init__(self, primer, match=5, difference=-4, gap_open=-10,
-                 gap_extend=-0.5, penalize_end_gaps=False):
-        self.primer = primer
-        self.match = match
-        self.difference = difference
-        self.gap_open = gap_open
-        self.gap_extend = gap_extend
-        self.penalize_end_gaps = penalize_end_gaps
-
-    def align(self, sequence):
-        """
-        Aligns the primer to the given query sequence, returning a tuple of:
-
-            hamming_distance, start, end
-
-        Where hamming distance is the distance between the primer and aligned
-        sequence, and start and end give the start and end index of the primer
-        relative to the input sequence.
-        """
-        seq_aln, primer_aln, score, start, end = pairwise2.align.globalms(
-            str(sequence).upper(), str(self.primer).upper(),
-            self.match, self.difference, self.gap_open,
-            self.gap_extend, one_alignment_only=True,
-            penalize_end_gaps=self.penalize_end_gaps)[0]
-
-        # Get an ungapped mapping on the sequence
-        index_map = gap_index_map(seq_aln)
-        ungap_map = ungap_index_map(primer_aln)
-
-        # Trim to primer
-        start = ungap_map[0]
-        end = ungap_map[len(self.primer) - 1]
-
-        trimmed = seq_aln[start:end + 1]
-
-        ham_dist = hamming_distance(primer_aln[start:end + 1], trimmed,
-                                    _iupac_ambiguous_equal)
-        # assert primer_aln[start:end].replace('-', '') == str(self.primer)
-
-        # TODO: handle start or end being gap better. For now, just give up
-        # and return maxint for the hamming distance
-        if trimmed.endswith('-'):
-            tail = len(trimmed) - len(trimmed.rstrip('-'))
-            end = index_map[end - tail] + 1
-            ham_dist = sys.maxsize
-        else:
-            end = index_map[end]
-        if trimmed.startswith('-'):
-            start = 0
-            ham_dist = sys.maxsize
-        else:
-            start = index_map[start]
-
-        return ham_dist, start, end
-
-    @property
-    def max_score(self):
-        """
-        Maximum possible alignment score
-        """
-        return len(self.primer) * self.match
-
-
-# Types for argparse
-def iupac_ambiguous_sequence(string):
-    return Seq(string, IUPAC.ambiguous_dna)
-
-
-def locate_primers(sequences, forward_primer, reverse_primer,
-                   reverse_complement, max_hamming_distance):
-    """
-    Find forward and reverse primers in a set of sequences, return two tuples:
-    (forward_start, forward_end), (reverse_start, reverse_end)
-    """
-    forward_loc = None
-    reverse_loc = None
-    seq_length = None
-
-    # Reverse complement the reverse primer, if appropriate
-    if reverse_complement:
-        reverse_primer = reverse_primer.reverse_complement()
-
-    forward_aligner = PrimerAligner(forward_primer)
-    reverse_aligner = PrimerAligner(reverse_primer)
-
-    for i, sequence in enumerate(sequences):
-        if seq_length is None:
-            seq_length = len(sequence)
-        elif len(sequence) != seq_length:
-            raise ValueError(("Sequence Length Heterogeneity: {0} != {1}. "
-                              "Is this an alignment?").format(
-                                  len(sequence), seq_length))
-        index_map = ungap_index_map(sequence.seq)
-        if forward_loc is None:
-            ham_dist, start, end = forward_aligner.align(sequence.seq.ungap())
-            if ham_dist <= max_hamming_distance:
-                forward_loc = index_map[start], index_map[end]
-                logging.info("Forward in sequence %d: indexes %d to %d", i + 1,
-                             *forward_loc)
-        if reverse_loc is None:
-            ham_dist, start, end = reverse_aligner.align(sequence.seq.ungap())
-            if ham_dist <= max_hamming_distance:
-                reverse_loc = index_map[start], index_map[end]
-                logging.info("Reverse in sequence %d: indexes %d to %d", i + 1,
-                             *reverse_loc)
-        if forward_loc and reverse_loc:
-            # Both found
-            # Check order
-            if forward_loc[0] > reverse_loc[0]:
-                raise PrimerOrderError(forward_loc[0], reverse_loc[0])
-            return forward_loc, reverse_loc
-        else:
-            logging.debug(
-                "Sequence %d: %d/2 primers found", i + 1,
-                sum(j is not None for j in (forward_loc, reverse_loc)))
-
-    # Did not find either the forward or reverse primer:
-    if not forward_loc:
-        raise PrimerNotFound(forward_primer)
-    else:
-        raise PrimerNotFound(reverse_primer)
-
-
-def trim(sequences, start, end):
-    """
-    Slice the input sequences from start to end
-    """
-    logging.info("Trimming from %d to %d", start, end)
-    return (sequence[start:end] for sequence in sequences)
-
-
-# Prune actions
-_ACTIONS = {'trim': trim, 'isolate': transform.isolate_region}
-
-
-def action(arguments):
-    """
-    Trim the alignment as specified
-    """
-    # Determine file format for input and output
-    source_format = (arguments.source_format or
-                     fileformat.from_handle(arguments.source_file))
-    output_format = (arguments.output_format or
-                     fileformat.from_handle(arguments.output_file))
-
-    # Load the alignment
-    with arguments.source_file:
-        sequences = SeqIO.parse(
-            arguments.source_file,
-            source_format)
-
-        # Locate primers
-        (forward_start, forward_end), (reverse_start, reverse_end) = locate_primers(
-            sequences, arguments.forward_primer,
-            arguments.reverse_primer, arguments.reverse_complement,
-            arguments.max_hamming_distance)
-
-        # Generate slice indexes
-        if arguments.include_primers:
-            start = forward_start
-            end = reverse_end + 1
-        else:
-            start = forward_end + 1
-            end = reverse_start
-
-        # Rewind the input file
-        arguments.source_file.seek(0)
-        sequences = SeqIO.parse(
-            arguments.source_file,
-            source_format)
-
-        # Apply the transformation
-        prune_action = _ACTIONS[arguments.prune_action]
-        transformed_sequences = prune_action(sequences, start, end)
-
-        with arguments.output_file:
-            SeqIO.write(transformed_sequences, arguments.output_file,
-                        output_format)


=====================================
seqmagick/test/test_primer_trim.py deleted
=====================================
@@ -1,116 +0,0 @@
-"""
-Tests for primer trim
-"""
-import unittest
-
-from Bio.Seq import Seq
-from Bio.SeqRecord import SeqRecord
-
-from seqmagick.subcommands import primer_trim
-
-
-class PrimerAlignerTestCase(unittest.TestCase):
-    def setUp(self):
-        self.primer = 'AACTGCATTTGAATGG'
-        self.instance = primer_trim.PrimerAligner(
-            self.primer, match=5.0, gap_open=-10.0)
-
-    def test_max_score(self):
-        self.assertEqual(len(self.primer) * 5.0, self.instance.max_score)
-
-    def test_align_exact(self):
-        sequence = ('ACTCTGTGTCACTTTAAACTGCATTTGAATGGAAGAGTAATAGTAGCAATAACGGCA'
-                    'CTGATCAG')
-        hamming_distance, start, end = self.instance.align(sequence)
-        self.assertEqual(0, hamming_distance)
-        self.assertEqual(16, start)
-        self.assertEqual(31, end)
-
-    def test_align_gap(self):
-        sequence = ('ACTCTGTGTCACTTTAAACTGCATTGAATGGAAGAGTAATAGTAGCAATAACGGCA'
-                    'CTGATCAG')
-        hamming_distance, start, end = self.instance.align(sequence)
-        expected_distance = 1
-        self.assertEqual(expected_distance, hamming_distance)
-        self.assertEqual(16, start)
-        self.assertEqual(30, end)
-
-
-class HammingDistanceTestCase(unittest.TestCase):
-    def test_unequal_length(self):
-        s1 = 'test'
-        s2 = 'te'
-        self.assertRaises(ValueError, primer_trim.hamming_distance, s1, s2)
-
-    def test_no_difference(self):
-        s1 = s2 = 'test'
-        self.assertEqual(0, primer_trim.hamming_distance(s1, s2))
-
-    def test_all_different(self):
-        s1 = 'test'
-        s2 = 'ACGT'
-        self.assertEqual(4, primer_trim.hamming_distance(s1, s2))
-
-    def test_basic(self):
-        s1 = 'ACGT'
-        s2 = 'AGGT'
-        self.assertEqual(1, primer_trim.hamming_distance(s1, s2))
-
-    def test_ambiguous(self):
-        s1 = 'ACYT'
-        s2 = 'ACCT'
-        self.assertEqual(0,
-                         primer_trim.hamming_distance(
-                             s1, s2, primer_trim._iupac_ambiguous_equal))
-        s2 = 'ACTT'
-        self.assertEqual(0,
-                         primer_trim.hamming_distance(
-                             s1, s2, primer_trim._iupac_ambiguous_equal))
-
-
-def _alignment_record(sequence):
-    return SeqRecord(Seq(sequence))
-
-
-class LocatePrimersTestCase(unittest.TestCase):
-    """
-    Test for locate primers
-    """
-
-    def setUp(self):
-        self.sequences = [_alignment_record('--A--ACTGGACGTATTC-CCCC')]
-
-    def test_basic(self):
-        forward = 'TGG'
-        reverse = 'TTC'
-
-        forward_idx, reverse_idx = primer_trim.locate_primers(
-            self.sequences, forward, reverse, False, 1)
-
-        self.assertEqual((7, 9), forward_idx)
-        self.assertEqual((15, 17), reverse_idx)
-
-    def test_no_forward(self):
-        forward = 'GGGGGG'
-        reverse = 'TTC'
-        self.assertRaises(primer_trim.PrimerNotFound,
-                          primer_trim.locate_primers, self.sequences, forward,
-                          reverse, False, 1)
-
-    def test_no_reverse(self):
-        forward = 'TGG'
-        reverse = 'GGGG'
-        self.assertRaises(primer_trim.PrimerNotFound,
-                          primer_trim.locate_primers, self.sequences, forward,
-                          reverse, False, 1)
-
-    def test_bad_order(self):
-        """
-        Should fail if reverse primer occurs before forward primer
-        """
-        reverse = 'TGG'
-        forward = 'TTC'
-
-        self.assertRaises(primer_trim.PrimerOrderError,
-                          primer_trim.locate_primers, self.sequences, forward,
-                          reverse, False, 1)



View it on GitLab: https://salsa.debian.org/med-team/seqmagick/-/compare/e0e4a53c9741c8511b32a59d47b3859074d5c810...da7847924ed44a681a30f9c969f5ba17ca512615

-- 
View it on GitLab: https://salsa.debian.org/med-team/seqmagick/-/compare/e0e4a53c9741c8511b32a59d47b3859074d5c810...da7847924ed44a681a30f9c969f5ba17ca512615
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230611/358b2a9b/attachment-0001.htm>