[med-svn] [Git][med-team/q2-demux][upstream] New upstream version 2024.5.0+dfsg

Michael R. Crusoe (@crusoe) gitlab at salsa.debian.org
Wed Jun 26 14:13:16 BST 2024



Michael R. Crusoe pushed to branch upstream at Debian Med / q2-demux


Commits:
e4d40ae8 by Michael R. Crusoe at 2024-06-26T14:53:55+02:00
New upstream version 2024.5.0+dfsg
- - - - -


13 changed files:

- q2_demux/__init__.py
- q2_demux/_demux.py
- − q2_demux/_format.py
- q2_demux/_subsample.py
- q2_demux/_summarize/_visualizer.py
- q2_demux/_tabulate.py
- − q2_demux/_transformer.py
- − q2_demux/_type.py
- q2_demux/_version.py
- q2_demux/plugin_setup.py
- q2_demux/tests/test_demux.py
- − q2_demux/tests/test_format.py
- − q2_demux/tests/test_transformer.py


Changes:

=====================================
q2_demux/__init__.py
=====================================
@@ -6,13 +6,13 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 
-from ._demux import (emp_single, emp_paired, partition_samples_single,
-                     partition_samples_paired)
-from ._subsample import subsample_single, subsample_paired
-from ._summarize import summarize
-from ._filter import filter_samples
-from ._version import get_versions
-from ._tabulate import tabulate_read_counts
+from q2_demux._demux import (emp_single, emp_paired, partition_samples_single,
+                             partition_samples_paired)
+from q2_demux._subsample import subsample_single, subsample_paired
+from q2_demux._summarize import summarize
+from q2_demux._filter import filter_samples
+from q2_demux._version import get_versions
+from q2_demux._tabulate import tabulate_read_counts
 
 
 __version__ = get_versions()['version']


=====================================
q2_demux/_demux.py
=====================================
@@ -8,9 +8,7 @@
 
 import gzip
 import yaml
-import itertools
-import collections
-import collections.abc
+
 import random
 import resource
 import re
@@ -27,14 +25,15 @@ from q2_types.per_sample_sequences import (
     SingleLanePerSampleSingleEndFastqDirFmt,
     SingleLanePerSamplePairedEndFastqDirFmt,
     FastqManifestFormat, YamlFormat)
+from q2_types.multiplexed_sequences import ErrorCorrectionDetailsFmt
+from q2_types.feature_data import (
+    BarcodeSequenceFastqIterator, BarcodePairedSequenceFastqIterator
+)
+
 from ._ecc import GolayDecoder
-from ._format import ErrorCorrectionDetailsFmt
 from qiime2.util import duplicate
 
 
-FastqHeader = collections.namedtuple('FastqHeader', ['id', 'description'])
-
-
 class ECDetails:
     COLUMNS = ['id',
                'sample',
@@ -61,37 +60,6 @@ class ECDetails:
         self._fp.close()
 
 
-def _read_fastq_seqs(filepath):
-    # This function is adapted from @jairideout's SO post:
-    # http://stackoverflow.com/a/39302117/3424666
-    fh = gzip.open(filepath, 'rt')
-    for seq_header, seq, qual_header, qual in itertools.zip_longest(*[fh] * 4):
-        yield (seq_header.strip(), seq.strip(), qual_header.strip(),
-               qual.strip())
-
-
-def _trim_id(id):
-    return id.rsplit('/', 1)[0]
-
-
-def _trim_description(desc):
-    # The first number of ':' seperated description is the read number
-    if ':' in desc:
-        desc = desc.split(':', 1)[1]
-    return desc.rsplit('/', 1)[0]
-
-
-def _record_to_fastq_header(record):
-    tokens = record[0][1:].split(' ', maxsplit=1)
-    if len(tokens) == 1:
-        id, = tokens
-        description = None
-    else:
-        id, description = tokens
-
-    return FastqHeader(id=id, description=description)
-
-
 # This is global so that it can be tested without changing the actual ulimits.
 # NOTE: UNIX only
 OPEN_FH_LIMIT, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
@@ -131,127 +99,6 @@ def _maintain_open_fh_count(per_sample_fastqs, paired=False):
             rand_fh.close()
 
 
-class BarcodeSequenceFastqIterator(collections.abc.Iterable):
-    def __init__(self, barcode_generator, sequence_generator,
-                 ignore_description_mismatch=False):
-        self.barcode_generator = barcode_generator
-        self.sequence_generator = sequence_generator
-        self.ignore_description_mismatch = ignore_description_mismatch
-
-    def __iter__(self):
-        # Adapted from q2-types
-        for barcode_record, sequence_record in itertools.zip_longest(
-                self.barcode_generator, self.sequence_generator):
-            if barcode_record is None:
-                raise ValueError('More sequences were provided than barcodes.')
-            if sequence_record is None:
-                raise ValueError('More barcodes were provided than sequences.')
-            # The id or description fields may end with "/read-number", which
-            # will differ between the sequence and barcode reads. Confirm that
-            # they are identical up until the last /
-            barcode_header = _record_to_fastq_header(barcode_record)
-            sequence_header = _record_to_fastq_header(sequence_record)
-
-            # confirm that the id fields are equal
-            if _trim_id(barcode_header.id) != \
-               _trim_id(sequence_header.id):
-                raise ValueError(
-                    'Mismatched sequence ids: %s and %s' %
-                    (_trim_id(barcode_header.id),
-                     _trim_id(sequence_header.id)))
-
-            if not self.ignore_description_mismatch:
-                # if a description field is present, confirm that they're equal
-                if barcode_header.description is None and \
-                   sequence_header.description is None:
-                    pass
-                elif barcode_header.description is None:
-                    raise ValueError(
-                        'Barcode header lines do not contain description '
-                        'fields but sequence header lines do.')
-                elif sequence_header.description is None:
-                    raise ValueError(
-                        'Sequence header lines do not contain description '
-                        'fields but barcode header lines do.')
-                elif _trim_description(barcode_header.description) != \
-                        _trim_description(sequence_header.description):
-                    raise ValueError(
-                        'Mismatched sequence descriptions: %s and %s' %
-                        (_trim_description(barcode_header.description),
-                         _trim_description(sequence_header.description)))
-
-            yield barcode_record, sequence_record
-
-
-class BarcodePairedSequenceFastqIterator(collections.abc.Iterable):
-    def __init__(self, barcode_generator, forward_generator,
-                 reverse_generator, ignore_description_mismatch=False):
-        self.barcode_generator = barcode_generator
-        self.forward_generator = forward_generator
-        self.reverse_generator = reverse_generator
-        self.ignore_description_mismatch = ignore_description_mismatch
-
-    def __iter__(self):
-        # Adapted from q2-types
-        for barcode_record, forward_record, reverse_record \
-                in itertools.zip_longest(self.barcode_generator,
-                                         self.forward_generator,
-                                         self.reverse_generator):
-            if barcode_record is None:
-                raise ValueError('More sequences were provided than barcodes.')
-            if forward_record is None:
-                raise ValueError('More barcodes were provided than '
-                                 'forward-sequences.')
-            elif reverse_record is None:
-                raise ValueError('More barcodes were provided than '
-                                 'reverse-sequences.')
-            # The id or description fields may end with "/read-number", which
-            # will differ between the sequence and barcode reads. Confirm that
-            # they are identical up until the last /
-            barcode_header = _record_to_fastq_header(barcode_record)
-            forward_header = _record_to_fastq_header(forward_record)
-            reverse_header = _record_to_fastq_header(reverse_record)
-
-            # confirm that the id fields are equal
-            if not (_trim_id(barcode_header.id) ==
-                    _trim_id(forward_header.id) ==
-                    _trim_id(reverse_header.id)):
-                raise ValueError(
-                    'Mismatched sequence ids: %s, %s, and %s' %
-                    (_trim_id(barcode_header.id),
-                     _trim_id(forward_header.id),
-                     _trim_id(reverse_header.id)))
-
-            if not self.ignore_description_mismatch:
-                # if a description field is present, confirm that they're equal
-                if barcode_header.description is None and \
-                   forward_header.description is None and \
-                   reverse_header.description is None:
-                    pass
-                elif barcode_header.description is None:
-                    raise ValueError(
-                        'Barcode header lines do not contain description '
-                        'fields but sequence header lines do.')
-                elif forward_header.description is None:
-                    raise ValueError(
-                        'Forward-read header lines do not contain description '
-                        'fields but barcode header lines do.')
-                elif reverse_header.description is None:
-                    raise ValueError(
-                        'Reverse-read header lines do not contain description '
-                        'fields but barcode header lines do.')
-                elif not (_trim_description(barcode_header.description) ==
-                          _trim_description(forward_header.description) ==
-                          _trim_description(reverse_header.description)):
-                    raise ValueError(
-                        'Mismatched sequence descriptions: %s, %s, and %s' %
-                        (_trim_description(barcode_header.description),
-                         _trim_description(forward_header.description),
-                         _trim_description(reverse_header.description)))
-
-            yield barcode_record, forward_record, reverse_record
-
-
 def _make_barcode_map(barcodes, rev_comp_mapping_barcodes):
     barcode_map = {}
     barcode_len = None


=====================================
q2_demux/_format.py deleted
=====================================
@@ -1,86 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from q2_types.per_sample_sequences import FastqGzFormat
-import qiime2.plugin.model as model
-from qiime2.plugin import ValidationError
-
-
-# TODO: deprecate this and alias it
-class EMPMultiplexedDirFmt(model.DirectoryFormat):
-    sequences = model.File(
-        r'sequences.fastq.gz', format=FastqGzFormat)
-
-    barcodes = model.File(
-        r'barcodes.fastq.gz', format=FastqGzFormat)
-
-
-# The new cannonical name for EMPMultiplexedDirFmt
-class EMPSingleEndDirFmt(EMPMultiplexedDirFmt):
-    pass  # contents inherited
-
-
-class EMPPairedEndDirFmt(model.DirectoryFormat):
-    forward = model.File(
-        r'forward.fastq.gz', format=FastqGzFormat)
-
-    reverse = model.File(
-        r'reverse.fastq.gz', format=FastqGzFormat)
-
-    barcodes = model.File(
-        r'barcodes.fastq.gz', format=FastqGzFormat)
-
-
-# Originally called EMPMultiplexedSingleEndDirFmt, rename was possible as no
-# artifacts where created with this view, it is just for import.
-class EMPSingleEndCasavaDirFmt(model.DirectoryFormat):
-    # TODO: generalize this with a regex when we have validation in place for
-    # model.FileCollections. The file names are currently designed more
-    # specificially for handling MiSeq data.
-    sequences = model.File(
-        r'Undetermined_S0_L001_R1_001.fastq.gz', format=FastqGzFormat)
-
-    barcodes = model.File(
-        r'Undetermined_S0_L001_I1_001.fastq.gz', format=FastqGzFormat)
-
-
-class EMPPairedEndCasavaDirFmt(model.DirectoryFormat):
-    forward = model.File(
-        r'Undetermined_S0_L001_R1_001.fastq.gz', format=FastqGzFormat)
-
-    reverse = model.File(
-        r'Undetermined_S0_L001_R2_001.fastq.gz', format=FastqGzFormat)
-
-    barcodes = model.File(
-        r'Undetermined_S0_L001_I1_001.fastq.gz', format=FastqGzFormat)
-
-
-class ErrorCorrectionDetailsFmt(model.TextFileFormat):
-    METADATA_COLUMNS = {
-        'sample',
-        'barcode-sequence-id',
-        'barcode-uncorrected',
-        'barcode-corrected',
-        'barcode-errors',
-    }
-
-    def _validate_(self, level):
-        with open(str(self)) as fh:
-            line = fh.readline()
-
-        if len(line.strip()) == 0:
-            raise ValidationError("Failed to locate header.")
-
-        header = set(line.strip().split('\t'))
-        for column in sorted(self.METADATA_COLUMNS):
-            if column not in header:
-                raise ValidationError(f"{column} is not a column")
-
-
-ErrorCorrectionDetailsDirFmt = model.SingleFileDirectoryFormat(
-    'ErrorCorrectionDetailsDirFmt', 'details.tsv', ErrorCorrectionDetailsFmt)


=====================================
q2_demux/_subsample.py
=====================================
@@ -17,7 +17,7 @@ from q2_types.per_sample_sequences import (
     SingleLanePerSamplePairedEndFastqDirFmt,
     CasavaOneEightSingleLanePerSampleDirFmt)
 
-from q2_demux._demux import _read_fastq_seqs
+from q2_types.feature_data._util import _read_fastq_seqs
 
 
 def subsample_single(sequences: SingleLanePerSampleSingleEndFastqDirFmt,


=====================================
q2_demux/_summarize/_visualizer.py
=====================================
@@ -17,7 +17,7 @@ import pandas as pd
 import seaborn as sns
 import numpy as np
 
-from q2_demux._demux import _read_fastq_seqs
+from q2_types.feature_data._util import (_read_fastq_seqs, _PlotQualView)
 import q2templates
 
 TEMPLATES = pkg_resources.resource_filename('q2_demux', '_summarize')
@@ -29,17 +29,6 @@ def _decode_qual_to_phred33(qual_str):
     return qual
 
 
-# TODO: Remove _PlotQualView once QIIME 2 #220 completed
-class _PlotQualView:
-    """
-    A very simple pass-through view which is made up of a single-end or
-    paired-end directory format with a bool indicating if single or paired.
-    """
-    def __init__(self, directory_format, paired):
-        self.directory_format = directory_format
-        self.paired = paired
-
-
 def _link_sample_n_to_file(file_records, counts, subsample_ns, direction):
     results = collections.defaultdict(list)
     for num in subsample_ns:


=====================================
q2_demux/_tabulate.py
=====================================
@@ -14,7 +14,7 @@ import qiime2
 from q2_types.per_sample_sequences import (
     SingleLanePerSampleSingleEndFastqDirFmt)
 
-from q2_demux._demux import _read_fastq_seqs
+from q2_types.feature_data._util import _read_fastq_seqs
 
 
 def tabulate_read_counts(sequences: SingleLanePerSampleSingleEndFastqDirFmt


=====================================
q2_demux/_transformer.py deleted
=====================================
@@ -1,134 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-import shutil
-
-import pandas as pd
-from q2_types.per_sample_sequences import (
-    SingleLanePerSampleSingleEndFastqDirFmt,
-    SingleLanePerSamplePairedEndFastqDirFmt,
-    FastqGzFormat)
-from qiime2 import Metadata
-
-from .plugin_setup import plugin
-from ._demux import (BarcodeSequenceFastqIterator,
-                     BarcodePairedSequenceFastqIterator, _read_fastq_seqs)
-from ._format import (EMPMultiplexedDirFmt,
-                      EMPSingleEndDirFmt, EMPSingleEndCasavaDirFmt,
-                      EMPPairedEndDirFmt, EMPPairedEndCasavaDirFmt,
-                      ErrorCorrectionDetailsFmt)
-from ._summarize import _PlotQualView
-
-
-@plugin.register_transformer
-def _1(dirfmt: EMPSingleEndDirFmt) -> BarcodeSequenceFastqIterator:
-    barcode_generator = _read_fastq_seqs(
-        str(dirfmt.barcodes.view(FastqGzFormat)))
-    sequence_generator = _read_fastq_seqs(
-        str(dirfmt.sequences.view(FastqGzFormat)))
-    result = BarcodeSequenceFastqIterator(barcode_generator,
-                                          sequence_generator)
-    # ensure that dirfmt stays in scope as long as result does so these
-    # generators will work.
-    result.__dirfmt = dirfmt
-    return result
-
-
-# TODO: remove this when names are aliased
-@plugin.register_transformer
-def _1_legacy(dirfmt: EMPMultiplexedDirFmt) -> BarcodeSequenceFastqIterator:
-    return _1(dirfmt)
-
-
-# NOTE: a legacy transformer isn't needed for EMPMultiplexedSingleEndDirFmt
-# as no artifacts exist in this form, it is used for import only.
-@plugin.register_transformer
-def _2(dirfmt: EMPSingleEndCasavaDirFmt) -> EMPSingleEndDirFmt:
-    # TODO: revisit this API to simpify defining transformers
-    result = EMPMultiplexedDirFmt().path
-
-    sequences_fp = str(result / 'sequences.fastq.gz')
-    barcodes_fp = str(result / 'barcodes.fastq.gz')
-    shutil.copyfile(str(dirfmt.sequences.view(FastqGzFormat)), sequences_fp)
-    shutil.copyfile(str(dirfmt.barcodes.view(FastqGzFormat)), barcodes_fp)
-
-    return result
-
-
-@plugin.register_transformer
-def _3(dirfmt: EMPPairedEndCasavaDirFmt) -> EMPPairedEndDirFmt:
-    result = EMPMultiplexedDirFmt()
-    root = result.path
-
-    forward_fp = str(root / 'forward.fastq.gz')
-    reverse_fp = str(root / 'reverse.fastq.gz')
-    barcodes_fp = str(root / 'barcodes.fastq.gz')
-    shutil.copyfile(str(dirfmt.forward.view(FastqGzFormat)), forward_fp)
-    shutil.copyfile(str(dirfmt.reverse.view(FastqGzFormat)), reverse_fp)
-    shutil.copyfile(str(dirfmt.barcodes.view(FastqGzFormat)), barcodes_fp)
-
-    return result
-
-
-@plugin.register_transformer
-def _4(dirfmt: EMPPairedEndDirFmt) -> BarcodePairedSequenceFastqIterator:
-    barcode_generator = _read_fastq_seqs(
-        str(dirfmt.barcodes.view(FastqGzFormat)))
-    forward_generator = _read_fastq_seqs(
-        str(dirfmt.forward.view(FastqGzFormat)))
-    reverse_generator = _read_fastq_seqs(
-        str(dirfmt.reverse.view(FastqGzFormat)))
-    result = BarcodePairedSequenceFastqIterator(barcode_generator,
-                                                forward_generator,
-                                                reverse_generator)
-    # ensure that dirfmt stays in scope as long as result does so these
-    # generators will work.
-    result.__dirfmt = dirfmt
-    return result
-
-
-# TODO: Remove _PlotQualView once QIIME 2 #220 completed
-@plugin.register_transformer
-def _5(dirfmt: SingleLanePerSampleSingleEndFastqDirFmt) -> _PlotQualView:
-    return _PlotQualView(dirfmt, paired=False)
-
-
-@plugin.register_transformer
-def _6(dirfmt: SingleLanePerSamplePairedEndFastqDirFmt) -> _PlotQualView:
-    return _PlotQualView(dirfmt, paired=True)
-
-
-@plugin.register_transformer
-def _7(dirfmt: EMPPairedEndDirFmt) -> BarcodeSequenceFastqIterator:
-    barcode_generator = _read_fastq_seqs(
-        str(dirfmt.barcodes.view(FastqGzFormat)))
-    sequence_generator = _read_fastq_seqs(
-        str(dirfmt.forward.view(FastqGzFormat)))
-    result = BarcodeSequenceFastqIterator(barcode_generator,
-                                          sequence_generator)
-    # ensure that dirfmt stays in scope as long as result does so these
-    # generators will work.
-    result.__dirfmt = dirfmt
-    return result
-
-
-@plugin.register_transformer
-def _8(data: pd.DataFrame) -> ErrorCorrectionDetailsFmt:
-    ff = ErrorCorrectionDetailsFmt()
-    Metadata(data).save(str(ff))
-    return ff
-
-
-@plugin.register_transformer
-def _9(ff: ErrorCorrectionDetailsFmt) -> pd.DataFrame:
-    return Metadata.load(str(ff)).to_dataframe()
-
-
-@plugin.register_transformer
-def _10(ff: ErrorCorrectionDetailsFmt) -> Metadata:
-    return Metadata.load(str(ff))


=====================================
q2_demux/_type.py deleted
=====================================
@@ -1,18 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from qiime2.plugin import SemanticType
-
-# TODO: migrate these to q2-types someday
-RawSequences = SemanticType('RawSequences')
-
-EMPSingleEndSequences = SemanticType('EMPSingleEndSequences')
-
-EMPPairedEndSequences = SemanticType('EMPPairedEndSequences')
-
-ErrorCorrectionDetails = SemanticType('ErrorCorrectionDetails')


=====================================
q2_demux/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (tag: 2024.2.0, Release-2024.2)"
-    git_full = "9e6af01fb0eff039b221f95ee596238f96a3b134"
-    git_date = "2024-02-16 21:56:22 +0000"
+    git_refnames = " (tag: 2024.5.0, Release-2024.5)"
+    git_full = "48b44ac667ec8180f33adb3263c9b69adc2d2ff9"
+    git_date = "2024-05-29 04:14:29 +0000"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords
 


=====================================
q2_demux/plugin_setup.py
=====================================
@@ -6,7 +6,6 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 
-import importlib
 
 from qiime2.plugin import (
     Plugin, Metadata, MetadataColumn, Categorical, Bool, Str, Int, Float,
@@ -19,13 +18,12 @@ from q2_types.per_sample_sequences import (
     SequencesWithQuality, PairedEndSequencesWithQuality,
     JoinedSequencesWithQuality)
 
-import q2_demux
-from ._type import (RawSequences, EMPSingleEndSequences, EMPPairedEndSequences,
-                    ErrorCorrectionDetails)
-from ._format import (EMPMultiplexedDirFmt, ErrorCorrectionDetailsDirFmt,
-                      EMPSingleEndDirFmt, EMPSingleEndCasavaDirFmt,
-                      EMPPairedEndDirFmt, EMPPairedEndCasavaDirFmt)
+from q2_types.multiplexed_sequences import (RawSequences,
+                                            EMPSingleEndSequences,
+                                            EMPPairedEndSequences,
+                                            ErrorCorrectionDetails)
 
+import q2_demux
 import q2_demux._examples as ex
 
 citations = Citations.load('citations.bib', package='q2_demux')
@@ -41,34 +39,6 @@ plugin = Plugin(
     short_description='Plugin for demultiplexing & viewing sequence quality.'
 )
 
-plugin.register_semantic_types(
-    RawSequences, EMPSingleEndSequences, EMPPairedEndSequences,
-    ErrorCorrectionDetails)
-
-plugin.register_formats(EMPMultiplexedDirFmt, ErrorCorrectionDetailsDirFmt,
-                        EMPSingleEndDirFmt, EMPSingleEndCasavaDirFmt,
-                        EMPPairedEndDirFmt, EMPPairedEndCasavaDirFmt)
-
-# TODO: remove when aliasing exists
-plugin.register_semantic_type_to_format(
-    RawSequences,
-    artifact_format=EMPSingleEndDirFmt
-)
-
-plugin.register_semantic_type_to_format(
-    EMPSingleEndSequences,
-    artifact_format=EMPSingleEndDirFmt
-)
-
-plugin.register_semantic_type_to_format(
-    EMPPairedEndSequences,
-    artifact_format=EMPPairedEndDirFmt
-)
-
-plugin.register_semantic_type_to_format(
-    ErrorCorrectionDetails,
-    artifact_format=ErrorCorrectionDetailsDirFmt
-)
 
 plugin.methods.register_function(
     function=q2_demux.emp_single,
@@ -347,5 +317,3 @@ plugin.methods.register_function(
                 'with the WHERE clause, and the `exclude_ids` parameter '
                 'allows for filtering of all samples not specified.',
 )
-
-importlib.import_module('q2_demux._transformer')


=====================================
q2_demux/tests/test_demux.py
=====================================
@@ -19,9 +19,11 @@ import skbio
 import qiime2
 import numpy.testing as npt
 
+from q2_types.feature_data._transformer import (
+    BarcodeSequenceFastqIterator,
+    BarcodePairedSequenceFastqIterator)
 from qiime2.plugin.testing import TestPluginBase, assert_no_nans_in_tables
-from q2_demux._demux import (BarcodeSequenceFastqIterator,
-                             BarcodePairedSequenceFastqIterator)
+
 from q2_demux import (emp_single, emp_paired, partition_samples_single,
                       partition_samples_paired, summarize)
 from q2_types.per_sample_sequences import (
@@ -39,161 +41,6 @@ class TestBase(TestPluginBase):
         self.execute_examples()
 
 
-class BarcodeSequenceFastqIteratorTests(unittest.TestCase):
-
-    def test_valid(self):
-        barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
-                    ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
-                     ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
-                     ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
-                     ('@s4/1 abc/1', 'TTT', '+', 'PPP')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        for i, (barcode, sequence) in enumerate(bsi):
-            self.assertEqual(barcode, barcodes[i])
-            self.assertEqual(sequence, sequences[i])
-
-    def test_too_few_barcodes(self):
-        barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2 abc/2', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
-                     ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
-                     ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
-                     ('@s4/1 abc/1', 'TTT', '+', 'PPP')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        with self.assertRaises(ValueError):
-            list(bsi)
-
-    def test_too_few_sequences(self):
-        barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
-                    ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        with self.assertRaises(ValueError):
-            list(bsi)
-
-    def test_mismatched_id(self):
-        barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
-                    ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
-                     ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
-                     ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
-                     ('@s5/1 abc/1', 'TTT', '+', 'PPP')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        with self.assertRaises(ValueError):
-            list(bsi)
-
-    def test_mismatched_description(self):
-        barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
-                    ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
-                     ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
-                     ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
-                     ('@s4/1 abd/1', 'TTT', '+', 'PPP')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        with self.assertRaises(ValueError):
-            list(bsi)
-
-    def test_mismatch_description_override(self):
-        barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
-                    ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
-                     ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
-                     ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
-                     ('@s4/1 abd/1', 'TTT', '+', 'PPP')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences,
-                                           ignore_description_mismatch=True)
-        self.assertEqual(len(list(bsi)), 4)
-
-    def test_mismatched_handles_slashes_in_id(self):
-        # mismatch is detected as being before the last slash, even if there
-        # is more than one slash
-        barcodes = [('@s1/2/2 abc/2', 'AAAA', '+', 'YYYY')]
-        sequences = [('@s1/1/1 abc/1', 'GGG', '+', 'YYY')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        with self.assertRaises(ValueError):
-            list(bsi)
-
-    def test_mismatched_handles_slashes_in_description(self):
-        # mismatch is detected as being before the last slash, even if there
-        # is more than one slash
-        barcodes = [('@s1/2 a/2/2', 'AAAA', '+', 'YYYY')]
-        sequences = [('@s1/1 a/1/1', 'GGG', '+', 'YYY')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        with self.assertRaises(ValueError):
-            list(bsi)
-
-    def test_no_description(self):
-        barcodes = [('@s1/2', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2', 'AACC', '+', 'PPPP'),
-                    ('@s4/2', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1', 'GGG', '+', 'YYY'),
-                     ('@s2/1', 'CCC', '+', 'PPP'),
-                     ('@s3/1', 'AAA', '+', 'PPP'),
-                     ('@s4/1', 'TTT', '+', 'PPP')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        for i, (barcode, sequence) in enumerate(bsi):
-            self.assertEqual(barcode, barcodes[i])
-            self.assertEqual(sequence, sequences[i])
-
-    def test_only_one_description(self):
-        barcodes = [('@s1/2 abc', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2 abc', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2 abc', 'AACC', '+', 'PPPP'),
-                    ('@s4/2 abc', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1', 'GGG', '+', 'YYY'),
-                     ('@s2/1', 'CCC', '+', 'PPP'),
-                     ('@s3/1', 'AAA', '+', 'PPP'),
-                     ('@s4/1', 'TTT', '+', 'PPP')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        with self.assertRaises(ValueError):
-            list(bsi)
-
-        barcodes = [('@s1/2', 'AAAA', '+', 'YYYY'),
-                    ('@s2/2', 'AAAA', '+', 'PPPP'),
-                    ('@s3/2', 'AACC', '+', 'PPPP'),
-                    ('@s4/2', 'AACC', '+', 'PPPP')]
-
-        sequences = [('@s1/1 abc', 'GGG', '+', 'YYY'),
-                     ('@s2/1 abc', 'CCC', '+', 'PPP'),
-                     ('@s3/1 abc', 'AAA', '+', 'PPP'),
-                     ('@s4/1 abc', 'TTT', '+', 'PPP')]
-
-        bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
-        with self.assertRaises(ValueError):
-            list(bsi)
-
-
 class EmpTestingUtils:
     def _compare_sequence_to_record(self, sequence, fields):
         header_line = ' '.join([sequence.metadata['id'],


=====================================
q2_demux/tests/test_format.py deleted
=====================================
@@ -1,33 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from qiime2.plugin.testing import TestPluginBase
-from qiime2.plugin import ValidationError
-
-from q2_demux._format import ErrorCorrectionDetailsFmt
-
-
-class TestErrorCorrectionDetailsFmt(TestPluginBase):
-    package = 'q2_demux.tests'
-
-    def test_validate_positive(self):
-        fp = self.get_data_path('error_correction_details/positive.tsv')
-        # Should just work
-        ErrorCorrectionDetailsFmt(fp, mode='r').validate()
-
-    def test_validate_invalid_format(self):
-        fp = self.get_data_path('error_correction_details/invalid.tsv')
-        with self.assertRaisesRegex(ValidationError,
-                                    'Failed to locate header.'):
-            ErrorCorrectionDetailsFmt(fp, mode='r').validate()
-
-    def test_validate_missing_columns(self):
-        fp = self.get_data_path('error_correction_details/missing_columns.tsv')
-        with self.assertRaisesRegex(ValidationError,
-                                    'barcode-corrected.*is not a column'):
-            ErrorCorrectionDetailsFmt(fp, mode='r').validate()


=====================================
q2_demux/tests/test_transformer.py deleted
=====================================
@@ -1,168 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-import unittest
-import tempfile
-
-import pandas as pd
-import pandas.testing as pdt
-
-from q2_demux._format import (EMPSingleEndDirFmt,
-                              EMPSingleEndCasavaDirFmt,
-                              ErrorCorrectionDetailsFmt)
-from q2_demux._demux import BarcodeSequenceFastqIterator
-from qiime2.plugin.testing import TestPluginBase
-from qiime2.plugin import ValidationError
-import qiime2
-
-
-class TestTransformers(TestPluginBase):
-    package = 'q2_demux.tests'
-
-    def setUp(self):
-        # TODO generalize plugin lookup when ported to framework. This code
-        # is adapted from the base class.
-        try:
-            from q2_demux.plugin_setup import plugin
-        except ImportError:
-            self.fail("Could not import plugin object.")
-
-        self.plugin = plugin
-
-        # TODO use qiime temp dir when ported to framework, and when the
-        # configurable temp dir exists
-        self.temp_dir = tempfile.TemporaryDirectory(
-            prefix='q2-demux-test-temp-')
-
-    def test_emp_multiplexed_format_barcode_sequence_iterator(self):
-        transformer = self.get_transformer(EMPSingleEndDirFmt,
-                                           BarcodeSequenceFastqIterator)
-        dirname = 'emp_multiplexed'
-        dirpath = self.get_data_path(dirname)
-        bsi = transformer(EMPSingleEndDirFmt(dirpath, mode='r'))
-        bsi = list(bsi)
-        self.assertEqual(len(bsi), 250)
-        self.assertEqual(
-            bsi[0][0],
-            ('@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0',
-             'TTAGGCATCTCG',
-             '+',
-             'B@@FFFFFHHHH'))
-        self.assertEqual(
-            bsi[0][1],
-            ('@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0',
-             'GCTTAGGGATTTTATTGTTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAAC'
-             'CAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACT'
-             'GTTCACCATAAACGTGACGATGAGG',
-             '+',
-             'C@CFFFFFHHFHHGIJJ?FFHEIIIIHGEIIFHGIIJHGIGBGB?DHIIJJJJCFCHIEGIGG'
-             'HGFAEDCEDBCCEEA.;>?BB=288A?AB709@:3:A:C88CCD@CC444@>>34>>ACC:?C'
-             'CD<CDCA>A@A>:<?B@?<((2(>?'))
-
-    def test_emp_se_multiplexed_format_barcode_sequence_iterator(self):
-        transformer1 = self.get_transformer(EMPSingleEndCasavaDirFmt,
-                                            EMPSingleEndDirFmt)
-        transformer2 = self.get_transformer(EMPSingleEndDirFmt,
-                                            BarcodeSequenceFastqIterator)
-        dirname = 'emp_multiplexed_single_end'
-        dirpath = self.get_data_path(dirname)
-        emp_demultiplexed = \
-            transformer1(EMPSingleEndCasavaDirFmt(dirpath, mode='r'))
-        bsi = transformer2(EMPSingleEndDirFmt(emp_demultiplexed, mode='r'))
-        bsi = list(bsi)
-        self.assertEqual(len(bsi), 250)
-        self.assertEqual(
-            bsi[0][0],
-            ('@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0',
-             'TTAGGCATCTCG',
-             '+',
-             'B@@FFFFFHHHH'))
-        self.assertEqual(
-            bsi[0][1],
-            ('@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0',
-             'GCTTAGGGATTTTATTGTTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAAC'
-             'CAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACT'
-             'GTTCACCATAAACGTGACGATGAGG',
-             '+',
-             'C@CFFFFFHHFHHGIJJ?FFHEIIIIHGEIIFHGIIJHGIGBGB?DHIIJJJJCFCHIEGIGG'
-             'HGFAEDCEDBCCEEA.;>?BB=288A?AB709@:3:A:C88CCD@CC444@>>34>>ACC:?C'
-             'CD<CDCA>A@A>:<?B@?<((2(>?'))
-
-    def test_invalid(self):
-        dirname = 'bad'
-        dirpath = self.get_data_path(dirname)
-        transformer = self.get_transformer(EMPSingleEndDirFmt,
-                                           BarcodeSequenceFastqIterator)
-        with self.assertRaises(ValidationError):
-            transformer(EMPSingleEndDirFmt(dirpath, mode='r'))
-
-        transformer = self.get_transformer(EMPSingleEndCasavaDirFmt,
-                                           EMPSingleEndDirFmt)
-        with self.assertRaises(ValidationError):
-            transformer(EMPSingleEndCasavaDirFmt(dirpath, 'r'))
-
-
-class TestErrorCorrectionDetailsFmtTransformers(TestPluginBase):
-    package = 'q2_demux.tests'
-
-    def setUp(self):
-        super().setUp()
-
-        self.df = pd.DataFrame([
-                ['s1', 'seq-1',  'AAC', 'AAA', 2.],
-                ['s1', 'seq-4',  'ACA', 'AAA', 20.],
-                ['s2', 'seq-5',  'CCA', 'CCC', 1.],
-                ['s3', 'seq-50', 'GGT', 'GGG', 1.],
-            ],
-            columns=['sample', 'barcode-sequence-id', 'barcode-uncorrected',
-                     'barcode-corrected', 'barcode-errors'],
-            index=pd.Index(['record-1', 'record-2', 'record-3', 'record-4'],
-                           name='id'))
-
-        self.serialized = (
-            'id\tsample\tbarcode-sequence-id\tbarcode-uncorrected\t'
-            'barcode-corrected\tbarcode-errors\n'
-            '#q2:types\tcategorical\tcategorical\tcategorical\tcategorical\t'
-            'numeric\n'
-            'record-1\ts1\tseq-1\tAAC\tAAA\t2\n'
-            'record-2\ts1\tseq-4\tACA\tAAA\t20\n'
-            'record-3\ts2\tseq-5\tCCA\tCCC\t1\n'
-            'record-4\ts3\tseq-50\tGGT\tGGG\t1\n'
-        )
-
-    def test_df_to_error_correction_details_fmt(self):
-        transformer = self.get_transformer(
-            pd.DataFrame, ErrorCorrectionDetailsFmt)
-        obs = transformer(self.df)
-
-        with obs.open() as obs:
-            self.assertEqual(obs.read(), self.serialized)
-
-    def test_error_correction_details_fmt_to_df(self):
-        transformer = self.get_transformer(
-            ErrorCorrectionDetailsFmt, pd.DataFrame)
-        ff = ErrorCorrectionDetailsFmt()
-        with ff.open() as fh:
-            fh.write(self.serialized)
-        obs = transformer(ff)
-
-        pdt.assert_frame_equal(obs, self.df)
-
-    def test_error_correction_details_fmt_to_metadata(self):
-        transformer = self.get_transformer(
-            ErrorCorrectionDetailsFmt, qiime2.Metadata)
-        ff = ErrorCorrectionDetailsFmt()
-        with ff.open() as fh:
-            fh.write(self.serialized)
-        obs = transformer(ff)
-
-        self.assertEqual(obs, qiime2.Metadata(self.df))
-
-
-if __name__ == "__main__":
-    unittest.main()



View it on GitLab: https://salsa.debian.org/med-team/q2-demux/-/commit/e4d40ae8961b3cd2d25d0607ccb0c87a8d2dfc46

-- 
This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/q2-demux/-/commit/e4d40ae8961b3cd2d25d0607ccb0c87a8d2dfc46
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240626/a39f45b3/attachment-0001.htm>


More information about the debian-med-commit mailing list