[med-svn] [Git][med-team/q2-demux][upstream] New upstream version 2024.5.0+dfsg
Michael R. Crusoe (@crusoe)
gitlab at salsa.debian.org
Wed Jun 26 14:13:16 BST 2024
Michael R. Crusoe pushed to branch upstream at Debian Med / q2-demux
Commits:
e4d40ae8 by Michael R. Crusoe at 2024-06-26T14:53:55+02:00
New upstream version 2024.5.0+dfsg
- - - - -
13 changed files:
- q2_demux/__init__.py
- q2_demux/_demux.py
- − q2_demux/_format.py
- q2_demux/_subsample.py
- q2_demux/_summarize/_visualizer.py
- q2_demux/_tabulate.py
- − q2_demux/_transformer.py
- − q2_demux/_type.py
- q2_demux/_version.py
- q2_demux/plugin_setup.py
- q2_demux/tests/test_demux.py
- − q2_demux/tests/test_format.py
- − q2_demux/tests/test_transformer.py
Changes:
=====================================
q2_demux/__init__.py
=====================================
@@ -6,13 +6,13 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
-from ._demux import (emp_single, emp_paired, partition_samples_single,
- partition_samples_paired)
-from ._subsample import subsample_single, subsample_paired
-from ._summarize import summarize
-from ._filter import filter_samples
-from ._version import get_versions
-from ._tabulate import tabulate_read_counts
+from q2_demux._demux import (emp_single, emp_paired, partition_samples_single,
+ partition_samples_paired)
+from q2_demux._subsample import subsample_single, subsample_paired
+from q2_demux._summarize import summarize
+from q2_demux._filter import filter_samples
+from q2_demux._version import get_versions
+from q2_demux._tabulate import tabulate_read_counts
__version__ = get_versions()['version']
=====================================
q2_demux/_demux.py
=====================================
@@ -8,9 +8,7 @@
import gzip
import yaml
-import itertools
-import collections
-import collections.abc
+
import random
import resource
import re
@@ -27,14 +25,15 @@ from q2_types.per_sample_sequences import (
SingleLanePerSampleSingleEndFastqDirFmt,
SingleLanePerSamplePairedEndFastqDirFmt,
FastqManifestFormat, YamlFormat)
+from q2_types.multiplexed_sequences import ErrorCorrectionDetailsFmt
+from q2_types.feature_data import (
+ BarcodeSequenceFastqIterator, BarcodePairedSequenceFastqIterator
+)
+
from ._ecc import GolayDecoder
-from ._format import ErrorCorrectionDetailsFmt
from qiime2.util import duplicate
-FastqHeader = collections.namedtuple('FastqHeader', ['id', 'description'])
-
-
class ECDetails:
COLUMNS = ['id',
'sample',
@@ -61,37 +60,6 @@ class ECDetails:
self._fp.close()
-def _read_fastq_seqs(filepath):
- # This function is adapted from @jairideout's SO post:
- # http://stackoverflow.com/a/39302117/3424666
- fh = gzip.open(filepath, 'rt')
- for seq_header, seq, qual_header, qual in itertools.zip_longest(*[fh] * 4):
- yield (seq_header.strip(), seq.strip(), qual_header.strip(),
- qual.strip())
-
-
-def _trim_id(id):
- return id.rsplit('/', 1)[0]
-
-
-def _trim_description(desc):
- # The first number of ':' seperated description is the read number
- if ':' in desc:
- desc = desc.split(':', 1)[1]
- return desc.rsplit('/', 1)[0]
-
-
-def _record_to_fastq_header(record):
- tokens = record[0][1:].split(' ', maxsplit=1)
- if len(tokens) == 1:
- id, = tokens
- description = None
- else:
- id, description = tokens
-
- return FastqHeader(id=id, description=description)
-
-
# This is global so that it can be tested without changing the actual ulimits.
# NOTE: UNIX only
OPEN_FH_LIMIT, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
@@ -131,127 +99,6 @@ def _maintain_open_fh_count(per_sample_fastqs, paired=False):
rand_fh.close()
-class BarcodeSequenceFastqIterator(collections.abc.Iterable):
- def __init__(self, barcode_generator, sequence_generator,
- ignore_description_mismatch=False):
- self.barcode_generator = barcode_generator
- self.sequence_generator = sequence_generator
- self.ignore_description_mismatch = ignore_description_mismatch
-
- def __iter__(self):
- # Adapted from q2-types
- for barcode_record, sequence_record in itertools.zip_longest(
- self.barcode_generator, self.sequence_generator):
- if barcode_record is None:
- raise ValueError('More sequences were provided than barcodes.')
- if sequence_record is None:
- raise ValueError('More barcodes were provided than sequences.')
- # The id or description fields may end with "/read-number", which
- # will differ between the sequence and barcode reads. Confirm that
- # they are identical up until the last /
- barcode_header = _record_to_fastq_header(barcode_record)
- sequence_header = _record_to_fastq_header(sequence_record)
-
- # confirm that the id fields are equal
- if _trim_id(barcode_header.id) != \
- _trim_id(sequence_header.id):
- raise ValueError(
- 'Mismatched sequence ids: %s and %s' %
- (_trim_id(barcode_header.id),
- _trim_id(sequence_header.id)))
-
- if not self.ignore_description_mismatch:
- # if a description field is present, confirm that they're equal
- if barcode_header.description is None and \
- sequence_header.description is None:
- pass
- elif barcode_header.description is None:
- raise ValueError(
- 'Barcode header lines do not contain description '
- 'fields but sequence header lines do.')
- elif sequence_header.description is None:
- raise ValueError(
- 'Sequence header lines do not contain description '
- 'fields but barcode header lines do.')
- elif _trim_description(barcode_header.description) != \
- _trim_description(sequence_header.description):
- raise ValueError(
- 'Mismatched sequence descriptions: %s and %s' %
- (_trim_description(barcode_header.description),
- _trim_description(sequence_header.description)))
-
- yield barcode_record, sequence_record
-
-
-class BarcodePairedSequenceFastqIterator(collections.abc.Iterable):
- def __init__(self, barcode_generator, forward_generator,
- reverse_generator, ignore_description_mismatch=False):
- self.barcode_generator = barcode_generator
- self.forward_generator = forward_generator
- self.reverse_generator = reverse_generator
- self.ignore_description_mismatch = ignore_description_mismatch
-
- def __iter__(self):
- # Adapted from q2-types
- for barcode_record, forward_record, reverse_record \
- in itertools.zip_longest(self.barcode_generator,
- self.forward_generator,
- self.reverse_generator):
- if barcode_record is None:
- raise ValueError('More sequences were provided than barcodes.')
- if forward_record is None:
- raise ValueError('More barcodes were provided than '
- 'forward-sequences.')
- elif reverse_record is None:
- raise ValueError('More barcodes were provided than '
- 'reverse-sequences.')
- # The id or description fields may end with "/read-number", which
- # will differ between the sequence and barcode reads. Confirm that
- # they are identical up until the last /
- barcode_header = _record_to_fastq_header(barcode_record)
- forward_header = _record_to_fastq_header(forward_record)
- reverse_header = _record_to_fastq_header(reverse_record)
-
- # confirm that the id fields are equal
- if not (_trim_id(barcode_header.id) ==
- _trim_id(forward_header.id) ==
- _trim_id(reverse_header.id)):
- raise ValueError(
- 'Mismatched sequence ids: %s, %s, and %s' %
- (_trim_id(barcode_header.id),
- _trim_id(forward_header.id),
- _trim_id(reverse_header.id)))
-
- if not self.ignore_description_mismatch:
- # if a description field is present, confirm that they're equal
- if barcode_header.description is None and \
- forward_header.description is None and \
- reverse_header.description is None:
- pass
- elif barcode_header.description is None:
- raise ValueError(
- 'Barcode header lines do not contain description '
- 'fields but sequence header lines do.')
- elif forward_header.description is None:
- raise ValueError(
- 'Forward-read header lines do not contain description '
- 'fields but barcode header lines do.')
- elif reverse_header.description is None:
- raise ValueError(
- 'Reverse-read header lines do not contain description '
- 'fields but barcode header lines do.')
- elif not (_trim_description(barcode_header.description) ==
- _trim_description(forward_header.description) ==
- _trim_description(reverse_header.description)):
- raise ValueError(
- 'Mismatched sequence descriptions: %s, %s, and %s' %
- (_trim_description(barcode_header.description),
- _trim_description(forward_header.description),
- _trim_description(reverse_header.description)))
-
- yield barcode_record, forward_record, reverse_record
-
-
def _make_barcode_map(barcodes, rev_comp_mapping_barcodes):
barcode_map = {}
barcode_len = None
=====================================
q2_demux/_format.py deleted
=====================================
@@ -1,86 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from q2_types.per_sample_sequences import FastqGzFormat
-import qiime2.plugin.model as model
-from qiime2.plugin import ValidationError
-
-
-# TODO: deprecate this and alias it
-class EMPMultiplexedDirFmt(model.DirectoryFormat):
- sequences = model.File(
- r'sequences.fastq.gz', format=FastqGzFormat)
-
- barcodes = model.File(
- r'barcodes.fastq.gz', format=FastqGzFormat)
-
-
-# The new cannonical name for EMPMultiplexedDirFmt
-class EMPSingleEndDirFmt(EMPMultiplexedDirFmt):
- pass # contents inherited
-
-
-class EMPPairedEndDirFmt(model.DirectoryFormat):
- forward = model.File(
- r'forward.fastq.gz', format=FastqGzFormat)
-
- reverse = model.File(
- r'reverse.fastq.gz', format=FastqGzFormat)
-
- barcodes = model.File(
- r'barcodes.fastq.gz', format=FastqGzFormat)
-
-
-# Originally called EMPMultiplexedSingleEndDirFmt, rename was possible as no
-# artifacts where created with this view, it is just for import.
-class EMPSingleEndCasavaDirFmt(model.DirectoryFormat):
- # TODO: generalize this with a regex when we have validation in place for
- # model.FileCollections. The file names are currently designed more
- # specificially for handling MiSeq data.
- sequences = model.File(
- r'Undetermined_S0_L001_R1_001.fastq.gz', format=FastqGzFormat)
-
- barcodes = model.File(
- r'Undetermined_S0_L001_I1_001.fastq.gz', format=FastqGzFormat)
-
-
-class EMPPairedEndCasavaDirFmt(model.DirectoryFormat):
- forward = model.File(
- r'Undetermined_S0_L001_R1_001.fastq.gz', format=FastqGzFormat)
-
- reverse = model.File(
- r'Undetermined_S0_L001_R2_001.fastq.gz', format=FastqGzFormat)
-
- barcodes = model.File(
- r'Undetermined_S0_L001_I1_001.fastq.gz', format=FastqGzFormat)
-
-
-class ErrorCorrectionDetailsFmt(model.TextFileFormat):
- METADATA_COLUMNS = {
- 'sample',
- 'barcode-sequence-id',
- 'barcode-uncorrected',
- 'barcode-corrected',
- 'barcode-errors',
- }
-
- def _validate_(self, level):
- with open(str(self)) as fh:
- line = fh.readline()
-
- if len(line.strip()) == 0:
- raise ValidationError("Failed to locate header.")
-
- header = set(line.strip().split('\t'))
- for column in sorted(self.METADATA_COLUMNS):
- if column not in header:
- raise ValidationError(f"{column} is not a column")
-
-
-ErrorCorrectionDetailsDirFmt = model.SingleFileDirectoryFormat(
- 'ErrorCorrectionDetailsDirFmt', 'details.tsv', ErrorCorrectionDetailsFmt)
=====================================
q2_demux/_subsample.py
=====================================
@@ -17,7 +17,7 @@ from q2_types.per_sample_sequences import (
SingleLanePerSamplePairedEndFastqDirFmt,
CasavaOneEightSingleLanePerSampleDirFmt)
-from q2_demux._demux import _read_fastq_seqs
+from q2_types.feature_data._util import _read_fastq_seqs
def subsample_single(sequences: SingleLanePerSampleSingleEndFastqDirFmt,
=====================================
q2_demux/_summarize/_visualizer.py
=====================================
@@ -17,7 +17,7 @@ import pandas as pd
import seaborn as sns
import numpy as np
-from q2_demux._demux import _read_fastq_seqs
+from q2_types.feature_data._util import (_read_fastq_seqs, _PlotQualView)
import q2templates
TEMPLATES = pkg_resources.resource_filename('q2_demux', '_summarize')
@@ -29,17 +29,6 @@ def _decode_qual_to_phred33(qual_str):
return qual
-# TODO: Remove _PlotQualView once QIIME 2 #220 completed
-class _PlotQualView:
- """
- A very simple pass-through view which is made up of a single-end or
- paired-end directory format with a bool indicating if single or paired.
- """
- def __init__(self, directory_format, paired):
- self.directory_format = directory_format
- self.paired = paired
-
-
def _link_sample_n_to_file(file_records, counts, subsample_ns, direction):
results = collections.defaultdict(list)
for num in subsample_ns:
=====================================
q2_demux/_tabulate.py
=====================================
@@ -14,7 +14,7 @@ import qiime2
from q2_types.per_sample_sequences import (
SingleLanePerSampleSingleEndFastqDirFmt)
-from q2_demux._demux import _read_fastq_seqs
+from q2_types.feature_data._util import _read_fastq_seqs
def tabulate_read_counts(sequences: SingleLanePerSampleSingleEndFastqDirFmt
=====================================
q2_demux/_transformer.py deleted
=====================================
@@ -1,134 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-import shutil
-
-import pandas as pd
-from q2_types.per_sample_sequences import (
- SingleLanePerSampleSingleEndFastqDirFmt,
- SingleLanePerSamplePairedEndFastqDirFmt,
- FastqGzFormat)
-from qiime2 import Metadata
-
-from .plugin_setup import plugin
-from ._demux import (BarcodeSequenceFastqIterator,
- BarcodePairedSequenceFastqIterator, _read_fastq_seqs)
-from ._format import (EMPMultiplexedDirFmt,
- EMPSingleEndDirFmt, EMPSingleEndCasavaDirFmt,
- EMPPairedEndDirFmt, EMPPairedEndCasavaDirFmt,
- ErrorCorrectionDetailsFmt)
-from ._summarize import _PlotQualView
-
-
-@plugin.register_transformer
-def _1(dirfmt: EMPSingleEndDirFmt) -> BarcodeSequenceFastqIterator:
- barcode_generator = _read_fastq_seqs(
- str(dirfmt.barcodes.view(FastqGzFormat)))
- sequence_generator = _read_fastq_seqs(
- str(dirfmt.sequences.view(FastqGzFormat)))
- result = BarcodeSequenceFastqIterator(barcode_generator,
- sequence_generator)
- # ensure that dirfmt stays in scope as long as result does so these
- # generators will work.
- result.__dirfmt = dirfmt
- return result
-
-
-# TODO: remove this when names are aliased
-@plugin.register_transformer
-def _1_legacy(dirfmt: EMPMultiplexedDirFmt) -> BarcodeSequenceFastqIterator:
- return _1(dirfmt)
-
-
-# NOTE: a legacy transformer isn't needed for EMPMultiplexedSingleEndDirFmt
-# as no artifacts exist in this form, it is used for import only.
-@plugin.register_transformer
-def _2(dirfmt: EMPSingleEndCasavaDirFmt) -> EMPSingleEndDirFmt:
- # TODO: revisit this API to simpify defining transformers
- result = EMPMultiplexedDirFmt().path
-
- sequences_fp = str(result / 'sequences.fastq.gz')
- barcodes_fp = str(result / 'barcodes.fastq.gz')
- shutil.copyfile(str(dirfmt.sequences.view(FastqGzFormat)), sequences_fp)
- shutil.copyfile(str(dirfmt.barcodes.view(FastqGzFormat)), barcodes_fp)
-
- return result
-
-
-@plugin.register_transformer
-def _3(dirfmt: EMPPairedEndCasavaDirFmt) -> EMPPairedEndDirFmt:
- result = EMPMultiplexedDirFmt()
- root = result.path
-
- forward_fp = str(root / 'forward.fastq.gz')
- reverse_fp = str(root / 'reverse.fastq.gz')
- barcodes_fp = str(root / 'barcodes.fastq.gz')
- shutil.copyfile(str(dirfmt.forward.view(FastqGzFormat)), forward_fp)
- shutil.copyfile(str(dirfmt.reverse.view(FastqGzFormat)), reverse_fp)
- shutil.copyfile(str(dirfmt.barcodes.view(FastqGzFormat)), barcodes_fp)
-
- return result
-
-
-@plugin.register_transformer
-def _4(dirfmt: EMPPairedEndDirFmt) -> BarcodePairedSequenceFastqIterator:
- barcode_generator = _read_fastq_seqs(
- str(dirfmt.barcodes.view(FastqGzFormat)))
- forward_generator = _read_fastq_seqs(
- str(dirfmt.forward.view(FastqGzFormat)))
- reverse_generator = _read_fastq_seqs(
- str(dirfmt.reverse.view(FastqGzFormat)))
- result = BarcodePairedSequenceFastqIterator(barcode_generator,
- forward_generator,
- reverse_generator)
- # ensure that dirfmt stays in scope as long as result does so these
- # generators will work.
- result.__dirfmt = dirfmt
- return result
-
-
-# TODO: Remove _PlotQualView once QIIME 2 #220 completed
-@plugin.register_transformer
-def _5(dirfmt: SingleLanePerSampleSingleEndFastqDirFmt) -> _PlotQualView:
- return _PlotQualView(dirfmt, paired=False)
-
-
-@plugin.register_transformer
-def _6(dirfmt: SingleLanePerSamplePairedEndFastqDirFmt) -> _PlotQualView:
- return _PlotQualView(dirfmt, paired=True)
-
-
-@plugin.register_transformer
-def _7(dirfmt: EMPPairedEndDirFmt) -> BarcodeSequenceFastqIterator:
- barcode_generator = _read_fastq_seqs(
- str(dirfmt.barcodes.view(FastqGzFormat)))
- sequence_generator = _read_fastq_seqs(
- str(dirfmt.forward.view(FastqGzFormat)))
- result = BarcodeSequenceFastqIterator(barcode_generator,
- sequence_generator)
- # ensure that dirfmt stays in scope as long as result does so these
- # generators will work.
- result.__dirfmt = dirfmt
- return result
-
-
-@plugin.register_transformer
-def _8(data: pd.DataFrame) -> ErrorCorrectionDetailsFmt:
- ff = ErrorCorrectionDetailsFmt()
- Metadata(data).save(str(ff))
- return ff
-
-
-@plugin.register_transformer
-def _9(ff: ErrorCorrectionDetailsFmt) -> pd.DataFrame:
- return Metadata.load(str(ff)).to_dataframe()
-
-
-@plugin.register_transformer
-def _10(ff: ErrorCorrectionDetailsFmt) -> Metadata:
- return Metadata.load(str(ff))
=====================================
q2_demux/_type.py deleted
=====================================
@@ -1,18 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from qiime2.plugin import SemanticType
-
-# TODO: migrate these to q2-types someday
-RawSequences = SemanticType('RawSequences')
-
-EMPSingleEndSequences = SemanticType('EMPSingleEndSequences')
-
-EMPPairedEndSequences = SemanticType('EMPPairedEndSequences')
-
-ErrorCorrectionDetails = SemanticType('ErrorCorrectionDetails')
=====================================
q2_demux/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2024.2.0, Release-2024.2)"
- git_full = "9e6af01fb0eff039b221f95ee596238f96a3b134"
- git_date = "2024-02-16 21:56:22 +0000"
+ git_refnames = " (tag: 2024.5.0, Release-2024.5)"
+ git_full = "48b44ac667ec8180f33adb3263c9b69adc2d2ff9"
+ git_date = "2024-05-29 04:14:29 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
=====================================
q2_demux/plugin_setup.py
=====================================
@@ -6,7 +6,6 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
-import importlib
from qiime2.plugin import (
Plugin, Metadata, MetadataColumn, Categorical, Bool, Str, Int, Float,
@@ -19,13 +18,12 @@ from q2_types.per_sample_sequences import (
SequencesWithQuality, PairedEndSequencesWithQuality,
JoinedSequencesWithQuality)
-import q2_demux
-from ._type import (RawSequences, EMPSingleEndSequences, EMPPairedEndSequences,
- ErrorCorrectionDetails)
-from ._format import (EMPMultiplexedDirFmt, ErrorCorrectionDetailsDirFmt,
- EMPSingleEndDirFmt, EMPSingleEndCasavaDirFmt,
- EMPPairedEndDirFmt, EMPPairedEndCasavaDirFmt)
+from q2_types.multiplexed_sequences import (RawSequences,
+ EMPSingleEndSequences,
+ EMPPairedEndSequences,
+ ErrorCorrectionDetails)
+import q2_demux
import q2_demux._examples as ex
citations = Citations.load('citations.bib', package='q2_demux')
@@ -41,34 +39,6 @@ plugin = Plugin(
short_description='Plugin for demultiplexing & viewing sequence quality.'
)
-plugin.register_semantic_types(
- RawSequences, EMPSingleEndSequences, EMPPairedEndSequences,
- ErrorCorrectionDetails)
-
-plugin.register_formats(EMPMultiplexedDirFmt, ErrorCorrectionDetailsDirFmt,
- EMPSingleEndDirFmt, EMPSingleEndCasavaDirFmt,
- EMPPairedEndDirFmt, EMPPairedEndCasavaDirFmt)
-
-# TODO: remove when aliasing exists
-plugin.register_semantic_type_to_format(
- RawSequences,
- artifact_format=EMPSingleEndDirFmt
-)
-
-plugin.register_semantic_type_to_format(
- EMPSingleEndSequences,
- artifact_format=EMPSingleEndDirFmt
-)
-
-plugin.register_semantic_type_to_format(
- EMPPairedEndSequences,
- artifact_format=EMPPairedEndDirFmt
-)
-
-plugin.register_semantic_type_to_format(
- ErrorCorrectionDetails,
- artifact_format=ErrorCorrectionDetailsDirFmt
-)
plugin.methods.register_function(
function=q2_demux.emp_single,
@@ -347,5 +317,3 @@ plugin.methods.register_function(
'with the WHERE clause, and the `exclude_ids` parameter '
'allows for filtering of all samples not specified.',
)
-
-importlib.import_module('q2_demux._transformer')
=====================================
q2_demux/tests/test_demux.py
=====================================
@@ -19,9 +19,11 @@ import skbio
import qiime2
import numpy.testing as npt
+from q2_types.feature_data._transformer import (
+ BarcodeSequenceFastqIterator,
+ BarcodePairedSequenceFastqIterator)
from qiime2.plugin.testing import TestPluginBase, assert_no_nans_in_tables
-from q2_demux._demux import (BarcodeSequenceFastqIterator,
- BarcodePairedSequenceFastqIterator)
+
from q2_demux import (emp_single, emp_paired, partition_samples_single,
partition_samples_paired, summarize)
from q2_types.per_sample_sequences import (
@@ -39,161 +41,6 @@ class TestBase(TestPluginBase):
self.execute_examples()
-class BarcodeSequenceFastqIteratorTests(unittest.TestCase):
-
- def test_valid(self):
- barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
- ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
- ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
- ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
- ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
- ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
- ('@s4/1 abc/1', 'TTT', '+', 'PPP')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- for i, (barcode, sequence) in enumerate(bsi):
- self.assertEqual(barcode, barcodes[i])
- self.assertEqual(sequence, sequences[i])
-
- def test_too_few_barcodes(self):
- barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
- ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
- ('@s3/2 abc/2', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
- ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
- ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
- ('@s4/1 abc/1', 'TTT', '+', 'PPP')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- with self.assertRaises(ValueError):
- list(bsi)
-
- def test_too_few_sequences(self):
- barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
- ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
- ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
- ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- with self.assertRaises(ValueError):
- list(bsi)
-
- def test_mismatched_id(self):
- barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
- ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
- ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
- ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
- ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
- ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
- ('@s5/1 abc/1', 'TTT', '+', 'PPP')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- with self.assertRaises(ValueError):
- list(bsi)
-
- def test_mismatched_description(self):
- barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
- ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
- ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
- ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
- ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
- ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
- ('@s4/1 abd/1', 'TTT', '+', 'PPP')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- with self.assertRaises(ValueError):
- list(bsi)
-
- def test_mismatch_description_override(self):
- barcodes = [('@s1/2 abc/2', 'AAAA', '+', 'YYYY'),
- ('@s2/2 abc/2', 'AAAA', '+', 'PPPP'),
- ('@s3/2 abc/2', 'AACC', '+', 'PPPP'),
- ('@s4/2 abc/2', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1 abc/1', 'GGG', '+', 'YYY'),
- ('@s2/1 abc/1', 'CCC', '+', 'PPP'),
- ('@s3/1 abc/1', 'AAA', '+', 'PPP'),
- ('@s4/1 abd/1', 'TTT', '+', 'PPP')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences,
- ignore_description_mismatch=True)
- self.assertEqual(len(list(bsi)), 4)
-
- def test_mismatched_handles_slashes_in_id(self):
- # mismatch is detected as being before the last slash, even if there
- # is more than one slash
- barcodes = [('@s1/2/2 abc/2', 'AAAA', '+', 'YYYY')]
- sequences = [('@s1/1/1 abc/1', 'GGG', '+', 'YYY')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- with self.assertRaises(ValueError):
- list(bsi)
-
- def test_mismatched_handles_slashes_in_description(self):
- # mismatch is detected as being before the last slash, even if there
- # is more than one slash
- barcodes = [('@s1/2 a/2/2', 'AAAA', '+', 'YYYY')]
- sequences = [('@s1/1 a/1/1', 'GGG', '+', 'YYY')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- with self.assertRaises(ValueError):
- list(bsi)
-
- def test_no_description(self):
- barcodes = [('@s1/2', 'AAAA', '+', 'YYYY'),
- ('@s2/2', 'AAAA', '+', 'PPPP'),
- ('@s3/2', 'AACC', '+', 'PPPP'),
- ('@s4/2', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1', 'GGG', '+', 'YYY'),
- ('@s2/1', 'CCC', '+', 'PPP'),
- ('@s3/1', 'AAA', '+', 'PPP'),
- ('@s4/1', 'TTT', '+', 'PPP')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- for i, (barcode, sequence) in enumerate(bsi):
- self.assertEqual(barcode, barcodes[i])
- self.assertEqual(sequence, sequences[i])
-
- def test_only_one_description(self):
- barcodes = [('@s1/2 abc', 'AAAA', '+', 'YYYY'),
- ('@s2/2 abc', 'AAAA', '+', 'PPPP'),
- ('@s3/2 abc', 'AACC', '+', 'PPPP'),
- ('@s4/2 abc', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1', 'GGG', '+', 'YYY'),
- ('@s2/1', 'CCC', '+', 'PPP'),
- ('@s3/1', 'AAA', '+', 'PPP'),
- ('@s4/1', 'TTT', '+', 'PPP')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- with self.assertRaises(ValueError):
- list(bsi)
-
- barcodes = [('@s1/2', 'AAAA', '+', 'YYYY'),
- ('@s2/2', 'AAAA', '+', 'PPPP'),
- ('@s3/2', 'AACC', '+', 'PPPP'),
- ('@s4/2', 'AACC', '+', 'PPPP')]
-
- sequences = [('@s1/1 abc', 'GGG', '+', 'YYY'),
- ('@s2/1 abc', 'CCC', '+', 'PPP'),
- ('@s3/1 abc', 'AAA', '+', 'PPP'),
- ('@s4/1 abc', 'TTT', '+', 'PPP')]
-
- bsi = BarcodeSequenceFastqIterator(barcodes, sequences)
- with self.assertRaises(ValueError):
- list(bsi)
-
-
class EmpTestingUtils:
def _compare_sequence_to_record(self, sequence, fields):
header_line = ' '.join([sequence.metadata['id'],
=====================================
q2_demux/tests/test_format.py deleted
=====================================
@@ -1,33 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-from qiime2.plugin.testing import TestPluginBase
-from qiime2.plugin import ValidationError
-
-from q2_demux._format import ErrorCorrectionDetailsFmt
-
-
-class TestErrorCorrectionDetailsFmt(TestPluginBase):
- package = 'q2_demux.tests'
-
- def test_validate_positive(self):
- fp = self.get_data_path('error_correction_details/positive.tsv')
- # Should just work
- ErrorCorrectionDetailsFmt(fp, mode='r').validate()
-
- def test_validate_invalid_format(self):
- fp = self.get_data_path('error_correction_details/invalid.tsv')
- with self.assertRaisesRegex(ValidationError,
- 'Failed to locate header.'):
- ErrorCorrectionDetailsFmt(fp, mode='r').validate()
-
- def test_validate_missing_columns(self):
- fp = self.get_data_path('error_correction_details/missing_columns.tsv')
- with self.assertRaisesRegex(ValidationError,
- 'barcode-corrected.*is not a column'):
- ErrorCorrectionDetailsFmt(fp, mode='r').validate()
=====================================
q2_demux/tests/test_transformer.py deleted
=====================================
@@ -1,168 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2016-2023, QIIME 2 development team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# ----------------------------------------------------------------------------
-
-import unittest
-import tempfile
-
-import pandas as pd
-import pandas.testing as pdt
-
-from q2_demux._format import (EMPSingleEndDirFmt,
- EMPSingleEndCasavaDirFmt,
- ErrorCorrectionDetailsFmt)
-from q2_demux._demux import BarcodeSequenceFastqIterator
-from qiime2.plugin.testing import TestPluginBase
-from qiime2.plugin import ValidationError
-import qiime2
-
-
-class TestTransformers(TestPluginBase):
- package = 'q2_demux.tests'
-
- def setUp(self):
- # TODO generalize plugin lookup when ported to framework. This code
- # is adapted from the base class.
- try:
- from q2_demux.plugin_setup import plugin
- except ImportError:
- self.fail("Could not import plugin object.")
-
- self.plugin = plugin
-
- # TODO use qiime temp dir when ported to framework, and when the
- # configurable temp dir exists
- self.temp_dir = tempfile.TemporaryDirectory(
- prefix='q2-demux-test-temp-')
-
- def test_emp_multiplexed_format_barcode_sequence_iterator(self):
- transformer = self.get_transformer(EMPSingleEndDirFmt,
- BarcodeSequenceFastqIterator)
- dirname = 'emp_multiplexed'
- dirpath = self.get_data_path(dirname)
- bsi = transformer(EMPSingleEndDirFmt(dirpath, mode='r'))
- bsi = list(bsi)
- self.assertEqual(len(bsi), 250)
- self.assertEqual(
- bsi[0][0],
- ('@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0',
- 'TTAGGCATCTCG',
- '+',
- 'B@@FFFFFHHHH'))
- self.assertEqual(
- bsi[0][1],
- ('@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0',
- 'GCTTAGGGATTTTATTGTTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAAC'
- 'CAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACT'
- 'GTTCACCATAAACGTGACGATGAGG',
- '+',
- 'C@CFFFFFHHFHHGIJJ?FFHEIIIIHGEIIFHGIIJHGIGBGB?DHIIJJJJCFCHIEGIGG'
- 'HGFAEDCEDBCCEEA.;>?BB=288A?AB709@:3:A:C88CCD@CC444@>>34>>ACC:?C'
- 'CD<CDCA>A@A>:<?B@?<((2(>?'))
-
- def test_emp_se_multiplexed_format_barcode_sequence_iterator(self):
- transformer1 = self.get_transformer(EMPSingleEndCasavaDirFmt,
- EMPSingleEndDirFmt)
- transformer2 = self.get_transformer(EMPSingleEndDirFmt,
- BarcodeSequenceFastqIterator)
- dirname = 'emp_multiplexed_single_end'
- dirpath = self.get_data_path(dirname)
- emp_demultiplexed = \
- transformer1(EMPSingleEndCasavaDirFmt(dirpath, mode='r'))
- bsi = transformer2(EMPSingleEndDirFmt(emp_demultiplexed, mode='r'))
- bsi = list(bsi)
- self.assertEqual(len(bsi), 250)
- self.assertEqual(
- bsi[0][0],
- ('@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0',
- 'TTAGGCATCTCG',
- '+',
- 'B@@FFFFFHHHH'))
- self.assertEqual(
- bsi[0][1],
- ('@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0',
- 'GCTTAGGGATTTTATTGTTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAAC'
- 'CAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACT'
- 'GTTCACCATAAACGTGACGATGAGG',
- '+',
- 'C@CFFFFFHHFHHGIJJ?FFHEIIIIHGEIIFHGIIJHGIGBGB?DHIIJJJJCFCHIEGIGG'
- 'HGFAEDCEDBCCEEA.;>?BB=288A?AB709@:3:A:C88CCD@CC444@>>34>>ACC:?C'
- 'CD<CDCA>A@A>:<?B@?<((2(>?'))
-
- def test_invalid(self):
- dirname = 'bad'
- dirpath = self.get_data_path(dirname)
- transformer = self.get_transformer(EMPSingleEndDirFmt,
- BarcodeSequenceFastqIterator)
- with self.assertRaises(ValidationError):
- transformer(EMPSingleEndDirFmt(dirpath, mode='r'))
-
- transformer = self.get_transformer(EMPSingleEndCasavaDirFmt,
- EMPSingleEndDirFmt)
- with self.assertRaises(ValidationError):
- transformer(EMPSingleEndCasavaDirFmt(dirpath, 'r'))
-
-
-class TestErrorCorrectionDetailsFmtTransformers(TestPluginBase):
- package = 'q2_demux.tests'
-
- def setUp(self):
- super().setUp()
-
- self.df = pd.DataFrame([
- ['s1', 'seq-1', 'AAC', 'AAA', 2.],
- ['s1', 'seq-4', 'ACA', 'AAA', 20.],
- ['s2', 'seq-5', 'CCA', 'CCC', 1.],
- ['s3', 'seq-50', 'GGT', 'GGG', 1.],
- ],
- columns=['sample', 'barcode-sequence-id', 'barcode-uncorrected',
- 'barcode-corrected', 'barcode-errors'],
- index=pd.Index(['record-1', 'record-2', 'record-3', 'record-4'],
- name='id'))
-
- self.serialized = (
- 'id\tsample\tbarcode-sequence-id\tbarcode-uncorrected\t'
- 'barcode-corrected\tbarcode-errors\n'
- '#q2:types\tcategorical\tcategorical\tcategorical\tcategorical\t'
- 'numeric\n'
- 'record-1\ts1\tseq-1\tAAC\tAAA\t2\n'
- 'record-2\ts1\tseq-4\tACA\tAAA\t20\n'
- 'record-3\ts2\tseq-5\tCCA\tCCC\t1\n'
- 'record-4\ts3\tseq-50\tGGT\tGGG\t1\n'
- )
-
- def test_df_to_error_correction_details_fmt(self):
- transformer = self.get_transformer(
- pd.DataFrame, ErrorCorrectionDetailsFmt)
- obs = transformer(self.df)
-
- with obs.open() as obs:
- self.assertEqual(obs.read(), self.serialized)
-
- def test_error_correction_details_fmt_to_df(self):
- transformer = self.get_transformer(
- ErrorCorrectionDetailsFmt, pd.DataFrame)
- ff = ErrorCorrectionDetailsFmt()
- with ff.open() as fh:
- fh.write(self.serialized)
- obs = transformer(ff)
-
- pdt.assert_frame_equal(obs, self.df)
-
- def test_error_correction_details_fmt_to_metadata(self):
- transformer = self.get_transformer(
- ErrorCorrectionDetailsFmt, qiime2.Metadata)
- ff = ErrorCorrectionDetailsFmt()
- with ff.open() as fh:
- fh.write(self.serialized)
- obs = transformer(ff)
-
- self.assertEqual(obs, qiime2.Metadata(self.df))
-
-
-if __name__ == "__main__":
- unittest.main()
View it on GitLab: https://salsa.debian.org/med-team/q2-demux/-/commit/e4d40ae8961b3cd2d25d0607ccb0c87a8d2dfc46
--
This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/q2-demux/-/commit/e4d40ae8961b3cd2d25d0607ccb0c87a8d2dfc46
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240626/a39f45b3/attachment-0001.htm>
More information about the debian-med-commit
mailing list