[med-svn] [Git][med-team/umis][upstream] New upstream version 1.0.8
Andreas Tille (@tille)
gitlab at salsa.debian.org
Tue Oct 12 08:18:13 BST 2021
Andreas Tille pushed to branch upstream at Debian Med / umis
Commits:
dd42ef2b by Andreas Tille at 2021-10-12T09:00:16+02:00
New upstream version 1.0.8
- - - - -
3 changed files:
- HISTORY.md
- setup.py
- umis/umis.py
Changes:
=====================================
HISTORY.md
=====================================
@@ -1,3 +1,6 @@
+## 1.0.8
+- Add support for dual UMI indexes. Thanks @lbeltrame!
+
## 1.0.7
- Ensure headers are not written when writing out a Series, to make us compatible with pandas > 0.24.
- Fix for deprecated .ix call, .loc is the new replacement. Thanks to @naumenko-sa.
=====================================
setup.py
=====================================
@@ -8,7 +8,7 @@ def read(fname):
setup(
name='umis',
- version='1.0.7',
+ version='1.0.8',
description='Package for estimating UMI counts in Transcript Tag Counting data.',
packages=find_packages(),
install_requires=['click', 'pysam>=0.8.3', 'pandas', 'regex', 'scipy', 'toolz'],
=====================================
umis/umis.py
=====================================
@@ -102,7 +102,7 @@ def write_fastq(filename):
"""
if filename:
if filename.endswith('gz'):
- filename_fh = gzip.open(filename, mode='wb')
+ filename_fh = gzip.open(filename, mode='wt')
else:
filename_fh = open(filename, mode='w')
else:
@@ -190,7 +190,7 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
options = _infer_transform_options(transform)
read_template = '{name}'
logger.info("Transforming %s." % fastq1)
- if options.dual_index:
+ if options.dual_index and options.CB:
logger.info("Detected dual cellular indexes.")
if separate_cb:
read_template += ':CELL_{CB1}-{CB2}'
@@ -205,9 +205,12 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
elif options.CB or demuxed_cb:
logger.info("Detected cellular barcodes.")
read_template += ':CELL_{CB}'
- if options.MB:
+ if options.MB and options.dual_index:
+ logger.info("Detected dual UMI.")
+ read_template += ':UMI_{MB1}-{MB2}'
+ elif options.MB:
logger.info("Detected UMI.")
- read_template += ':UMI_{MB}'
+ read_template += ":UMI_{MB}"
if options.SB:
logger.info("Detected sample.")
read_template += ':SAMPLE_{SB}'
@@ -251,7 +254,7 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
for chunk in p.map(transform, list(bigchunk)):
if paired:
for read1_dict, read2_dict in tz.partition(2, chunk):
- if options.dual_index:
+ if options.dual_index and options.CB:
if not separate_cb:
read1_dict['CB'] = read1_dict['CB1'] + read1_dict['CB2']
read2_dict['CB'] = read2_dict['CB1'] + read2_dict['CB2']
@@ -259,6 +262,8 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
if demuxed_cb:
read1_dict['CB'] = demuxed_cb
read2_dict['CB'] = demuxed_cb
+ if options.dual_index and options.MB:
+ read1_dict['MB'] = read1_dict['MB1'] + read2_dict['MB2']
# Deal with spaces in read names
if keep_fastq_tags:
@@ -282,13 +287,16 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
fastq2out_fh.write(read_template.format(**read2_dict))
else:
for read1_dict in chunk:
- if options.dual_index:
+ if options.dual_index and options.CB:
if not separate_cb:
read1_dict['CB'] = read1_dict['CB1'] + read1_dict['CB2']
if demuxed_cb:
read1_dict['CB'] = demuxed_cb
+ if options.dual_index and options.MB:
+ read1_dict['MB'] = read1_dict['MB1'] + read1_dict['MB2']
+
# Deal with spaces in read names
if keep_fastq_tags:
name, tag = read1_dict['name'].split(' ')
@@ -326,6 +334,8 @@ def _infer_transform_options(transform):
triple_index = True
else:
dual_index = True
+ if "MB1" in rx:
+ dual_index = True
if "SB" in rx:
SB = True
if "CB" in rx:
@@ -353,7 +363,7 @@ def _extract_readnum(read_dict):
def transformer(chunk, read1_regex, read2_regex, read3_regex, read4_regex,
paired=False):
# Parse the reads with the regexes
- update_keys = ("MB", "CB", "CB1", "CB2", "SP")
+ update_keys = ("MB", "CB", "CB1", "CB2", "SP", "MB1", "MB2")
reads = []
for read1, read2, read3, read4 in chunk:
read1_match = read1_regex.search(read1)
View it on GitLab: https://salsa.debian.org/med-team/umis/-/commit/dd42ef2b3ba77febd0afbda28f8e0139f95d662c
--
View it on GitLab: https://salsa.debian.org/med-team/umis/-/commit/dd42ef2b3ba77febd0afbda28f8e0139f95d662c
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211012/37853285/attachment-0001.htm>
More information about the debian-med-commit mailing list