[med-svn] [Git][med-team/umis][upstream] New upstream version 1.0.8

Tue Oct 12 08:18:13 BST 2021


Andreas Tille pushed to branch upstream at Debian Med / umis


Commits:
dd42ef2b by Andreas Tille at 2021-10-12T09:00:16+02:00
New upstream version 1.0.8
- - - - -


3 changed files:

- HISTORY.md
- setup.py
- umis/umis.py


Changes:

=====================================
HISTORY.md
=====================================
@@ -1,3 +1,6 @@
+## 1.0.8
+- Add support for dual UMI indexes. Thanks @lbeltrame!
+
 ## 1.0.7
 - Ensure headers are not written when writing out a Series, to make us compatible with pandas > 0.24.
 - Fix for deprecated .ix call, .loc is the new replacement. Thanks to @naumenko-sa.


=====================================
setup.py
=====================================
@@ -8,7 +8,7 @@ def read(fname):
 
 setup(
         name='umis',
-        version='1.0.7',
+        version='1.0.8',
         description='Package for estimating UMI counts in Transcript Tag Counting data.',
         packages=find_packages(),
         install_requires=['click', 'pysam>=0.8.3', 'pandas', 'regex', 'scipy', 'toolz'],


=====================================
umis/umis.py
=====================================
@@ -102,7 +102,7 @@ def write_fastq(filename):
     """
     if filename:
         if filename.endswith('gz'):
-            filename_fh = gzip.open(filename, mode='wb')
+            filename_fh = gzip.open(filename, mode='wt')
         else:
             filename_fh = open(filename, mode='w')
     else:
@@ -190,7 +190,7 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
     options = _infer_transform_options(transform)
     read_template = '{name}'
     logger.info("Transforming %s." % fastq1)
-    if options.dual_index:
+    if options.dual_index and options.CB:
         logger.info("Detected dual cellular indexes.")
         if separate_cb:
             read_template += ':CELL_{CB1}-{CB2}'
@@ -205,9 +205,12 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
     elif options.CB or demuxed_cb:
         logger.info("Detected cellular barcodes.")
         read_template += ':CELL_{CB}'
-    if options.MB:
+    if options.MB and options.dual_index:
+        logger.info("Detected dual UMI.")
+        read_template += ':UMI_{MB1}-{MB2}'
+    elif options.MB:
         logger.info("Detected UMI.")
-        read_template += ':UMI_{MB}'
+        read_template += ":UMI_{MB}"
     if options.SB:
         logger.info("Detected sample.")
         read_template += ':SAMPLE_{SB}'
@@ -251,7 +254,7 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
         for chunk in p.map(transform, list(bigchunk)):
             if paired:
                 for read1_dict, read2_dict in tz.partition(2, chunk):
-                    if options.dual_index:
+                    if options.dual_index and options.CB:
                         if not separate_cb:
                             read1_dict['CB'] = read1_dict['CB1'] + read1_dict['CB2']
                             read2_dict['CB'] = read2_dict['CB1'] + read2_dict['CB2']
@@ -259,6 +262,8 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
                     if demuxed_cb:
                         read1_dict['CB'] = demuxed_cb
                         read2_dict['CB'] = demuxed_cb
+                    if options.dual_index and options.MB:
+                        read1_dict['MB'] = read1_dict['MB1'] + read2_dict['MB2']
 
                     # Deal with spaces in read names
                     if keep_fastq_tags:
@@ -282,13 +287,16 @@ def fastqtransform(transform, fastq1, fastq2, fastq3, fastq4, keep_fastq_tags,
                         fastq2out_fh.write(read_template.format(**read2_dict))
             else:
                 for read1_dict in chunk:
-                    if options.dual_index:
+                    if options.dual_index and options.CB:
                         if not separate_cb:
                             read1_dict['CB'] = read1_dict['CB1'] + read1_dict['CB2']
 
                     if demuxed_cb:
                         read1_dict['CB'] = demuxed_cb
 
+                    if options.dual_index and options.MB:
+                        read1_dict['MB'] = read1_dict['MB1'] + read1_dict['MB2']
+
                     # Deal with spaces in read names
                     if keep_fastq_tags:
                         name, tag = read1_dict['name'].split(' ')
@@ -326,6 +334,8 @@ def _infer_transform_options(transform):
                 triple_index = True
             else:
                 dual_index = True
+        if "MB1" in rx:
+            dual_index = True
         if "SB" in rx:
             SB = True
         if "CB" in rx:
@@ -353,7 +363,7 @@ def _extract_readnum(read_dict):
 def transformer(chunk, read1_regex, read2_regex, read3_regex, read4_regex,
                 paired=False):
     # Parse the reads with the regexes
-    update_keys = ("MB", "CB", "CB1", "CB2", "SP")
+    update_keys = ("MB", "CB", "CB1", "CB2", "SP", "MB1", "MB2")
     reads = []
     for read1, read2, read3, read4 in chunk:
         read1_match = read1_regex.search(read1)



View it on GitLab: https://salsa.debian.org/med-team/umis/-/commit/dd42ef2b3ba77febd0afbda28f8e0139f95d662c

-- 
View it on GitLab: https://salsa.debian.org/med-team/umis/-/commit/dd42ef2b3ba77febd0afbda28f8e0139f95d662c
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211012/37853285/attachment-0001.htm>