[med-svn] [Git][med-team/catfishq][upstream] New upstream version 1.1.5+ds

Tony Mancill (@tmancill) gitlab at salsa.debian.org
Tue Oct 26 04:04:03 BST 2021



Tony Mancill pushed to branch upstream at Debian Med / catfishq


Commits:
6e580da3 by tony mancill at 2021-10-25T19:37:11-07:00
New upstream version 1.1.5+ds
- - - - -


5 changed files:

- PKG-INFO
- catfishq.egg-info/PKG-INFO
- catfishq/__init__.py
- catfishq/cat_fastq.py
- setup.py


Changes:

=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: catfishq
-Version: 1.1.3
+Version: 1.1.5
 Summary: Cat FASTQ files
 Home-page: UNKNOWN
 Author: philres


=====================================
catfishq.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: catfishq
-Version: 1.1.3
+Version: 1.1.5
 Summary: Cat FASTQ files
 Home-page: UNKNOWN
 Author: philres


=====================================
catfishq/__init__.py
=====================================
@@ -0,0 +1 @@
+__version__ = "1.1.5"


=====================================
catfishq/cat_fastq.py
=====================================
@@ -10,6 +10,9 @@ import sys
 from pathlib import Path
 from datetime import datetime, timedelta
 
+from . import __version__
+
+
 
 LOOKUP = []
 
@@ -80,15 +83,19 @@ def parse_args(argv):
         "-q", "--min-qscore", dest="MIN_QSCORE", type=int, default=0, help="Minimum q-score"
     )
     parser.add_argument(
-        "--max-sequencing-time", dest="MAX_SEQ_TIME", type=int, default=0, help="Only output reads that where sequenced up to the given time (minutes)."
+        "--max-sequencing-time", dest="MAX_SEQ_TIME", type=int, default=None, help="Only output reads that where sequenced at or up to the given time (minutes)."
     )
     parser.add_argument(
-        "--min-sequencing-time", dest="MIN_SEQ_TIME", type=int, default=0, help="Only output reads that where sequenced after the given time (minutes)."
+        "--min-sequencing-time", dest="MIN_SEQ_TIME", type=int, default=None, help="Only output reads that where sequenced at or after the given time (minutes)."
     )
     parser.add_argument(
         "--start-time", dest="START_TIME", type=str, default=None, help="Starttime of the run as guppy time stamp (only required with --sequencing-time). If 'min' is given as argument the minimal time is detected automatically."
     )
 
+    parser.add_argument(
+        "--filter-id", dest="FILTER_ID", type=str, default=None, help="Only print reads with IDs present in file."
+    )
+
     parser.add_argument(
         "--print-start-time", dest="PRINT_START_TIME", action="store_true", help="Print the minimal start_time of all fastq files"
     )
@@ -122,6 +129,15 @@ def parse_args(argv):
         help="FASTQ files or folders containing FASTQ files",
     )
 
+    parser.add_argument(
+        "-v",
+        '--version',
+        action='version',
+        version='catfish ' + __version__,
+        help="Print version",
+    )
+
+
     args = parser.parse_args(argv)
 
     return args
@@ -159,7 +175,7 @@ def find_file_in_folder(
 
 
 def check_seq_time(comment, max_start_time,min_start_time):
-    #This tests if the start time of the respective read is between 
+    #This tests if the start time of the respective read is between
     #max_sequencing_time and min_sequencing_time
     #If one of the times is not given the condition is automatically considered true
     if (max_start_time == None and min_start_time == None):
@@ -168,15 +184,15 @@ def check_seq_time(comment, max_start_time,min_start_time):
         matchObj = re.search( r'start_time=([^ ]+)', comment, re.M|re.I)
         start_str = matchObj.group(1)
         start = datetime.strptime(start_str,'%Y-%m-%dT%H:%M:%SZ')
-        
+
         bool_min=0
         bool_max=0
-        
+
         if (max_start_time == None or start<=max_start_time):
             bool_max=1
         if (min_start_time == None or start>=min_start_time):
             bool_min=1
-        
+
         if (bool_min == 1 and bool_max == 1):
             return True
         else:
@@ -195,8 +211,8 @@ def compare_start_time(comment,min_start_time):
         return min_start_time
     else:
         return start_time
-            
-    
+
+
 def parse_fastqs(filename, min_len=0, min_qscore=0, max_start_time=None, min_start_time=None):
     with pysam.FastxFile(filename) as fh:
         for entry in fh:
@@ -230,9 +246,9 @@ def get_file_names(path, recursive):
 def get_start_time(paths,recursive=False):
     """
     Only print the start time.
-    This function automatically detects the minmal start_time of 
+    This function automatically detects the minmal start_time of
     all the given fastq files
-            
+
 
     :param paths: Input FASTQ files or folders containing FASTQ files
     :return: min_start_time
@@ -249,7 +265,7 @@ def get_start_time(paths,recursive=False):
     return min_start_time
 
 
-def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, recursive=False, dedup=False, max_seq_time=0, min_seq_time=0, start_time=0):
+def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, recursive=False, dedup=False, max_seq_time=0, min_seq_time=0, start_time=0, filter_read_ids_file=None):
     """
     Concatenate FASTQ files
 
@@ -261,18 +277,33 @@ def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, r
     max_start_time = None
     min_start_time = None
 
+    keep_ids = None
+    if filter_read_ids_file:
+        keep_ids = set()
+        with open(filter_read_ids_file, "r") as fh:
+            for line in fh:
+                read_id = line.strip()
+                keep_ids.add(read_id)
+            logging.info("Found {} read ids.".format(len(keep_ids)))
+
+
     if start_time:
         if not start_time=="min":
             start = datetime.strptime(start_time,'%Y-%m-%dT%H:%M:%SZ')
-            max_start_time = start + timedelta(minutes=min_seq_time)
-            min_start_time = start + timedelta(minutes=max_seq_time)
+
+            if(max_seq_time):
+                max_start_time = start + timedelta(minutes=max_seq_time)
+            if(min_seq_time):
+                min_start_time = start + timedelta(minutes=min_seq_time)
         else:
-            #This option allows to automatically use the minmal start_time of 
+            #This option allows to automatically use the minmal start_time of
             #all the given fastq files as input for --start-time
             auto_start_time=get_start_time(paths,recursive)
-            max_start_time=auto_start_time + timedelta(minutes=max_seq_time)
-            min_start_time=auto_start_time + timedelta(minutes=min_seq_time)
 
+            if(max_seq_time):
+                max_start_time = start + timedelta(minutes=max_seq_time)
+            if(min_seq_time):
+                min_start_time = start + timedelta(minutes=min_seq_time)
     read_ids = set()
 
     n = 0
@@ -288,6 +319,9 @@ def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, r
                     if dedup and entry.name in read_ids:
                         continue
 
+                    if keep_ids and entry.name not in keep_ids:
+                        continue
+
                     fout.write(str(entry) + "\n")
                     if dedup:
                         read_ids.add(entry.name)
@@ -328,7 +362,8 @@ def main(argv=sys.argv[1:]):
             dedup=args.DEDUP,
             max_seq_time=args.MAX_SEQ_TIME,
             min_seq_time=args.MIN_SEQ_TIME,
-            start_time=args.START_TIME
+            start_time=args.START_TIME,
+            filter_read_ids_file=args.FILTER_ID
         )
 
 


=====================================
setup.py
=====================================
@@ -5,9 +5,9 @@ CatfishQ
 import os
 from setuptools import setup, find_packages
 
-os.environ['GIT_SSL_NO_VERIFY'] = 'true'
+from catfishq import __version__
 
-__version__ = '1.1.3'
+os.environ['GIT_SSL_NO_VERIFY'] = 'true'
 
 setup(
     name='catfishq',



View it on GitLab: https://salsa.debian.org/med-team/catfishq/-/commit/6e580da315004eac7e4e6599a713d04310c3b692

-- 
View it on GitLab: https://salsa.debian.org/med-team/catfishq/-/commit/6e580da315004eac7e4e6599a713d04310c3b692
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211026/738e933a/attachment-0001.htm>


More information about the debian-med-commit mailing list