[med-svn] [Git][med-team/catfishq][master] 4 commits: New upstream version 1.1.5+ds
Tony Mancill (@tmancill)
gitlab at salsa.debian.org
Tue Nov 9 17:12:39 GMT 2021
Tony Mancill pushed to branch master at Debian Med / catfishq
Commits:
6e580da3 by tony mancill at 2021-10-25T19:37:11-07:00
New upstream version 1.1.5+ds
- - - - -
68f924e2 by tony mancill at 2021-11-09T09:06:23-08:00
New upstream version 1.2.1+ds
- - - - -
4541e872 by tony mancill at 2021-11-09T09:06:24-08:00
Update upstream source from tag 'upstream/1.2.1+ds'
Update to upstream version '1.2.1+ds'
with Debian dir 8e496952f6f12004dce567e3d532c43726d082a7
- - - - -
cd48c49c by tony mancill at 2021-11-09T09:09:01-08:00
Prepare changelog for upload to unstable
- - - - -
7 changed files:
- PKG-INFO
- README.md
- catfishq.egg-info/PKG-INFO
- catfishq/__init__.py
- catfishq/cat_fastq.py
- debian/changelog
- setup.py
Changes:
=====================================
PKG-INFO
=====================================
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: catfishq
-Version: 1.1.5
+Version: 1.2.1
Summary: Cat FASTQ files
Home-page: UNKNOWN
Author: philres
Author-email: UNKNOWN
-License: UNKNOWN
+License: MIT
Description: UNKNOWN
Platform: UNKNOWN
=====================================
README.md
=====================================
@@ -3,14 +3,55 @@ Takes paths to an arbitrary number of zipped and unzipped FASTQ files and/or fol
Supported file extensions are: `'*.fastq', '*.fastq.gz', '*.fasta', '*.fasta.gz', '*.fa', '*.fa.gz', '*.fq', '*.fq.gz'`
+May also be used to filter FQ reads by read ID, read length, q-score, and min/max sequencing time.
+
+
# Install
``` bash
pip install catfishq
```
-# Example
+# Examples
+
+Check full command list:
+
```bash
-$ catfishq -r test/
+$ catfishq --help;
```
+Merge all FQ files within a target directory:
+
+```bash
+$ catfishq test/ > test.fastq;
+```
+Merge all FQ files within a target directory and its sub-directories (recursive):
+
+```bash
+$ catfishq -r test/ > test.fastq;
+```
+
+Merge the first 1000 reads:
+
+```bash
+$ catfishq -n 1000 test/ > test_1st_1000.fastq;
+```
+
+Merge reads with a length >=50bp and a q-score >=10:
+```bash
+$ catfishq -l 50 -q 10 test/ > test_filt.fastq;
+```
+
+Merge reads collected <60mins from sequencing start:
+
+```bash
+catfishq --min-sequencing-time 0 --max-sequencing-time 60 test/ > test_60_min.fastq; #merge reads
+```
+
+Note that when looping catfishq over multiple folders from the same run, it is quicker to grab the start time via `--print-start-time` and provide it to catfishq via `--start-time "$timestamp"`.
+
+```bash
+$ t0="$(catfishq --print-start-time test1)";
+catfishq --max-sequencing-time 60 --start-time "$t0" test1/ > test1_60_min.fastq;
+catfishq --max-sequencing-time 60 --start-time "$t0" test2/ > test2_60_min.fastq;
+```
=====================================
catfishq.egg-info/PKG-INFO
=====================================
@@ -1,10 +1,10 @@
Metadata-Version: 1.0
Name: catfishq
-Version: 1.1.5
+Version: 1.2.1
Summary: Cat FASTQ files
Home-page: UNKNOWN
Author: philres
Author-email: UNKNOWN
-License: UNKNOWN
+License: MIT
Description: UNKNOWN
Platform: UNKNOWN
=====================================
catfishq/__init__.py
=====================================
@@ -1 +1 @@
-__version__ = "1.1.5"
+__version__ = "1.2.1"
=====================================
catfishq/cat_fastq.py
=====================================
@@ -122,6 +122,14 @@ def parse_args(argv):
help="Remove duplicated reads.",
)
+ parser.add_argument(
+ "--comments",
+ choices=['forward', 'skip', 'wrap'],
+ default='wrap',
+ help="How to treat FASTQ header comments. "
+ "`forward` them 'as is', `wrap` them into 'CO:Z:xxx' tag, `skip` them in the output.",
+ )
+
parser.add_argument(
"FASTQ",
nargs="+",
@@ -174,6 +182,14 @@ def find_file_in_folder(
return files
+def parse_timestamp(time_str):
+ try:
+ time_obj = datetime.strptime(time_str,'%Y-%m-%dT%H:%M:%SZ')
+ except ValueError:
+ time_obj = datetime.strptime(time_str,'%Y-%m-%dT%H:%M:%S.%f%z').replace(tzinfo=None)
+ return time_obj
+
+
def check_seq_time(comment, max_start_time,min_start_time):
#This tests if the start time of the respective read is between
#max_sequencing_time and min_sequencing_time
@@ -183,7 +199,7 @@ def check_seq_time(comment, max_start_time,min_start_time):
else:
matchObj = re.search( r'start_time=([^ ]+)', comment, re.M|re.I)
start_str = matchObj.group(1)
- start = datetime.strptime(start_str,'%Y-%m-%dT%H:%M:%SZ')
+ start = parse_timestamp(start_str)
bool_min=0
bool_max=0
@@ -203,7 +219,7 @@ def compare_start_time(comment,min_start_time):
#The smaller time is returned
matchObj = re.search( r'start_time=([^ ]+)', comment, re.M|re.I)
start_time_str = matchObj.group(1)
- start_time = datetime.strptime(start_time_str,'%Y-%m-%dT%H:%M:%SZ')
+ start_time = parse_timestamp(start_time_str)
if(min_start_time==0):
return start_time
@@ -213,7 +229,7 @@ def compare_start_time(comment,min_start_time):
return start_time
-def parse_fastqs(filename, min_len=0, min_qscore=0, max_start_time=None, min_start_time=None):
+def parse_fastqs(filename, min_len=0, min_qscore=0, max_start_time=None, min_start_time=None, comments='wrap'):
with pysam.FastxFile(filename) as fh:
for entry in fh:
if min_len and len(entry.sequence) < min_len:
@@ -225,9 +241,10 @@ def parse_fastqs(filename, min_len=0, min_qscore=0, max_start_time=None, min_sta
continue
if not check_seq_time(entry.comment, max_start_time, min_start_time):
continue
- if entry.comment:
+ if entry.comment and comments == 'wrap':
entry.comment = "CO:Z:{}".format(entry.comment)
-
+ elif comments == 'skip':
+ entry.comment = None
yield entry
@@ -265,7 +282,7 @@ def get_start_time(paths,recursive=False):
return min_start_time
-def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, recursive=False, dedup=False, max_seq_time=0, min_seq_time=0, start_time=0, filter_read_ids_file=None):
+def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, recursive=False, dedup=False, max_seq_time=0, min_seq_time=0, start_time=0, filter_read_ids_file=None, comments='wrap'):
"""
Concatenate FASTQ files
@@ -289,7 +306,7 @@ def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, r
if start_time:
if not start_time=="min":
- start = datetime.strptime(start_time,'%Y-%m-%dT%H:%M:%SZ')
+ start = parse_timestamp(start_time)
if(max_seq_time):
max_start_time = start + timedelta(minutes=max_seq_time)
@@ -298,7 +315,7 @@ def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, r
else:
 #This option allows to automatically use the minimal start_time of
#all the given fastq files as input for --start-time
- auto_start_time=get_start_time(paths,recursive)
+ start=get_start_time(paths,recursive)
if(max_seq_time):
max_start_time = start + timedelta(minutes=max_seq_time)
@@ -314,7 +331,7 @@ def format_fq(paths, out_filename, min_len=0, min_qscore=0, max_n=0, max_bp=0, r
logging.debug("Found {} files".format(len(filenames)))
for filename in filenames:
for entry in parse_fastqs(
- filename, min_len=min_len, min_qscore=min_qscore, max_start_time=max_start_time, min_start_time=min_start_time
+ filename, min_len=min_len, min_qscore=min_qscore, max_start_time=max_start_time, min_start_time=min_start_time, comments=comments
):
if dedup and entry.name in read_ids:
continue
@@ -363,7 +380,8 @@ def main(argv=sys.argv[1:]):
max_seq_time=args.MAX_SEQ_TIME,
min_seq_time=args.MIN_SEQ_TIME,
start_time=args.START_TIME,
- filter_read_ids_file=args.FILTER_ID
+ filter_read_ids_file=args.FILTER_ID,
+ comments=args.comments,
)
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+catfishq (1.2.1+ds-1) unstable; urgency=medium
+
+ * Team upload
+ * New upstream version 1.2.1+ds
+
+ -- tony mancill <tmancill at debian.org> Tue, 09 Nov 2021 09:06:33 -0800
+
catfishq (1.1.5+ds-1) unstable; urgency=medium
* Team upload
=====================================
setup.py
=====================================
@@ -14,6 +14,7 @@ setup(
version=__version__,
author='philres',
description='Cat FASTQ files',
+ license='MIT',
zip_safe=False,
install_requires=[
'pysam'
View it on GitLab: https://salsa.debian.org/med-team/catfishq/-/compare/f5be8cea2cdc1da7c1a36a4ac3f8dd6bc063dc65...cd48c49c69a973f6a1bb5edb96bc929c411c7c2e
--
View it on GitLab: https://salsa.debian.org/med-team/catfishq/-/compare/f5be8cea2cdc1da7c1a36a4ac3f8dd6bc063dc65...cd48c49c69a973f6a1bb5edb96bc929c411c7c2e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211109/00d7c488/attachment-0001.htm>
More information about the debian-med-commit
mailing list