[med-svn] [Git][med-team/nanolyse][master] 4 commits: New upstream version 1.2.0

Andreas Tille gitlab at salsa.debian.org
Fri Oct 23 14:08:50 BST 2020



Andreas Tille pushed to branch master at Debian Med / nanolyse


Commits:
bb65c8a5 by Andreas Tille at 2020-10-23T15:07:05+02:00
New upstream version 1.2.0
- - - - -
770239ef by Andreas Tille at 2020-10-23T15:07:05+02:00
routine-update: New upstream version

- - - - -
8b2485ba by Andreas Tille at 2020-10-23T15:07:06+02:00
Update upstream source from tag 'upstream/1.2.0'

Update to upstream version '1.2.0'
with Debian dir 37bea00ea1e3ec30eddc47879ce3ef07d58b3723
- - - - -
60efe4b0 by Andreas Tille at 2020-10-23T15:08:25+02:00
routine-update: Ready to upload to unstable

- - - - -


6 changed files:

- .travis.yml
- MANIFEST.in
- debian/changelog
- nanolyse/NanoLyse.py
- nanolyse/version.py
- setup.py


Changes:

=====================================
.travis.yml
=====================================
@@ -1,9 +1,9 @@
 language: python
 
 python:
-  - "3.5"
   - "3.6"
   - "3.7"
+  - "3.8"
 
 before_install:
   - cp README.md README.rst


=====================================
MANIFEST.in
=====================================
@@ -1 +1,2 @@
 include reference/lambda.fasta.gz
+include README.md


=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+nanolyse (1.2.0-1) unstable; urgency=medium
+
+  * New upstream version
+
+ -- Andreas Tille <tille at debian.org>  Fri, 23 Oct 2020 15:07:15 +0200
+
 nanolyse (1.1.1-1) unstable; urgency=medium
 
   * Initial release (Closes: #963620)


=====================================
nanolyse/NanoLyse.py
=====================================
@@ -43,7 +43,14 @@ def main():
     try:
         logging.info('NanoLyse {} started with arguments {}'.format(__version__, args))
         aligner = getIndex(args.reference)
-        align(aligner, sys.stdin)
+        if args.summary_in:
+            import tempfile
+            tmp = tempfile.TemporaryFile()
+            filter_reads(aligner, sys.stdin, tmp=tmp)
+            logging.info('Filtering the summary file.')
+            filter_summary(args.summary_in, args.summary_out, tmp)
+        else:
+            filter_reads(aligner, sys.stdin)
         logging.info('NanoLyse finished.')
     except Exception as e:
         logging.error(e, exc_info=True)
@@ -53,11 +60,11 @@ def main():
 def get_args():
     epilog = """EXAMPLES:
     gunzip -c reads.fastq.gz | NanoLyse | gzip > reads_without_lambda.fastq.gz
-    gunzip -c reads.fastq.gz | NanoLyse | NanoFilt -q 12 | gzip > filtered_reads_without_lambda.fastq.gz
-    gunzip -c reads.fastq.gz | NanoLyse --reference mygenome.fa.gz | gzip > reads_without_mygenome.fastq.gz
+    gunzip -c reads.fastq.gz | NanoLyse | NanoFilt -q 12 | gzip > filt_reads_without_lambda.fastq.gz
+    gunzip -c reads.fastq.gz | NanoLyse --reference mydb.fa.gz | gzip > reads_without_mydb.fastq.gz
     """
     parser = ArgumentParser(
-        description="Remove reads mapping to the lambda genome. Reads fastq from stdin and writes to stdout.",
+        description="Remove reads mapping to DNA CS. Reads fastq on stdin and writes to stdout.",
         epilog=epilog,
         formatter_class=custom_formatter,
         add_help=False)
@@ -70,12 +77,17 @@ def get_args():
                          help="Print version and exit.",
                          action="version",
                          version='NanoLyse {}'.format(__version__))
+    parser.add_argument("--summary_in", help="Summary file to filter")
+    parser.add_argument("--summary_out", help="with --summary_in: name of output file.")
     parser.add_argument("-r", "--reference",
-                        help="Specify a reference fasta file against which to filter.")
+                        help="Specify a fasta file against which to filter. Standard is DNA CS.")
     parser.add_argument("--logfile",
                         help="Specify the path and filename for the log file.",
                         default="NanoLyse.log")
-    return parser.parse_args()
+    args = parser.parse_args()
+    if bool(args.summary_in) != bool(args.summary_out):
+        sys.exit("ERROR: With --summary_in also --summary_out is required and vice versa!")
+    return args
 
 
 def getIndex(reference):
@@ -98,19 +110,47 @@ def getIndex(reference):
     return aligner
 
 
-def align(aligner, reads):
+def filter_reads(aligner, reads, tmp=None):
     '''
     Test if reads can get aligned to the lambda genome,
     if not: write to stdout
+
+    if tmp is not None, then write lambda read identifiers to this file
+    To filter the summary file on later
     '''
     i = 0
     for record in SeqIO.parse(reads, "fastq"):
         try:
             next(aligner.map(str(record.seq)))
             i += 1
+            if tmp:
+                tmp.write(record.id.encode('utf-8') + b"\n")
         except StopIteration:
             print(record.format("fastq"), end='')
     sys.stderr.write("NanoLyse: removed {} reads.\n".format(i))
+    logging.info("NanoLyse: removed {} reads.".format(i))
+
+
+def filter_summary(summary_file, output, read_ids_file):
+    '''
+    Optional function to filter entries from a sequencing_summary file
+    using a read_ids_file (tmp) to which the identifiers have been written
+    '''
+    read_ids_file.seek(0)
+    lambda_identifiers = [line.rstrip() for line in read_ids_file]
+    sys.stderr.write(f"{len(lambda_identifiers)} lambda reads to remove from the summary\n")
+    i = 0
+    j = 0
+    with open(output, 'wb') as summary_out, open(summary_file, 'rb') as summary_in:
+        header = next(summary_in)
+        summary_out.write(header)
+        index = header.split(b'\t').index(b'read_id')
+        for line in summary_in:
+            i += 1
+            if not line.split(b'\t')[index] in lambda_identifiers:
+                summary_out.write(line)
+                j += 1
+    sys.stderr.write(f"summary had {i} lines, of which {j} got kept\n")
 
 
 if __name__ == '__main__':


=====================================
nanolyse/version.py
=====================================
@@ -1 +1 @@
-__version__ = "1.1.1"
+__version__ = "1.2.0"


=====================================
setup.py
=====================================
@@ -17,7 +17,7 @@ setup(
     url='https://github.com/wdecoster/nanolyse',
     author='Wouter De Coster',
     author_email='decosterwouter at gmail.com',
-    license='MIT',
+    license='GPLv3',
     classifiers=[
         'Development Status :: 4 - Beta',
         'Intended Audience :: Science/Research',



View it on GitLab: https://salsa.debian.org/med-team/nanolyse/-/compare/4a607f5fd9b826ac5e2642d5a2509dfeb2813811...60efe4b0297b711eb8eddd19015f9990a88f30ab

-- 
View it on GitLab: https://salsa.debian.org/med-team/nanolyse/-/compare/4a607f5fd9b826ac5e2642d5a2509dfeb2813811...60efe4b0297b711eb8eddd19015f9990a88f30ab
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201023/0990aa58/attachment-0001.html>


More information about the debian-med-commit mailing list