[med-svn] [Git][med-team/changeo][upstream] New upstream version 1.0.2

Tue Jan 19 13:14:38 GMT 2021


Nilesh Patra pushed to branch upstream at Debian Med / changeo


Commits:
5156d719 by Nilesh Patra at 2021-01-19T18:39:51+05:30
New upstream version 1.0.2
- - - - -


10 changed files:

- INSTALL.rst
- NEWS.rst
- PKG-INFO
- bin/AlignRecords.py
- bin/BuildTrees.py
- bin/ParseDb.py
- changeo.egg-info/PKG-INFO
- changeo/IO.py
- changeo/Receptor.py
- changeo/Version.py


Changes:

=====================================
INSTALL.rst
=====================================
@@ -117,9 +117,7 @@ Windows
    follow step 6 below.
 
 6. Add both the ``C:\Python34`` and ``C:\Python34\Scripts`` directories
-   to your ``%Path%``. On Windows 7 the ``%Path%`` setting is located
-   under Control Panel -> System and Security -> System -> Advanced
-   System Settings -> Environment variables -> System variables -> Path.
+   to your ``%Path%``. On both Windows 7 and Windows 10, the ``%Path%`` setting is located under Control Panel -> System and Security -> System -> Advanced System Settings -> Environment variables -> System variables -> Path.
 
 7. If you have trouble with the ``.py`` file associations, try adding ``.PY``
    to your ``PATHEXT`` environment variable. Also, try opening a


=====================================
NEWS.rst
=====================================
@@ -1,6 +1,23 @@
 Release Notes
 ===============================================================================
 
+Version 1.0.2:  January 18, 2021
+-------------------------------------------------------------------------------
+
+AlignRecords:
+
++ Fixed a bug that caused the program to exit when encountering missing sequence
+data. It will now fail the row or group with missing data and continue.
+
+MakeDb:
+
++ Added support for IgBLAST v1.17.0.
+
+ParseDb:
+
++ Added a relevant error message when an input field is missing from the data.
+
+
 Version 1.0.1:  October 13, 2020
 -------------------------------------------------------------------------------
 


=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: changeo
-Version: 1.0.1
+Version: 1.0.2
 Summary: A bioinformatics toolkit for processing high-throughput lymphocyte receptor sequencing data.
 Home-page: http://changeo.readthedocs.io
 Author: Namita Gupta, Jason Anthony Vander Heiden


=====================================
bin/AlignRecords.py
=====================================
@@ -85,18 +85,28 @@ def alignBlocks(data, field_map, muscle_exec=default_muscle_exec):
     Returns:
       changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
     """
+    # Define sequence fields
+    seq_fields = list(field_map.keys())
+
+    # Function to validate record
+    def _pass(rec):
+        if all([len(rec.getField(f)) > 0 for f in seq_fields]):
+            return True
+        else:
+            return False
+
     # Define return object
     result = DbResult(data.id, data.data)
     result.results = data.data
     result.valid = True
 
     # Fail invalid groups
-    if result.id is None:
+    if result.id is None or not all([_pass(x) for x in data.data]):
         result.log = None
         result.valid = False
         return result
 
-    seq_fields = list(field_map.keys())
+    # Run muscle and map results
     seq_list = [SeqRecord(r.getSeq(f), id='%s_%s' % (r.sequence_id.replace(' ', '_'), f)) for f in seq_fields \
                 for r in data.data]
     seq_aln = runMuscle(seq_list, aligner_exec=muscle_exec)
@@ -128,13 +138,23 @@ def alignAcross(data, field_map, muscle_exec=default_muscle_exec):
     Returns:
       changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
     """
+    # Define sequence fields
+    seq_fields = list(field_map.keys())
+
+    # Function to validate record
+    def _pass(rec):
+        if all([len(rec.getField(f)) > 0 for f in seq_fields]):
+            return True
+        else:
+            return False
+
     # Define return object
     result = DbResult(data.id, data.data)
     result.results = data.data
     result.valid = True
 
     # Fail invalid groups
-    if result.id is None:
+    if result.id is None or not all([_pass(x) for x in data.data]):
         result.log = None
         result.valid = False
         return result
@@ -169,19 +189,28 @@ def alignWithin(data, field_map, muscle_exec=default_muscle_exec):
     Returns:
       changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
     """
+    # Define sequence fields
+    seq_fields = list(field_map.keys())
+
+    # Function to validate record
+    def _pass(rec):
+        if all([len(rec.getField(f)) > 0 for f in seq_fields]):
+            return True
+        else:
+            return False
+
     # Define return object
     result = DbResult(data.id, data.data)
     result.results = data.data
     result.valid = True
 
     # Fail invalid groups
-    if result.id is None:
+    if result.id is None or not _pass(data.data):
         result.log = None
         result.valid = False
         return result
 
     record = data.data
-    seq_fields = list(field_map.keys())
     seq_list = [SeqRecord(record.getSeq(f), id=f) for f in seq_fields]
     seq_aln = runMuscle(seq_list, aligner_exec=muscle_exec)
     if seq_aln is not None:


=====================================
bin/BuildTrees.py
=====================================
@@ -485,7 +485,7 @@ def characterizePartitionErrors(sequences, clones, meta_data):
     nseqs = len(sequences)
     imgtar = clones[0].getField("imgtpartlabels")
     germline = clones[0].getField("germline_imgt_d_mask")
-    if germline is "":
+    if germline == "":
         germline = clones[0].getField("germline_imgt")
 
     correctseqs = False
@@ -540,7 +540,7 @@ def characterizePartitionErrors(sequences, clones, meta_data):
     resolveglines = False
     for c in clones:
         ngermline = c.getField("germline_imgt_d_mask")
-        if ngermline is "":
+        if ngermline == "":
             ngermline = c.getField("germline_imgt")
         if ngermline != germline:
             resolveglines = True
@@ -798,7 +798,7 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer, mas
         #If IMGT regions are provided, record their positions
         rd = RegionDefinition(r.junction_length, amino_acid=False)
         regions = rd.getRegions(r.sequence_imgt)
-        if regions["cdr3_imgt"] is not "" and regions["cdr3_imgt"] is not None:
+        if regions["cdr3_imgt"] != "" and regions["cdr3_imgt"] is not None:
             simgt = regions["fwr1_imgt"] + regions["cdr1_imgt"] + regions["fwr2_imgt"] + regions["cdr2_imgt"] + \
                     regions["fwr3_imgt"] + regions["cdr3_imgt"] + regions["fwr4_imgt"]
             if len(simgt) < len(r.sequence_imgt):
@@ -824,7 +824,7 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer, mas
                 fails["region_fail"] += 1
                 return 0
 
-        elif regions["fwr3_imgt"] is not "" and regions["fwr3_imgt"] is not None:
+        elif regions["fwr3_imgt"] != "" and regions["fwr3_imgt"] is not None:
             simgt = regions["fwr1_imgt"] + regions["cdr1_imgt"] + regions["fwr2_imgt"] + regions["cdr2_imgt"] + \
                     regions["fwr3_imgt"]
             nseq = r.sequence_imgt[len(simgt):len(r.sequence_imgt)]


=====================================
bin/ParseDb.py
=====================================
@@ -21,7 +21,7 @@ from time import time
 from presto.IO import printLog, printProgress, printMessage
 from changeo.Defaults import default_csv_size, default_out_args
 from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs
-from changeo.IO import countDbFile, getOutputHandle, splitName, TSVReader, TSVWriter
+from changeo.IO import checkFields, countDbFile, getOutputHandle, splitName, TSVReader, TSVWriter
 
 # System settings
 csv.field_size_limit(default_csv_size)
@@ -59,6 +59,12 @@ def splitDbFile(db_file, field, num_split=None, out_args=default_out_args):
     out_fields = db_iter.fields
     __, __, out_args['out_type'] = splitName(db_file)
 
+    # Check fields
+    try:
+        checkFields([field], db_iter.fields, schema=None)
+    except LookupError as e:
+        exit(e)
+
     # Determine total numbers of records
     rec_count = countDbFile(db_file)
 
@@ -309,6 +315,12 @@ def dropDbFile(db_file, fields, out_file=None, out_args=default_out_args):
     db_iter = TSVReader(db_handle)
     __, __, out_args['out_type'] = splitName(db_file)
 
+    # Check fields
+    try:
+        checkFields(fields, db_iter.fields, schema=None)
+    except LookupError as e:
+        exit(e)
+
     # Exclude dropped field from output
     out_fields = [f for f in db_iter.fields if f not in fields]
 
@@ -390,6 +402,12 @@ def deleteDbFile(db_file, fields, values, logic='any', regex=False,
     out_fields = db_iter.fields
     __, __, out_args['out_type'] = splitName(db_file)
 
+    # Check fields
+    try:
+        checkFields(fields, db_iter.fields, schema=None)
+    except LookupError as e:
+        exit(e)
+
     # Open output
     if out_file is not None:
         pass_handle = open(out_file, 'w')
@@ -462,6 +480,12 @@ def renameDbFile(db_file, fields, names, out_file=None, out_args=default_out_arg
     db_iter = TSVReader(db_handle)
     __, __, out_args['out_type'] = splitName(db_file)
 
+    # Check fields
+    try:
+        checkFields(fields, db_iter.fields, schema=None)
+    except LookupError as e:
+        exit(e)
+
     # Get header and rename fields
     out_fields = list(db_iter.fields)
     for f, n in zip(fields, names):
@@ -544,7 +568,7 @@ def selectDbFile(db_file, fields, values, logic='any', regex=False,
     log['FILE'] = os.path.basename(db_file)
     log['FIELDS'] = ','.join(fields)
     log['VALUES'] = ','.join(values)
-    log['REGEX'] =regex
+    log['REGEX'] = regex
     printLog(log)
 
     # Open input
@@ -553,6 +577,12 @@ def selectDbFile(db_file, fields, values, logic='any', regex=False,
     out_fields = db_iter.fields
     __, __, out_args['out_type'] = splitName(db_file)
 
+    # Check fields
+    try:
+        checkFields(fields, db_iter.fields, schema=None)
+    except LookupError as e:
+        exit(e)
+
     # Open output
     if out_file is not None:
         pass_handle = open(out_file, 'w')
@@ -631,6 +661,12 @@ def sortDbFile(db_file, field, numeric=False, descend=False,
     out_fields = db_iter.fields
     __, __, out_args['out_type'] = splitName(db_file)
 
+    # Check fields
+    try:
+        checkFields([field], db_iter.fields, schema=None)
+    except LookupError as e:
+        exit(e)
+
     # Open output
     if out_file is not None:
         pass_handle = open(out_file, 'w')
@@ -707,6 +743,12 @@ def updateDbFile(db_file, field, values, updates, out_file=None, out_args=defaul
     out_fields = db_iter.fields
     __, __, out_args['out_type'] = splitName(db_file)
 
+    # Check fields
+    try:
+        checkFields([field], db_iter.fields, schema=None)
+    except LookupError as e:
+        exit(e)
+
     # Open output
     if out_file is not None:
         pass_handle = open(out_file, 'w')


=====================================
changeo.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: changeo
-Version: 1.0.1
+Version: 1.0.2
 Summary: A bioinformatics toolkit for processing high-throughput lymphocyte receptor sequencing data.
 Home-page: http://changeo.readthedocs.io
 Author: Namita Gupta, Jason Anthony Vander Heiden


=====================================
changeo/IO.py
=====================================
@@ -943,7 +943,8 @@ class IgBLASTReader:
                        'stop codon': 'stop_codon',
                        'V-J frame': 'vj_frame',
                        'Productive': 'productive',
-                       'Strand': 'strand'}
+                       'Strand': 'strand',
+                       'V Frame shift': 'v_frameshift'}
 
         # Extract column names from comments
         f = next((x for x in chunk if x.startswith('# V-(D)-J rearrangement summary')))
@@ -1063,6 +1064,10 @@ class IgBLASTReader:
         else:
             result['rev_comp'] = 'F'
 
+        # Add v_frameshift field if present
+        if 'v_frameshift' in summary:
+            result['v_frameshift'] = 'T' if summary['v_frameshift'] == 'Yes' else 'F'
+
         return result
 
     def _parseSubregionSection(self, section, sequence):
@@ -2444,7 +2449,8 @@ def checkFields(attributes, header, schema=AIRRSchema):
     Raises:
         LookupError:
     """
-    columns = [schema.fromReceptor(f) for f in attributes]
+    if schema is None:  columns = attributes
+    else:  columns = [schema.fromReceptor(f) for f in attributes]
     missing = [x for x in columns if x not in header]
 
     if len(missing) > 0:


=====================================
changeo/Receptor.py
=====================================
@@ -124,6 +124,7 @@ class AIRRSchema:
                                ('productive', 'functional'),
                                ('stop_codon', 'stop'),
                                ('vj_in_frame', 'in_frame'),
+                               ('v_frameshift', 'v_frameshift'),
                                ('locus', 'locus'),
                                ('v_call', 'v_call'),
                                ('d_call', 'd_call'),
@@ -344,6 +345,7 @@ class ChangeoSchema:
                                ('STOP', 'stop'),
                                ('MUTATED_INVARIANT', 'mutated_invariant'),
                                ('INDELS', 'indels'),
+                               ('V_FRAMESHIFT', 'v_frameshift'),
                                ('LOCUS', 'locus'),
                                ('V_CALL', 'v_call'),
                                ('D_CALL', 'd_call'),
@@ -503,6 +505,7 @@ class ReceptorData:
       stop (bool): whether a stop codon is present in the V(D)J sequence.
       mutated_invariant (bool): whether the conserved amino acids are mutated in the V(D)J sequence.
       indels (bool): whether the V(D)J nucleotide sequence contains insertions and/or deletions.
+      v_frameshift (bool): whether the V segment contains a frameshift.
 
       sequence_input (Bio.Seq.Seq): input nucleotide sequence.
       sequence_vdj (Bio.Seq.Seq): Aligned V(D)J nucleotide sequence without IMGT-gaps.
@@ -663,6 +666,7 @@ class ReceptorData:
                'stop': 'logical',
                'mutated_invariant': 'logical',
                'indels': 'logical',
+               'v_frameshift': 'logical',
                'sequence_input': 'nucleotide',
                'sequence_imgt': 'nucleotide',
                'sequence_vdj': 'nucleotide',


=====================================
changeo/Version.py
=====================================
@@ -5,5 +5,5 @@ Version and authorship information
 __author__    = 'Namita Gupta, Jason Anthony Vander Heiden'
 __copyright__ = 'Copyright 2020 Kleinstein Lab, Yale University. All rights reserved.'
 __license__   = 'GNU Affero General Public License 3 (AGPL-3)'
-__version__   = '1.0.1'
-__date__      = '2020.10.13'
+__version__   = '1.0.2'
+__date__      = '2021.01.18'



View it on GitLab: https://salsa.debian.org/med-team/changeo/-/commit/5156d71903319dcbf76bd442a78cc8f659f62448

-- 
View it on GitLab: https://salsa.debian.org/med-team/changeo/-/commit/5156d71903319dcbf76bd442a78cc8f659f62448
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210119/7f74b3a8/attachment-0001.html>