[med-svn] [Git][med-team/changeo][upstream] New upstream version 1.0.2
Nilesh Patra
gitlab at salsa.debian.org
Tue Jan 19 13:14:38 GMT 2021
Nilesh Patra pushed to branch upstream at Debian Med / changeo
Commits:
5156d719 by Nilesh Patra at 2021-01-19T18:39:51+05:30
New upstream version 1.0.2
- - - - -
10 changed files:
- INSTALL.rst
- NEWS.rst
- PKG-INFO
- bin/AlignRecords.py
- bin/BuildTrees.py
- bin/ParseDb.py
- changeo.egg-info/PKG-INFO
- changeo/IO.py
- changeo/Receptor.py
- changeo/Version.py
Changes:
=====================================
INSTALL.rst
=====================================
@@ -117,9 +117,7 @@ Windows
follow step 6 below.
6. Add both the ``C:\Python34`` and ``C:\Python34\Scripts`` directories
- to your ``%Path%``. On Windows 7 the ``%Path%`` setting is located
- under Control Panel -> System and Security -> System -> Advanced
- System Settings -> Environment variables -> System variables -> Path.
+ to your ``%Path%``. On both Windows 7 and Windows 10, the ``%Path%`` setting is located under Control Panel -> System and Security -> System -> Advanced System Settings -> Environment variables -> System variables -> Path.
7. If you have trouble with the ``.py`` file associations, try adding ``.PY``
to your ``PATHEXT`` environment variable. Also, try opening a
=====================================
NEWS.rst
=====================================
@@ -1,6 +1,23 @@
Release Notes
===============================================================================
+Version 1.0.2: January 18, 2021
+-------------------------------------------------------------------------------
+
+AlignRecords:
+
++ Fixed a bug that caused the program to exit when encountering missing sequence
+data. It will now fail the row or group with missing data and continue.
+
+MakeDb:
+
++ Added support for IgBLAST v1.17.0.
+
+ParseDb:
+
++ Added a relevant error message when an input field is missing from the data.
+
+
Version 1.0.1: October 13, 2020
-------------------------------------------------------------------------------
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: changeo
-Version: 1.0.1
+Version: 1.0.2
Summary: A bioinformatics toolkit for processing high-throughput lymphocyte receptor sequencing data.
Home-page: http://changeo.readthedocs.io
Author: Namita Gupta, Jason Anthony Vander Heiden
=====================================
bin/AlignRecords.py
=====================================
@@ -85,18 +85,28 @@ def alignBlocks(data, field_map, muscle_exec=default_muscle_exec):
Returns:
changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
"""
+ # Define sequence fields
+ seq_fields = list(field_map.keys())
+
+ # Function to validate record
+ def _pass(rec):
+ if all([len(rec.getField(f)) > 0 for f in seq_fields]):
+ return True
+ else:
+ return False
+
# Define return object
result = DbResult(data.id, data.data)
result.results = data.data
result.valid = True
# Fail invalid groups
- if result.id is None:
+ if result.id is None or not all([_pass(x) for x in data.data]):
result.log = None
result.valid = False
return result
- seq_fields = list(field_map.keys())
+ # Run muscle and map results
seq_list = [SeqRecord(r.getSeq(f), id='%s_%s' % (r.sequence_id.replace(' ', '_'), f)) for f in seq_fields \
for r in data.data]
seq_aln = runMuscle(seq_list, aligner_exec=muscle_exec)
@@ -128,13 +138,23 @@ def alignAcross(data, field_map, muscle_exec=default_muscle_exec):
Returns:
changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
"""
+ # Define sequence fields
+ seq_fields = list(field_map.keys())
+
+ # Function to validate record
+ def _pass(rec):
+ if all([len(rec.getField(f)) > 0 for f in seq_fields]):
+ return True
+ else:
+ return False
+
# Define return object
result = DbResult(data.id, data.data)
result.results = data.data
result.valid = True
# Fail invalid groups
- if result.id is None:
+ if result.id is None or not all([_pass(x) for x in data.data]):
result.log = None
result.valid = False
return result
@@ -169,19 +189,28 @@ def alignWithin(data, field_map, muscle_exec=default_muscle_exec):
Returns:
changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
"""
+ # Define sequence fields
+ seq_fields = list(field_map.keys())
+
+ # Function to validate record
+ def _pass(rec):
+ if all([len(rec.getField(f)) > 0 for f in seq_fields]):
+ return True
+ else:
+ return False
+
# Define return object
result = DbResult(data.id, data.data)
result.results = data.data
result.valid = True
# Fail invalid groups
- if result.id is None:
+ if result.id is None or not _pass(data.data):
result.log = None
result.valid = False
return result
record = data.data
- seq_fields = list(field_map.keys())
seq_list = [SeqRecord(record.getSeq(f), id=f) for f in seq_fields]
seq_aln = runMuscle(seq_list, aligner_exec=muscle_exec)
if seq_aln is not None:
=====================================
bin/BuildTrees.py
=====================================
@@ -485,7 +485,7 @@ def characterizePartitionErrors(sequences, clones, meta_data):
nseqs = len(sequences)
imgtar = clones[0].getField("imgtpartlabels")
germline = clones[0].getField("germline_imgt_d_mask")
- if germline is "":
+ if germline == "":
germline = clones[0].getField("germline_imgt")
correctseqs = False
@@ -540,7 +540,7 @@ def characterizePartitionErrors(sequences, clones, meta_data):
resolveglines = False
for c in clones:
ngermline = c.getField("germline_imgt_d_mask")
- if ngermline is "":
+ if ngermline == "":
ngermline = c.getField("germline_imgt")
if ngermline != germline:
resolveglines = True
@@ -798,7 +798,7 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer, mas
#If IMGT regions are provided, record their positions
rd = RegionDefinition(r.junction_length, amino_acid=False)
regions = rd.getRegions(r.sequence_imgt)
- if regions["cdr3_imgt"] is not "" and regions["cdr3_imgt"] is not None:
+ if regions["cdr3_imgt"] != "" and regions["cdr3_imgt"] is not None:
simgt = regions["fwr1_imgt"] + regions["cdr1_imgt"] + regions["fwr2_imgt"] + regions["cdr2_imgt"] + \
regions["fwr3_imgt"] + regions["cdr3_imgt"] + regions["fwr4_imgt"]
if len(simgt) < len(r.sequence_imgt):
@@ -824,7 +824,7 @@ def maskCodonsLoop(r, clones, cloneseqs, logs, fails, out_args, fail_writer, mas
fails["region_fail"] += 1
return 0
- elif regions["fwr3_imgt"] is not "" and regions["fwr3_imgt"] is not None:
+ elif regions["fwr3_imgt"] != "" and regions["fwr3_imgt"] is not None:
simgt = regions["fwr1_imgt"] + regions["cdr1_imgt"] + regions["fwr2_imgt"] + regions["cdr2_imgt"] + \
regions["fwr3_imgt"]
nseq = r.sequence_imgt[len(simgt):len(r.sequence_imgt)]
=====================================
bin/ParseDb.py
=====================================
@@ -21,7 +21,7 @@ from time import time
from presto.IO import printLog, printProgress, printMessage
from changeo.Defaults import default_csv_size, default_out_args
from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs
-from changeo.IO import countDbFile, getOutputHandle, splitName, TSVReader, TSVWriter
+from changeo.IO import checkFields, countDbFile, getOutputHandle, splitName, TSVReader, TSVWriter
# System settings
csv.field_size_limit(default_csv_size)
@@ -59,6 +59,12 @@ def splitDbFile(db_file, field, num_split=None, out_args=default_out_args):
out_fields = db_iter.fields
__, __, out_args['out_type'] = splitName(db_file)
+ # Check fields
+ try:
+ checkFields([field], db_iter.fields, schema=None)
+ except LookupError as e:
+ exit(e)
+
# Determine total numbers of records
rec_count = countDbFile(db_file)
@@ -309,6 +315,12 @@ def dropDbFile(db_file, fields, out_file=None, out_args=default_out_args):
db_iter = TSVReader(db_handle)
__, __, out_args['out_type'] = splitName(db_file)
+ # Check fields
+ try:
+ checkFields(fields, db_iter.fields, schema=None)
+ except LookupError as e:
+ exit(e)
+
# Exclude dropped field from output
out_fields = [f for f in db_iter.fields if f not in fields]
@@ -390,6 +402,12 @@ def deleteDbFile(db_file, fields, values, logic='any', regex=False,
out_fields = db_iter.fields
__, __, out_args['out_type'] = splitName(db_file)
+ # Check fields
+ try:
+ checkFields(fields, db_iter.fields, schema=None)
+ except LookupError as e:
+ exit(e)
+
# Open output
if out_file is not None:
pass_handle = open(out_file, 'w')
@@ -462,6 +480,12 @@ def renameDbFile(db_file, fields, names, out_file=None, out_args=default_out_arg
db_iter = TSVReader(db_handle)
__, __, out_args['out_type'] = splitName(db_file)
+ # Check fields
+ try:
+ checkFields(fields, db_iter.fields, schema=None)
+ except LookupError as e:
+ exit(e)
+
# Get header and rename fields
out_fields = list(db_iter.fields)
for f, n in zip(fields, names):
@@ -544,7 +568,7 @@ def selectDbFile(db_file, fields, values, logic='any', regex=False,
log['FILE'] = os.path.basename(db_file)
log['FIELDS'] = ','.join(fields)
log['VALUES'] = ','.join(values)
- log['REGEX'] =regex
+ log['REGEX'] = regex
printLog(log)
# Open input
@@ -553,6 +577,12 @@ def selectDbFile(db_file, fields, values, logic='any', regex=False,
out_fields = db_iter.fields
__, __, out_args['out_type'] = splitName(db_file)
+ # Check fields
+ try:
+ checkFields(fields, db_iter.fields, schema=None)
+ except LookupError as e:
+ exit(e)
+
# Open output
if out_file is not None:
pass_handle = open(out_file, 'w')
@@ -631,6 +661,12 @@ def sortDbFile(db_file, field, numeric=False, descend=False,
out_fields = db_iter.fields
__, __, out_args['out_type'] = splitName(db_file)
+ # Check fields
+ try:
+ checkFields([field], db_iter.fields, schema=None)
+ except LookupError as e:
+ exit(e)
+
# Open output
if out_file is not None:
pass_handle = open(out_file, 'w')
@@ -707,6 +743,12 @@ def updateDbFile(db_file, field, values, updates, out_file=None, out_args=defaul
out_fields = db_iter.fields
__, __, out_args['out_type'] = splitName(db_file)
+ # Check fields
+ try:
+ checkFields([field], db_iter.fields, schema=None)
+ except LookupError as e:
+ exit(e)
+
# Open output
if out_file is not None:
pass_handle = open(out_file, 'w')
=====================================
changeo.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: changeo
-Version: 1.0.1
+Version: 1.0.2
Summary: A bioinformatics toolkit for processing high-throughput lymphocyte receptor sequencing data.
Home-page: http://changeo.readthedocs.io
Author: Namita Gupta, Jason Anthony Vander Heiden
=====================================
changeo/IO.py
=====================================
@@ -943,7 +943,8 @@ class IgBLASTReader:
'stop codon': 'stop_codon',
'V-J frame': 'vj_frame',
'Productive': 'productive',
- 'Strand': 'strand'}
+ 'Strand': 'strand',
+ 'V Frame shift': 'v_frameshift'}
# Extract column names from comments
f = next((x for x in chunk if x.startswith('# V-(D)-J rearrangement summary')))
@@ -1063,6 +1064,10 @@ class IgBLASTReader:
else:
result['rev_comp'] = 'F'
+ # Add v_frameshift field if present
+ if 'v_frameshift' in summary:
+ result['v_frameshift'] = 'T' if summary['v_frameshift'] == 'Yes' else 'F'
+
return result
def _parseSubregionSection(self, section, sequence):
@@ -2444,7 +2449,8 @@ def checkFields(attributes, header, schema=AIRRSchema):
Raises:
LookupError:
"""
- columns = [schema.fromReceptor(f) for f in attributes]
+ if schema is None: columns = attributes
+ else: columns = [schema.fromReceptor(f) for f in attributes]
missing = [x for x in columns if x not in header]
if len(missing) > 0:
=====================================
changeo/Receptor.py
=====================================
@@ -124,6 +124,7 @@ class AIRRSchema:
('productive', 'functional'),
('stop_codon', 'stop'),
('vj_in_frame', 'in_frame'),
+ ('v_frameshift', 'v_frameshift'),
('locus', 'locus'),
('v_call', 'v_call'),
('d_call', 'd_call'),
@@ -344,6 +345,7 @@ class ChangeoSchema:
('STOP', 'stop'),
('MUTATED_INVARIANT', 'mutated_invariant'),
('INDELS', 'indels'),
+ ('V_FRAMESHIFT', 'v_frameshift'),
('LOCUS', 'locus'),
('V_CALL', 'v_call'),
('D_CALL', 'd_call'),
@@ -503,6 +505,7 @@ class ReceptorData:
stop (bool): whether a stop codon is present in the V(D)J sequence.
mutated_invariant (bool): whether the conserved amino acids are mutated in the V(D)J sequence.
indels (bool): whether the V(D)J nucleotide sequence contains insertions and/or deletions.
+ v_frameshift (bool): whether the V segment contains a frameshift.
sequence_input (Bio.Seq.Seq): input nucleotide sequence.
sequence_vdj (Bio.Seq.Seq): Aligned V(D)J nucleotide sequence without IMGT-gaps.
@@ -663,6 +666,7 @@ class ReceptorData:
'stop': 'logical',
'mutated_invariant': 'logical',
'indels': 'logical',
+ 'v_frameshift': 'logical',
'sequence_input': 'nucleotide',
'sequence_imgt': 'nucleotide',
'sequence_vdj': 'nucleotide',
=====================================
changeo/Version.py
=====================================
@@ -5,5 +5,5 @@ Version and authorship information
__author__ = 'Namita Gupta, Jason Anthony Vander Heiden'
__copyright__ = 'Copyright 2020 Kleinstein Lab, Yale University. All rights reserved.'
__license__ = 'GNU Affero General Public License 3 (AGPL-3)'
-__version__ = '1.0.1'
-__date__ = '2020.10.13'
+__version__ = '1.0.2'
+__date__ = '2021.01.18'
View it on GitLab: https://salsa.debian.org/med-team/changeo/-/commit/5156d71903319dcbf76bd442a78cc8f659f62448
--
View it on GitLab: https://salsa.debian.org/med-team/changeo/-/commit/5156d71903319dcbf76bd442a78cc8f659f62448
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210119/7f74b3a8/attachment-0001.html>
More information about the debian-med-commit
mailing list